X-Git-Url: https://git.rrq.au/?a=blobdiff_plain;f=vector%2Fvector.c;h=16d5577f19a44e17977229aa16cb6882b1bb196e;hb=2bd18d410e42775bbdaab680447d1e2c43f5491c;hp=c4482a7d55bb6f977075ab9e6bab400d5b1e1cd2;hpb=5a02e98f44e95db2660e1468c5765d924af68534;p=rrq%2Frrqmisc.git diff --git a/vector/vector.c b/vector/vector.c index c4482a7..16d5577 100644 --- a/vector/vector.c +++ b/vector/vector.c @@ -1,3 +1,5 @@ +#include +#include #include #include "vector.h" @@ -7,45 +9,183 @@ * void*, and an index is "unsigned long" (64 bits). */ -#if VECTOR_LEVEL_BITS == 4 -typedef union { - vector_index as_whole; - struct { - unsigned int msb:4; unsigned int lsb:4; - } __attribute__ ((__packed__)) as_byte[8]; -} vector_indexing; +/** ============================================================ **/ + +static int VECTOR_BITS[4] = { 8, 4, 2, 64 }; + +typedef struct { + unsigned int a:2; + unsigned int b:2; + unsigned int c:2; + unsigned int d:2; +} bitpair; -#define VECTOR_LEVEL_MASK ( VECTOR_SLOTS - 1 ) +typedef struct { + unsigned int a:4; + unsigned int b:4; +} nibble; -#define VECTOR_PART_BYTE(i,p) ((vector_indexing*)(i))->as_byte[ (p)/2 ] +typedef struct { + unsigned int a:8; +} byte; -static int VECTOR_INDEX_PART(vector_index *index,int part) { - if ( part & 1 ) { - return VECTOR_PART_BYTE(index,part).lsb; +/** + * Return the index part for the given level of the vector's leveling + * variant. + * + * The vector variant indicates whether indexing uses 8, 4, or 2 bits + * per level. + */ +unsigned long VECTOR_INDEX_PART(vector *pv,vector_index *index, int level) { + unsigned char *px = (unsigned char *) index; + switch ( pv->variant ) { + case byte_index_levels: { + byte *pp = (byte*)(px + level); + return pp->a; + } + case nibble_index_levels: { + nibble *pp = (nibble*)( px + ( level / 2 ) ); + switch ( level & 1 ) { + case 0: return pp->a; + case 1: return pp->b; + } + break; + } + case bitpair_index_levels: { + bitpair *pp = (bitpair*)( px + ( level / 4 ) ); + switch ( level & 3 ) { + case 0: return pp->a; + case 1: return pp->b; + case 2: return pp->c; + case 3: return pp->d; + } + break; } - return VECTOR_PART_BYTE(index,part).msb; + case single_index_level: + return (*index); + } + return 0; } -static int VECTOR_INDEX_PART_INC(vector_index *index,int part) { - if ( part & 1 ) { - return ++VECTOR_PART_BYTE(index,part).lsb; +/** + * Increment the index part at the indivated level, cyclic but not + * carrying over to the upper level. Returns the new level index. + */ +static unsigned long VECTOR_INDEX_PART_INC( + vector *pv,vector_index *index, int level) +{ + unsigned char *px = (unsigned char *) index; + switch ( pv->variant ) { + case byte_index_levels: { + byte *pp = (byte*)( px + level ); + return ++(pp->a); + } + case nibble_index_levels: { + nibble *pp = (nibble*)( px + ( level / 2 ) ); + switch ( level & 1 ) { + case 0: return ++(pp->a); + case 1: return ++(pp->b); + } + break; + } + case bitpair_index_levels: { + bitpair *pp = (bitpair*)( px + level / 4 ); + switch ( level & 3 ) { + case 0: return ++(pp->a); + case 1: return ++(pp->b); + case 2: return ++(pp->c); + case 3: return ++(pp->d); + } + break; } - return ++VECTOR_PART_BYTE(index,part).msb; + case single_index_level: + return ++(*index); + } + return 0; } -#elif VECTOR_LEVEL_BITS == 8 -#define VECTOR_LEVEL_MASK ( VECTOR_SLOTS - 1 ) +/** + * Decrement the index part at the indicated level, cyclic but not + * carrying over to the upper level. Returns the prior level index. + */ +static unsigned long VECTOR_INDEX_PART_DEC( + vector *pv,vector_index *index, int level) +{ + unsigned char *px = (unsigned char *) index; + switch ( pv->variant ) { + case byte_index_levels: { + byte *pp = (byte*)( px + level ); + return (pp->a)--; + } + case nibble_index_levels: { + nibble *pp = (nibble*)( px + ( level / 2 ) ); + switch ( level & 1 ) { + case 0: return (pp->a)--; + case 1: return (pp->b)--; + } + break; + } + case bitpair_index_levels: { + bitpair *pp = (bitpair*)( px + level / 4 ); + switch ( level & 0xf ) { + case 0: return (pp->a)--; + case 1: return (pp->b)--; + case 2: return (pp->c)--; + case 3: return (pp->d)--; + } + break; + } + case single_index_level: + return (*index)--; + } + return 0; +} -typedef union { - vector_index as_whole; - unsigned char as_byte[8]; -} vector_indexing; +#define ONES (~((vector_index) 0)) -#define VECTOR_INDEX_PART(i,p) (((vector_indexing*)(i))->as_byte[p]) +// Set index to first value for all index parts at level and lower. +static void VECTOR_INDEX_FIRST(vector *pv,vector_index *index, int level) { + (*index) &= ONES << ( VECTOR_BITS[ pv->variant ] * level ); +} -#define VECTOR_INDEX_PART_INC(i,p) (++VECTOR_INDEX_PART(i,p)) +// Set index to last value for all index parts at level and lower. +static void VECTOR_INDEX_LAST(vector *pv,vector_index *index, int level) { + static unsigned long ones[] = { 255, 15, 3 }; + unsigned long x = ones[ pv->variant ]; + while ( level-- ) { + (*index) |= x; + x <<= VECTOR_BITS[ pv->variant ]; + } + // 255, 25, 3 + //(*index) |= ONES >> ( 64 - ( VECTOR_BITS[ pv->variant ] * level ) ); +} -#endif +// Return number of slots for a vector variant. +unsigned long VECTOR_SLOTS(vector *pv) { + switch ( pv->variant ) { + case byte_index_levels: return 256; + case nibble_index_levels: return 16; + case bitpair_index_levels: return 4; + case single_index_level: return pv->size; + } + return 0; +} + +// The number of levels to span vector pv wrt its size and variant +static unsigned int vector_levels(vector *pv,unsigned int size) { + if ( size < 4 ) { + return 1; + } + switch ( pv->variant ) { + case byte_index_levels: return ((int)(log2( size - 1 ) / 8)) + 1; + case nibble_index_levels: return ((int)(log2( size - 1 ) / 4)) + 1; + case bitpair_index_levels: return ((int)(log2( size - 1 ) / 2)) + 1; + case single_index_level: return 1; + } + return 0; +} + +/** ============================================================ **/ /** * Advances a vector index to the next used slot at or below the @@ -54,84 +194,162 @@ typedef union { * update the index slots accordingly. The given index is advanced * cyclically to match the found slot. The function returns a slot * pointer to the used slot, if any, and 0 otherwise. + * The last parameter is a flag that gets set when the scanning is + * partial (i.e. not the whole index page). */ static void **vector_level_next_used( - vector_page *page,vector_index *index,int level,vector_index end) { - void **p = (void**)&(*page)[ VECTOR_INDEX_PART( index, level ) ]; - for( ; *index < end; p++ ) { + vector *pv, + vector_page *page, + vector_index *index, + int level, + int *partial ) +{ + void **p = (void**)&(*page)[ VECTOR_INDEX_PART( pv, index, level ) ]; + if ( VECTOR_INDEX_PART( pv, index, level ) != 0 ) { + *partial = 1; + } + for( ; *index < pv->size; p++ ) { if ( *p ) { if ( level == 0 ) { return p; // This is a used entry } // *p is an index that needs to be inspected recursively - int whole = VECTOR_INDEX_PART( index, level - 1 ) == 0; - void **x = vector_level_next_used( *p, index, level - 1, end ); + int w = 0; + void **x = vector_level_next_used( pv, *p, index, level - 1, &w ); if ( x ) { return x; // Used slot was found; return it. } - // The page *p is all empty, so can/should be reclaimed. - if ( whole ) { + // If the page *p is all empty, so can/should be reclaimed. + if ( w == 0 ) { free( *p ); *p = 0; + } else { + *partial = 1; + } + } else { + if ( level > 0 ) { + VECTOR_INDEX_FIRST( pv, index, level - 1 ); } } - if ( VECTOR_INDEX_PART_INC( index, level ) == 0 ) { + if ( VECTOR_INDEX_PART_INC( pv, index, level ) == 0 ) { break; // cycling this level => nothing found } } return 0; } -// The least number of levels to span index S (typically the size of a -// vector) -static unsigned int vector_levels(vector_index S) { - unsigned int N = 0; - do { - N++; - S /= VECTOR_SLOTS; - } while ( S ); - return N; -} - // Find the next used slot at given index or later. Returns pointer to -// the slot. -void **vector_next_used( - vector *pv,vector_index *index, - int (*reclaim)(vector *pv,vector_index index,void *item,void *data), - void *data ) -{ - if ( pv->entries == 0 ) { +// the slot. This allows for a reclaim function that may reclaim slot +// items on the way to next used slot. +void **vector_next_used(vector *pv,vector_index *index) { + if ( pv->entries == 0 || *index >= pv->size ) { *index = pv->size; return 0; } - int levels = vector_levels( pv->size ); - for ( ; *index < pv->size; (*index)++ ) { + int levels = vector_levels( pv, pv->size ); + int partial = 0; + do { void **slot = vector_level_next_used( - pv->entries, index, levels - 1, pv->size ) ; + pv, pv->entries, index, levels - 1, &partial ) ; if ( slot == 0 ) { - // reached the end of the vector - *index = pv->size; - break; + break; // reached the end of the vector } if ( *slot ) { - // Try reclaiming the slot, - if ( reclaim && reclaim( pv, *index, *slot, data ) == 0 ) { - *slot = 0; + return slot; + } + } while ( ++(*index) < pv->size ); + if ( partial == 0 ) { + free( pv->entries ); + pv->entries = 0; + } + *index = pv->size; // reached the end of the vector + return 0; +} + +/** + * Advances a vector index to the prior used slot at or below the + * given level, starting from the indexed entry (inclusive) and down. + * The function will free any empty pages it discovers, and then + * update the index slots accordingly. The given index is advanced + * cyclically to match the found slot. The function returns a slot + * pointer to the used slot, if any, and 0 otherwise. + * The last parameter is a flag that gets set when the scanning is + * partial (i.e. not the whole index page). + */ +static void **vector_level_prev_used( + vector *pv, + vector_page *page, + vector_index *index, + int level, + int *partial ) +{ + void **p = (void**)&(*page)[ VECTOR_INDEX_PART( pv, index, level ) ]; + if ( VECTOR_INDEX_PART( pv, index, level ) != VECTOR_SLOTS( pv ) - 1 ) { + *partial = 1; + } + do { + if ( *p ) { + if ( level == 0 ) { + return p; // This is a used entry + } + // *p is an index that needs to be inspected recursively + int w = 0; + void **x = vector_level_prev_used( pv, *p, index, level - 1, &w ); + if ( x ) { + return x; // Used slot was found; return it. + } + // If the page *p is all empty, it can/should be reclaimed. + if ( w == 0 ) { + free( *p ); + *p = 0; } else { - return slot; + *partial = 1; + } + } else { + if ( level > 0 ) { + VECTOR_INDEX_LAST( pv, index, level ); } } + p--; + } while ( VECTOR_INDEX_PART_DEC( pv, index, level ) != 0 ); + return 0; +} + +// Find the next used slot at given index or later. Returns pointer to +// the slot. This allows for a reclaim function that may reclaim slot +// items on the way to next used slot. +void **vector_prev_used(vector *pv,vector_index *index) { + if ( pv->entries == 0 || *index >= pv->size ) { + *index = pv->size; + return 0; } + int levels = vector_levels( pv, pv->size ); + int partial = 0; + do { + void **slot = vector_level_prev_used( + pv, pv->entries, index, levels - 1, &partial ) ; + if ( slot == 0 ) { + break; // reached the end of the vector + } + if ( *slot ) { + return slot; + } + } while ( (*index)-- != 0 ); + if ( partial == 0 ) { + free( pv->entries ); + pv->entries = 0; + } + *index = pv->size; // reached the end of the vector return 0; } -// Reclaim tree of unused pages -static void vector_reclaim(vector_page *page,unsigned int level) { +// Reclaim tree of unused pages for a given level +static void vector_reclaim(vector *pv,vector_page *page,unsigned int level) { int i = 0; if ( level > 0 ) { - for ( ; i < VECTOR_SLOTS; i++ ) { + for ( ; i < VECTOR_SLOTS( pv ); i++ ) { if ( (*page)[i] ) { - vector_reclaim( (vector_page *) (*page)[i], level - 1 ); + vector_reclaim( pv, (vector_page *) (*page)[i], level - 1 ); } } } @@ -139,11 +357,11 @@ static void vector_reclaim(vector_page *page,unsigned int level) { } // Resize vector, using the reclaim function as needed, to handle any -// excess items or to veto the resize. Returns the index of the veto, if -// any, or <0 otherwise, with -1 indicating success and -2 indicating -// OOM while growing. +// excess items or to veto the resize. Returns the index of the veto, +// if any, or <0 otherwise, with -1 indicating success and -2 +// indicating OOM // -// Nothe that resizing may result in the introduction/removal of +// Note that resizing may result in the introduction/removal of // indexing levels and pages, so as to keep the leveling accurate for // the size. int vector_resize( @@ -151,27 +369,14 @@ int vector_resize( int (*reclaim)(vector *pv,vector_index index,void *item,void *data), void *data ) { - // Table of number of slots for a level above that of the number - // at the prior lower level. The first level (i.e., level 0) adds - // 15 slots to the one slot of no index page. Level 1 adds 15*16 - // slots, level 2 adds 15*(16^2), and generically level i adds - // 15*(16^i) slots. - static int level_delta[ VECTOR_INDEX_FIELDS ]; - if ( level_delta[ 0 ] == 0 ) { - int d = 1; - int i; - for ( i = 0; i < VECTOR_INDEX_FIELDS; i++ ) { - level_delta[ i ] = ( VECTOR_SLOTS - 1 ) * d; - d = VECTOR_SLOTS * d; - } - } struct { int old; int new; } level = { - vector_levels( pv->size ), - vector_levels( new_size ) + vector_levels( pv, pv->size ), + vector_levels( pv, new_size ) }; + vector_page *entries = 0; if ( pv->entries == 0 ) { pv->size = new_size; return 0; @@ -179,75 +384,99 @@ int vector_resize( // A shrinking vector might be veto-ed if ( new_size < pv->size ) { vector_index index = new_size; - void **slot = vector_next_used( pv, &index, reclaim, data ); - if ( slot ) { + void **slot = vector_next_used( pv, &index ); + while ( slot ) { + if ( *slot && reclaim && reclaim( pv, index, *slot, data ) == 0 ) { + *slot = 0; + slot = vector_next_used( pv, &index ); + continue; + } return index; } // At this point we know that there are no slots used after // the new_size size, so now it's time to remove and reclaim // any superflouous top level pages. - vector_page *entries; - vector_page **pp = &pv->entries; - while ( level.old-- > level.new ) { - if ( pp ) { - pp = (vector_page **)(*pp)[0]; + if ( pv->variant == single_index_level ) { + // Follow vector size using realloc + if ( new_size > 0 ) { + entries = (vector_page*) + realloc( pv->entries, new_size * sizeof( void* ) ); + if ( entries == 0 ) { + return -2; // OOM + } } - } - if ( pp != &pv->entries ) { - entries = pv->entries; - if ( pp ) { - pv->entries = *pp; - *pp = 0; // Detach subtree - } else { + pv->entries = entries; + } else { + vector_page **pp = &pv->entries; + int i = level.old; + while ( i-- > level.new ) { + if ( pp ) { + pp = (vector_page **)(*pp); + } + } + if ( pp != &pv->entries ) { + entries = pv->entries; + if ( pp ) { + pv->entries = *pp; + *pp = 0; // Detach subtree + } else { + pv->entries = 0; + } + vector_reclaim( pv, entries, level.old - 1 ); + } + if ( new_size == 0 && pv->entries ) { + free( pv->entries ); pv->entries = 0; } - vector_reclaim( entries, level.old ); - } - if ( new_size == 0 && pv->entries ) { - free( pv->entries ); - pv->entries = 0; } } else { // vector is growing. Maybe insert levels. - while ( level.old < level.new ) { - vector_page *p = (vector_page *) - calloc( 1, sizeof( vector_page ) ); - if ( p == 0 ) { + if ( pv->variant == single_index_level ) { + // Follow vector size using realloc + entries = (vector_page *)realloc( + pv->entries, new_size * sizeof( void* ) ); + if ( entries == 0 ) { return -2; // OOM } - (*p)[0] = pv->entries; - pv->entries = p; - pv->size += level_delta[ level.old++ ]; - // Note that the last level addition might make the size - // larger than requested, which gets corrected below. + pv->entries = entries; + memset( &(*entries)[ pv->size ], 0, + ( new_size - pv->size ) * sizeof( void* ) ); + } else { + for ( ; level.old < level.new; level.old++ ) { + vector_page *p = (vector_page *) + calloc( VECTOR_SLOTS( pv ), sizeof( void* ) ); + if ( p == 0 ) { + return -2; // OOM + } + (*p)[0] = pv->entries; + pv->entries = p; + // Should maybe change the size to match the level? + // otherwise recovery from OOM is impossible + } } } pv->size = new_size; return -1; } -// Return a pointer to the indexed item the given page level, adding -// intermediate pages if requested. Returns 0 if addition fails (OOM), -// or if not requested and page is missing. -// Level 0 = pointer to the item entry itself. -// Level VECTORLEVELS( pv->size ) - 1 = -static void **vector_access( - vector *pv,vector_index index,int level,int add) -{ +// Return pointer to the indexed page slot at the requested level, and +// adding intermediate index pages if so requested. Returns 0 if +// addition fails (OOM), or if not requested and page is missing. +void **vector_access(vector *pv,vector_index index,int level,int add) { if ( index >= pv->size ) { return 0; } void **page = (void**) &pv->entries; - int i = vector_levels( pv->size ); + int i = vector_levels( pv, pv->size ); while ( i-- > level ) { if ( add && (*page) == 0 ) { - (*page) = calloc( VECTOR_SLOTS, sizeof( void* ) ); + (*page) = calloc( VECTOR_SLOTS( pv ), sizeof( void* ) ); } page = (*page); if ( page == 0 ) { return 0; } - page += VECTOR_INDEX_PART( &index, i ); + page += VECTOR_INDEX_PART( pv, &index, i ); } return page; } @@ -262,8 +491,17 @@ inline void vector_set(vector *pv,vector_index index,void *value) { *p = value; } +// Set value at index but return the old value +void *vector_get_set(vector *pv,vector_index index,void *value) { + void **p = vector_entry( pv, index ); + void *old = *p; + *p = value; + return old; +} + inline void *vector_get(vector *pv,vector_index index) { - return *(vector_entry( pv, index )); + void **p = vector_entry( pv, index ); + return *p; } int vector_reclaim_any(vector *pv,vector_index ix,void *item,void *data) { @@ -293,11 +531,19 @@ void vector_copy(vector *dst,vector_index di, } } +vector *vector_clone(enum vector_variant variant,vector *src) { + vector *dst = (vector*) malloc( sizeof( vector ) ); + (*dst) = (vector) { .variant = variant, .size = 0, .entries = 0 }; + vector_resize( dst, src->size, 0, 0 ); + vector_copy( dst, 0, src, 0, src->size ); + return dst; +} + void vector_dump(vector *pv, - int (*itemdump)(const vector_index,const void *)) { + void (*itemdump)(const vector_index,const void *)) { vector_index index = 0; for ( ; index < pv->size; index++ ) { - void **slot = vector_next_used( pv, &index, 0, 0 ); + void **slot = vector_next_used( pv, &index ); if ( slot == 0 ) { break; } @@ -308,7 +554,7 @@ void vector_dump(vector *pv, //// Quicksort // Returns 1 for "in order", 0 for equal, and -1 for "wrong order" -typedef int (*comparfn)(const void *,const void *); + typedef int (*comparfn)(const void *,const void *); static void vector_qsort_part( vector *pv,comparfn compar, @@ -365,20 +611,96 @@ void vector_qsort(vector *pv,comparfn compar) { } void vector_iterate(vector *pv, - int (*itemfn)(vector_index,void*,void*), - void *data ) + vector_index start, + int (*itemfn)(vector_index,void*,void*), + void *data ) { - vector_index index = 0; + vector_index index = start; while ( index < pv->size ) { - void **slot = vector_next_used( pv, &index, 0, 0 ); - if ( slot == 0 ) { - break; - } - int i = index & VECTOR_LEVEL_MASK ; - for ( ; i < VECTOR_SLOTS && index < pv->size; i++, index++, slot++ ) { - if ( itemfn( index, *slot, data ) ) { + int end = VECTOR_SLOTS( pv ); + int i = index & ( end - 1 ); + for ( ; i < end && index < pv->size; i++, index++ ) { + void **slot = vector_access( pv, index, 0, 0 ); + if ( itemfn( index, slot? *slot: 0, data ) ) { return; } } } } + +struct find_data { + void *value; + vector_index index; +}; + +static int vector_find_item(vector_index index, void *item,void *data) { + struct find_data *fd = (struct find_data*)data; + if ( fd->value != item ) { + return 0; + } + fd->index = index; + return 1; +} + +vector_index vector_find(vector *pv,void *value) { + struct find_data fd = { + .value = value, + .index = pv->size + }; + vector_iterate( pv, 0, vector_find_item, &fd ); + return fd.index; +} + +// Find surrounding indexes for a given item key in a sparse vector +void *vector_bsearch(vector *pv,vector_index *index,const void *key, + int (*compare)(const void *key, const void *item)) { + vector_index lo = 0; + vector_index hi = pv->size; + if ( hi-- == 0 || vector_prev_used( pv, &hi ) == 0 ) { + return 0; + } + hi++; + while ( lo < hi ) { + vector_index m = lo + ( hi - lo ) / 2; + void **slot = vector_next_used( pv, &m ); + int c = compare( key, *slot ); + if ( c == 0 ) { + *index = m; + return *slot; + } + if ( c < 0 ) { + hi = m; + } else { + lo = m + 1; + } + } + return 0; +} + +// Iterator callback. +static int checkunused(vector_index index,void *item,void *data) { + vector_index *last = (vector_index*) data; + if ( item == 0 ) { + (*last) = index; + return 1; + } + if ( *last > index ) { + // Only on the first iteration, with *last = vector_sie + if ( index == 0 ) { + (*last) = 1; + return 0; + } + *last = 0; + } else if ( index == (*last) ) { + (*last)++; + return 0; + } + return 1; +} + +// Scan forward for the next unused vector slot +vector_index vector_next_unused(vector *pv,vector_index index) { + vector_index unused = vector_size( pv ); + vector_iterate( pv, index, checkunused, &unused ); + return unused; +}