From: Ralph Ronnquist Date: Tue, 12 Jul 2022 05:25:16 +0000 (+1000) Subject: Added Query functions. Major renaming of stuff to use camelcase type names. X-Git-Url: https://git.rrq.au/?a=commitdiff_plain;h=6fcd4ffc18696dbf4c11be32837a2035ea5ee92f;p=rrq%2Frrqmisc.git Added Query functions. Major renaming of stuff to use camelcase type names. --- diff --git a/vector/BindingTable.c b/vector/BindingTable.c new file mode 100644 index 0000000..7f9d344 --- /dev/null +++ b/vector/BindingTable.c @@ -0,0 +1,156 @@ +#include +#include +#include + +/** + * A Binding is an association between a name (char*) and a value + * (void*). + */ +typedef struct { + char *name; + void *value; +} Binding; + +/** + * This callback function should return the hashcode of a key. The + * hashcode is used for indexing into the backing Vector for + * finding the an item via its key. The same key must map + * consistently to the same hashcode while the hashtable contains + * an item with that key. Different keys map map to the same + * hashcode, in which case the Vector placement is made at the + * first empty or hole slot following the hashcode index. + */ +static unsigned long binding_hashcode(void *this,void *key) { + char *name = (char *) key; + unsigned long n = strlen( name ); + return HashVector_hashcode( (unsigned char *) name, n ); +} + +/** + * This callback function should determine whether an item has a + * given key or not. + */ +static int binding_haskey(void *this,void *item,void *key) { + Binding *b = (Binding*) item; + char *name = (char *) key; + return strcmp( name, (char*) b->name ) == 0; +} + +/** + * This callback function should return a (possibly temporary) key + * of an item. It can be anything (i.e., with or without internal + * structure) as it is treated as an identifier that other + * callbacks recognize. It is merely understood as a value in an + * identity domain. + */ +static void *binding_itemkey(void *this,void *item) { + Binding *b = (Binding*) item; + return b->name; +} + + +/** + * This callback function should handle a key obtained from the + * itemkey callback function, e.g., reclaim temporary allocation. + */ +static void binding_releasekey(void *this,void *key) { +} + +/** + * This callback function writes a representation of an item into + * a character buffer. + */ +static int binding_tostring(void *this,void *item,char *buffer,int limit) { + Binding *b = (Binding*) item; + return snprintf( buffer, limit, "{%s,%p}", b->name, b->value ); +} + +/** + * This is the "item type" for Binding items. + */ +ItemKeyFun bindingitem = { + .hashcode = binding_hashcode, + .haskey = binding_haskey, + .itemkey = binding_itemkey, + .releasekey = binding_releasekey, + .tostring = binding_tostring +}; + +BindingTable *BindingTable_create(BindingTable *next) { + BindingTable *this = (BindingTable*) malloc( sizeof( BindingTable ) ); + this->table = (HashVector) { + .table = (Vector) { + .variant = Nibble_index_levels, .size = 16, .entries = 0 + }, .fill = 0, .holes = 0, .type = &bindingitem + }; + this->next = next; + return this; +} + +BindingTable *BindingTable_release(BindingTable *bt) { + if ( bt ) { + BindingTable *next = bt->next; + Vector_resize( &bt->table.table, 0, Vector_free_any, 0 ); + free( bt ); + return next; + } + return 0; +} + +void BindingTable_set(BindingTable *bt,char *name,void *value) { + Binding *b = (Binding*) HashVector_find( &bt->table, name ); + if ( b == 0 ) { + b = (Binding*) malloc( sizeof( Binding ) ); + b->name = name; + HashVector_add( &bt->table, b ); + } + b->value = value; +} + +void *BindingTable_get(BindingTable *bt,char *name) { + for ( ; bt; bt = bt->next ) { + Binding *b = (Binding*) HashVector_find( &bt->table, name ); + if ( b ) { + return b->value; + } + } + return 0; +} + +void BindingTable_deref(BindingTable *bt,int arity,tuple *t) { + int i; + for ( i = 0; i < arity; i++ ) { + if ( (*t)[i] ) { + (*t)[i] = BindingTable_get( bt, (*t)[i] ); + } + } +} + +int BindingTable_unify( + BindingTable *bt,char *n1,char *n2,int (*eq)(void*,void*)) { + void *v1 = BindingTable_get( bt, n1 ); + void *v2 = BindingTable_get( bt, n2 ); + if ( v2 && v1 == 0 ) { + BindingTable_set( bt, n1, v2 ); + } + if ( v1 && v2 == 0 ) { + BindingTable_set( bt, n2, v1 ); + } + return ( v1 && v2 )? ( eq? ( eq( v1, v2 ) == 0 ) : ( v1 == v2 ) ) : 1; +} + +#define COPYA(T,P,N) (T*) memcpy( malloc( N * sizeof(T) ), P, N * sizeof( T ) ) + +tuple *BindingTable_tuple_get(BindingTable *bt,int arity,tuple *t) { + tuple *vt = (tuple*) memcpy( + malloc( arity * sizeof( void* ) ), t, arity * sizeof( void* ) ); + BindingTable_deref( bt, arity, vt ); + return vt; +} + +void BindingTable_tuple_set(BindingTable *bt,int arity,tuple *nm,tuple *vs) { + int i; + for ( i = 0; i < arity; i++ ) { + BindingTable_set( bt, (*nm)[i], (*vs)[i] ); + } +} diff --git a/vector/BindingTable.h b/vector/BindingTable.h new file mode 100644 index 0000000..8ce6b7e --- /dev/null +++ b/vector/BindingTable.h @@ -0,0 +1,108 @@ +#ifndef BindingTable_H +#define BindingTable_H + +#include +#include + +/** + * A BindingTable is a chain of \ref HashVector "HashVectors" of + * Binding items that associate a (char*) name with a (void*) value. + */ +typedef struct _BindingTable { + HashVector table; + struct _BindingTable *next; +} BindingTable; + +/** + * \brief Allocate a new \ref BindingTable and chain it to the one given. + * + * \returns the allocated \ref bandingtable. + * + * \related BindingTable + */ +extern BindingTable *BindingTable_create(BindingTable *next); + +/** + * \brief Reclaim a \ref BindingTable with all its bindings and return + * its chained. + * + * \param bt is the \ref BindingTable to reclaim. + * + * \returns the chained \ref BindingTable. + * + * \related BindingTable + */ +extern BindingTable *BindingTable_release(BindingTable *bt); + +/** + * \brief Set a Binding in a \ref BindingTable. + * + * \param bt is the \ref BindingTable concerned. + * + * \param name is the Binding name. + * + * \param value is the Binding value. + * + * \note Binding names are equal or not by means of strcmp, and each + * name has a at most single Binding. + * + * A value of \b 0 indicates "unbound". + * + * \related BindingTable + */ +extern void BindingTable_set(BindingTable *bt,char *name,void *value); + +/** + * \brief Get a Binding from a BindingTable chain. + * + * \param bt is the first BindingTable concerned. + * + * \param name is the Binding variable name. + * + * \param value is the Binding value. + * + * \note Binding names are equal or not by means of strcmp, and each + * name has a at most single Binding. + * + * This function scan's the \ref BindingTable chain for the first + * assignment, if any, of the name. Note that a name can be made + * unbound on top of being bound by setting it to \b 0. + * + * \related BindingTable + */ +extern void *BindingTable_get(BindingTable *bt,char *name); + +/** + * \brief Replace all names with their values. + * + * \param bt is the BindingTable concerned. + * + * \param arity is the tuple arity. + * + * \param t is the tuple of (char*) names to dereference. + */ +void BindingTable_deref(BindingTable *bt,int arity,tuple *t); + +/** + * \brief Unify two named bindings with option equal-value callback. + * + * \param bt is the first BindingTable concerned. + * + * \param n1 is the first Binding name. + * + * \param n2 is the second Binding name. + * + * \param eq is the optional equal-value callback that returns 0 if + * the given values are equal. + * + * This function updates the top \ref BindingTable by assigning n1 or + * n2 to any value the other has (in the chain) unless they have + * different values (in the chain). If both are unassigned, then + * neither get reassigned in the to both BindingTable/ + * + * \related BindingTable + */ +extern int BindingTable_unify( + BindingTable *bt,char *n1,char *n2,int (*eq)(void*,void*)); + +#endif diff --git a/vector/HashVector.c b/vector/HashVector.c new file mode 100644 index 0000000..b4c6184 --- /dev/null +++ b/vector/HashVector.c @@ -0,0 +1,216 @@ +#include +#include + +#define SELF hv->type + +// Find the slot for the keyed element, and return pointer to it, or +// to the first of holes encountered while considering collisions. +// Returns a pointer to the place for the item, or 0 in case of OOM or +// overfull HashVector (i.e. 0 shouldn't happen). +// If itemkey is set, then the itemkey callback function is used for +// obtaining a temporary key from the item. +static void **HashVector_find_slot( + HashVector *hv, void *key, unsigned long *i, int itemkey ) +{ + if ( itemkey ) { + // Get actual key from keying item + key = hv->type->itemkey( SELF, key ); + } + unsigned long index = hv->type->hashcode( SELF, key ) % hv->table.size; + *i = index; + void **hole = 0; + void **p = 0; + for ( ;; ) { + p = Vector_entry( &hv->table, (*i) ); + if ( p == 0 ) { + if ( itemkey ) { + hv->type->releasekey( SELF, key ); + } + return 0; // This basically means OOM, and is a failure condition. + } + if ( (*p) == 0 ) { + if ( itemkey ) { + hv->type->releasekey( SELF, key ); + } + return ( hole )? hole : p; // Not found; it's place is here. + } + if ( (*p) == HV_HOLE ) { + if ( hole == 0 ) { + hole = p; // Remember the first hole + } + } else if ( hv->type->haskey( SELF, (*p), key ) ) { + if ( itemkey ) { + hv->type->releasekey( SELF, key ); + } + return p; // Found + } + if ( ++(*i) == hv->table.size ) { + (*i) = 0; // Roll-around the table + } + if ( (*i) == index ) { + if ( itemkey ) { + hv->type->releasekey( SELF, key ); + } + return 0; // Overfull HashVector! + } + } +} + +// Find the keyed item +void *HashVector_find(HashVector *hv,void *key) { + VectorIndex index = 0; + void **slot = HashVector_find_slot( hv, &index, key, 0 ); + return ( slot && *slot && *slot != HV_HOLE )? *slot : 0; +} + +// Find the keyed element at or after the index. Update index and +// return item. +void *HashVector_next(HashVector *hv,VectorIndex *index,void *key) { + unsigned long i = index? *index : 0; + for ( ; i < hv->table.size; i++ ) { + void **p = Vector_next_used( &hv->table, &i ); + if ( p == 0 ) { + break; + } + if ( *p && *p != HV_HOLE ) { + if ( key && hv->type->haskey( hv->type, *p, key ) == 0 ) { + continue; + } + if ( index ) { + (*index) = i; + } + return *p; + } + } + if ( index ) { + (*index) = hv->table.size; + } + return 0; +} + +static int capture_item(Vector *pv,unsigned long ix,void *item,void *data) { + if ( item && item != HV_HOLE ) { + HashVector_add( (HashVector *) data, item ); + } + return 0; +} + +static int iter_item(unsigned long ix,void *item,void *data) { + if ( item && item != HV_HOLE ) { + HashVector_add( (HashVector *) data, item ); + } + return 0; +} + +static void HashVector_resize(HashVector *hv,unsigned long new_size) { + HashVector tmp = *hv; + hv->table.size = new_size; + hv->table.entries = 0; + hv->fill = 0; + hv->holes = 0; + if ( new_size < hv->table.size ) { + Vector_resize( &tmp.table, 0, capture_item, hv ); + } else { + Vector_iterate( &tmp.table, 0, iter_item, hv ); + } +} + +// Add the given element. +int HashVector_add(HashVector *hv,void *item) { + unsigned long i; + void **p = HashVector_find_slot( hv, item, &i, 1 ); + if ( p == 0 ) { + return -1; // OOM or overfull HashVector + } + if ( *p ) { + if ( *p != HV_HOLE ) { + return 0; // Already added. + } + hv->holes--; // about to reuse a hole + } + *p = item; + hv->fill++; + if ( hv->fill + hv->holes > hv->table.size / 2 ) { + HashVector_resize( hv, hv->table.size * 2 ); + } + return 1; +} + +// Delete the given item +int HashVector_delete(HashVector *hv,void *item) { + unsigned long i; + void **p = HashVector_find_slot( hv, item, &i, 1 ); + if ( p == 0 ) { + return -1; + } + if ( *p != item ) { + return 0; + } + // Check if subsequent slot is occupied + if ( ++i >= hv->table.size ) { + i = 0; + } + void **q = Vector_access( &hv->table, i, 0, 0 ); + if ( q && (*q) ) { + *p = HV_HOLE; + hv->holes++; + } else { + *p = 0; + } + hv->fill--; + if ( hv->table.size > VECTOR_SLOTS( &hv->table ) && + hv->fill < hv->table.size / 4 ) { + HashVector_resize( hv, hv->table.size / 2 ); + } + return 1; +} + +Vector *HashVector_contents( + HashVector *hv,enum VectorVariant variant,Vector *v) +{ + if ( v == 0 ) { + if ( hv->fill == 0 ) { + return 0; + } + v = (Vector*) malloc( sizeof( Vector ) ); + } else { + Vector_resize( v, 0, Vector_clear_any, 0 ); + } + (*v) = (Vector) { .variant = variant, 0, 0 }; + if ( hv->fill == 0 ) { + return v; + } + Vector_resize( v, hv->fill, 0, 0 ); + VectorIndex i; + VectorIndex j = 0; + for ( i = 0; i < v->size; i++, j++ ) { + Vector_set( v, i, HashVector_next( hv, &j, 0 ) ); + } + return v; +} + +// A simple binary hashcode, (before modulo table size) +unsigned long HashVector_hashcode(unsigned char *key,unsigned long n) { + unsigned long value = 5381; + while ( n-- ) { + value += ( value << 5 ) + *(key++); + } + return value; +} + + +HashVector *HashVector_create(enum VectorVariant variant,ItemKeyFun *type) { + HashVector *hv = (HashVector*) malloc( sizeof( HashVector ) ); + (*hv) = (HashVector) { + .table = (Vector) { + .variant = variant, + .size = 16, + .entries = 0 + }, + .fill = 0, + .holes = 0, + .type = type + }; + return hv; +} + diff --git a/vector/HashVector.h b/vector/HashVector.h new file mode 100644 index 0000000..18f63fb --- /dev/null +++ b/vector/HashVector.h @@ -0,0 +1,202 @@ +#ifndef HashVector_H +#define HashVector_H + +#include +#include + +/** + * \file HashVector.h + * + * A HashVector is a use of \ref Vector as a hashtable. The HashVector + * includes three functions to, respectively, obtain the hashcode of a + * given key, to obtain the key of a given item, and to tell whether + * or not a given item has a given key. The HashVector manipulations + * uses those for locating and placing items into the Vector; the + * hascode is the primary place for an item, but then scanning upwards + * with a rolling index in case of collisions. + * + * The Vector holds (void*)0 pointers for free slots, (void*)1 + * pointers (aka \ref HV_HOLE) to indicate slots of deleted items, and + * otherwise item pointers. Thus, deleting an item results in av + * HV_HOLE et the item's slot. + * + * The HashVector grows to double in size, with rehashing, when an + * item is added to make the sum of fill and holes exceed 50% of the + * size, and the HashVector shrinks to half size when an item is + * deleted to make fill reduce below 25% of the size. + */ + +/** + * \struct HashVector + * + * HashVector combines a \ref Vector (for contents) with fill and hole + * counters, and \ref ItemKeyFun callback functions for abstracting + * items as being pairs of key and payload. + * + * \extends Vector + */ +typedef struct { + /** + * This is the backing \ref Vector for the HashVector. Items are + * placed in the Vector by means of their key hashcodes, at the + * first unused slot cyclically after the hashcode index. + */ + Vector table; // the table space for items + /** + * This is the fill count, i.e., how many key-distinct items there + * are in the backing Vector. + */ + unsigned long fill; // current number of contained items + /** + * This is a count of \ref HV_HOLE markers in the backing Vector, + * which hold the position of deleted items so as to not break the + * lookup sequence for items placed cyclically after their + * hashcode index due to hash collisions. + */ + unsigned long holes; // current number of slots marking deleted items + /** + * This is a pointer to the \ref ItemKeyFun record that provides + * the key-and-payload abstraction for items. + */ + ItemKeyFun *type; // the key type for the items +} HashVector; + +/** + * HV_HOLE is the representation of a deleted item. This supports the + * hashtable algoritm where hashcode collisions are resolved by + * placing later items compactly cyclically after their hashcode + * index. When an item is removed from the table, its slot is set as a + * HV_HOLE slot instead of being cleared. + * + * \related HashVector + */ +#define HV_HOLE ((void*) 1) + +/** + * \brief Find the keyed item. + * + * \param hv is the \ref HashVector concerned. + * + * \param key is the key to find. + * + * \returns the item if any or \b 0 if none. + * + * \related HashVector + */ +void *HashVector_find(HashVector *hv,void *key); + +/** + * \brief Scan the table for th next key-matching item at or after the + * given index. + * + * \param hv is the \ref HashVector concerned. + * + * \param index is a pointer to the index to advance. + * \ + * \param key is the query key + * + * \returns the next matching item, or \b 0 if none, with the index + * updated. + * + * \related HashVector + */ +extern void *HashVector_next(HashVector *hv,VectorIndex *i,void *key); + +/** + * \brief Add the given item into the \ref HashVector, growing it as + * needed to ensure that its fill+holes is no more than half the size. + * + * \param hv is the \ref HashVector concerned. + * + * \param item is the item to add. + * + * \returns \b 1 when an item is added, \b 0 when the item key is + * already present, and \b -1 upon OOM or ovefull \ref HashVector. + * + * Note that this function first locates any item of the same key, and + * then doesn't add if there is already an item that has the same key. + * + * \related HashVector + */ +extern int HashVector_add(HashVector *hv,void *item); + +/** + * \brief Delete the given item, and shrink the HashVector so that its + * size is at most double its fill, though at minimum a single index + * page corresponding to the \ref VectorVariant in use. + * + * \param hv is the \ref HashVector concerned. + * + * \param item is the item to delete. + * + * \returns \b 1 when the item gets deleted (by replacing its slot + * with a \ref HV_HOLE value), \b 0 if the HashVector has either no + * item or a different item for the item key, and \b -1 in case of OOM + * or overfull HashVector. + * + * Note that the contained item that has the same key as the provided + * item is deleted. + * + * \see ItemKeyFun + * + * \related HashVector + */ +extern int HashVector_delete(HashVector *hv,void *item); + +/** + * \brief Copy content items into a given empty \ref Vector. + * + * \param hv is the \ref HashVector concerned. + * + * \param variant is the desired Vector variant. + * + * \param pv is the optional \ref Vector to copy the content into. + * + * \returns the \ref Vector with the \ref HashVector content + * + * If \pv is null, then a new Vector is allocated and handed out for + * the caller to reclaim unless the HashVector is empty in which case + * this function returns 0. + * + * If \b pv is not null, it is first cleared, and then populated with + * the content from the HashVector. The populated Vector is returned. + * + * \related HashVector + */ +extern Vector *HashVector_contents( + HashVector *hv,enum VectorVariant variant,Vector *pv); + +/** + * \brief This is a utility function to compute and return a hashcode + * for a block of bytes. + * + * \param key is the start of the block. + * + * \param n is the Byte size of the block. + * + * \related HashVector + */ +extern unsigned long HashVector_hashcode(unsigned char *key,unsigned long n); + +/** + * \brief Create a \ref HashVector of given \ref VectorVariant + * for given \ref ItemKeyFun. + * + * \param variant is the \ref VectorVariant to use. + * + * \param type is the \ref ItemKeyFun to use. + * + * \returns the initialised \ref HashVector. + * + * The HashVector will be initialized with a single \ref VectorPage. + * + * \note The \ref HashVector record is allocated with malloc and the + * caller is responsible for free-ing the memory block when it's no + * longer needed. + * + * \related HashVector + */ +extern HashVector *HashVector_create( + enum VectorVariant variant,ItemKeyFun *type); + +#endif diff --git a/vector/ItemKeyFun.h b/vector/ItemKeyFun.h new file mode 100644 index 0000000..93c0200 --- /dev/null +++ b/vector/ItemKeyFun.h @@ -0,0 +1,62 @@ +#ifndef ItemKeyFun_H +#define ItemKeyFun_H + +#include + +/** + * \struct ItemKeyFun + * + * ItemKeyFun provides a meta-level representation for abstracting + * items as pairs of keys and payload, and having a hashcode mapping + * for keys. The key part is used for identifying items under the idea + * that all items that have the same key are the same; distinct + * representations of the same abstract entity. The hashcode scrambles + * the key part in a consistent way into an ideal table index for the + * items that have that key. Different keys may yield the same + * hashcode. + */ +typedef struct { + +#define SELF void *this + /** + * This callback function should return the hashcode of a key. The + * hashcode is used for indexing into the backing Vector for + * finding the an item via its key. The same key must map + * consistently to the same hashcode while the hashtable contains + * an item with that key. Different keys map map to the same + * hashcode, in which case the Vector placement is made at the + * first empty or hole slot following the hashcode index. + */ + unsigned long (*hashcode)(SELF,void *key); + + /** + * This callback function should determine whether an item has a + * given key or not. + */ + int (*haskey)(SELF,void *item,void *key); + + /** + * This callback function should return a (possibly temporary) key + * of an item. It can be anything (i.e., with or without internal + * structure) as it is treated as an identifier that other + * callbacks recognize. It is merely understood as a value in an + * identity domain. + */ + void *(*itemkey)(SELF,void *item); + + /** + * This callback function should handle a key obtained from the + * itemkey callback function, e.g., reclaim temporary allocation. + */ + void (*releasekey)(SELF,void *key); + + /** + * This callback function writes a representation of an item into + * a character buffer. + */ + int (*tostring)(SELF,void *item,char *buffer,int limit); + +#undef SELF +} ItemKeyFun; + +#endif diff --git a/vector/Makefile b/vector/Makefile index d5c718b..0f342d3 100644 --- a/vector/Makefile +++ b/vector/Makefile @@ -1,6 +1,8 @@ LIBRARY = libvector.a -LIBOBJS = vector.o hashvector.o -LIBOBJS += integeritem.o stringitem.o tupleitem.o relation.o +LIBOBJS = Vector.o HashVector.o +LIBOBJS += TupleSchema.o integeritem.o stringitem.o +LIBOBJS += Relation.o +LIBOBJS += BindingTable.o Query.o default: $(LIBRARY) diff --git a/vector/Query.c b/vector/Query.c new file mode 100644 index 0000000..54d413b --- /dev/null +++ b/vector/Query.c @@ -0,0 +1,389 @@ +#include +#include +#include +#include +#include + +enum next_state { + /** + * This state tells the "next" function that it should capture the + * incoming BindingTable state and provide the initial Binding of + * a new sucession of bindings. + */ + initial, + /** + * This state tells the "next" function that it should update the + * bidning table with a subsequent Binding in the current + * succession of bindings. + */ + subsequent, + /** + * This state tells the "next" function that it should just + * restore the Binding table to its incoming state. + */ + restore +}; + +//enum compound_state { reset, started, ended }; + +/** + * A struct query_callbacks record defines the callbacks for a + * specific query type. + */ +struct query_callbacks { + /** + * \brief Callback function to reclaim the query memory for a + * given query. + * + * \param this is the specific \ref query concerned. + * + * Ground queries recalim their own state memory. Composite + * queries first propagate the reclaim call to its components, and + * thereafter reclaim their local state memory. + */ + void (*reclaim)(query *this); + void (*start)(query *this,BindingTable *bt,enum next_state s); + /** + * \brief Callback function to update the Binding table with a + * succession of alternative bindings. + * + * \param this is the specific \ref query concerned. + * + * \param bt is the Binding table to set bindings in. + * + * \param s is the call "sub-command" for the function. + * + * \returns 1 if a new Binding is provided and 0 otherwise. + * + * This function is called repeatedly for successively obtaining + * the alternative bindings that satisfy the query. The "initial" + * state sub-command tells the function to capture the incoming + * BindingTable state so that the function can later restore it + * upon the "restore" sub-command. Upon the "initial" command, the + * function also sets up the Binding table with its first Binding + * alternative. This is followed by a series of "subsequent" + * sub-command calls for the function to change the BindingTable + * for its succession of Binding alternatives. The function should + * return 1 after any successful Binding setup, and return 0 when + * it cannot setup any (more) Binding. + */ + int (*next)(query *this,BindingTable *bt,enum next_state state); +}; + +/* ==================== AssignmentQuery ==================== */ +typedef struct { + struct query_callbacks *def; + int arity; + tuple *names; + tuple *values; + tuple *saved; +} AssignmentQuery; + +// Release any memory. +static void AssignmentQuery_reclaim(query *this) { + AssignmentQuery *q = (AssignmentQuery*) this; + free( q->saved ); + free( this ); +} + +static int AssignmentQuery_check(int arity,tuple *a,tuple *b) { + int i; + for ( i = 0; i < arity; i++ ) { + char *value = (*a)[i]; + char *current = (*b)[i]; + if ( value && current && current != value && + strcmp( current, value ) != 0 ) { + return 0; + } + } + return 1; +} + +static void AssignmentQuery_assign( + BindingTable *bt,int n,tuple *names,tuple *values,int all) +{ + int i; + for ( i = 0; i < n; i++ ) { + if ( all || (*values)[i] ) { + BindingTable_set( bt, (*names)[i], (*values)[i] ); + } + } +} + +// Make names have given values and return 1. If any name has a +// different value then return 0. Values are strings. +static int AssignmentQuery_next( + query *this,BindingTable *bt,enum next_state state) { + AssignmentQuery *q = (AssignmentQuery*) this; + switch ( state ) { + case initial: + if ( q->saved == 0 ) { + q->saved = (tuple*) malloc( q->arity * sizeof( void* ) ); + } + memcpy( q->saved, q->names, q->arity * sizeof( void* ) ); + BindingTable_deref( bt, q->arity, q->saved ); + // Check with new values + if ( AssignmentQuery_check( q->arity, q->values, q->saved ) ) { + AssignmentQuery_assign( bt, q->arity, q->names, q->values, 0 ); + return 1; + } + // Fall through + case subsequent: + case restore: + if ( q->saved ) { + AssignmentQuery_assign( bt, q->arity, q->names, q->saved, 1 ); + free( q->saved ); + q->saved = 0; + } + return 0; + } + return 0; +} + +static struct query_callbacks AssignmentQuery_def = { + .reclaim = AssignmentQuery_reclaim, + .next = AssignmentQuery_next +}; + +/** + * Return a query object representing an assignment of one or more + * variables. + */ +query *query_assign(int arity,tuple *names,tuple *values) { + AssignmentQuery *q = (AssignmentQuery*) + malloc( sizeof( AssignmentQuery ) ); + (*q) = (AssignmentQuery) { + .def = &AssignmentQuery_def, + .arity = arity, + .names = names, + .values = values, + .saved = 0 + }; + return (query*) q; +} + +/* ==================== conjunction ==================== */ +typedef struct { + struct query_callbacks *def; + int active; + int size; + query **conjuncts; +} ConjunctionQuery; + +static void ConjunctionQuery_reclaim(query *this) { + ConjunctionQuery *q = (ConjunctionQuery*) this; + int i; + for ( i = 0; i < q->size; i++ ) { + q->conjuncts[i]->def->reclaim( q->conjuncts[i] ); + } + free( q->conjuncts ); + free( this ); +} + +static int ConjunctionQuery_next( + query *this,BindingTable *bt,enum next_state state) +{ + ConjunctionQuery *q = (ConjunctionQuery*) this; + int i = q->size - 1; + enum next_state s = subsequent; + switch ( state ) { + case initial: + q->active = 1; + i = 0; + s = initial; + // Fall through? + case subsequent: + while ( i < q->size ) { + if ( q->conjuncts[i]->def->next( q->conjuncts[i], bt, s ) ) { + continue; + } + q->conjuncts[i]->def->next( q->conjuncts[i], bt, restore ); + if ( i-- == 0 ) { + // The first conjunct now exhausted + q->active = 0; + return 0; + } + s = subsequent; + } + return 1; + case restore: + if ( q->active ) { + for ( ; i >= 0; i-- ) { + q->conjuncts[i]->def->next( q->conjuncts[i], bt, restore ); + } + } + q->active = 0; + return 0; + } + return 0; +} + +static struct query_callbacks ConjunctionQuery_def = { + .reclaim = ConjunctionQuery_reclaim, + .next = ConjunctionQuery_next +}; + +query *query_and(int n,...) { + va_list args; + ConjunctionQuery *q = (ConjunctionQuery *) + malloc( sizeof( ConjunctionQuery ) ); + (*q) = (ConjunctionQuery) { + .def = &ConjunctionQuery_def, + .active = 0, + .size = n, + .conjuncts = (query**) malloc( n * sizeof( query* ) ) + }; + int i; + va_start( args, n ); + for ( i = 0; i < n; i++ ) { + q->conjuncts[i] = va_arg( args, query* ); + } + va_end( args ); + return (query*) q; +} + +/* ==================== disjunction ==================== */ +typedef struct { + struct query_callbacks *def; + int index; + int size; + query **disjuncts; +} DisjunctionQuery; + +static void DisjunctionQuery_reclaim(query *this) { + DisjunctionQuery *q = (DisjunctionQuery*) this; + int i; + for ( i = 0; i < q->size; i++ ) { + q->disjuncts[i]->def->reclaim( q->disjuncts[i] ); + } + free( q->disjuncts ); + free( this ); +} + +static int DisjunctionQuery_next( + query *this,BindingTable *bt,enum next_state state) { + DisjunctionQuery *q = (DisjunctionQuery*) this; + int i = q->index; + enum next_state s = subsequent; + switch ( state ) { + case initial: + i = 0; + s = initial; + case subsequent: + for ( ; i < q->size; i++ ) { + if ( q->disjuncts[i]->def->next( q->disjuncts[i], bt, s ) ) { + q->index = i; + return 1; + } + q->disjuncts[i]->def->next( q->disjuncts[i], bt, restore ); + s = initial; + } + q->index = -1; + return 0; + case restore: + if ( i >= 0 ) { + q->disjuncts[i]->def->next( q->disjuncts[i], bt, restore ); + q->index = -1; + } + return 0; + } + return 0; +} + +static struct query_callbacks DisjunctionQuery_def = { + .reclaim = DisjunctionQuery_reclaim, + .next = DisjunctionQuery_next, +}; + +query *query_or(int n,...) { + va_list args; + DisjunctionQuery *q = (DisjunctionQuery *) + malloc( sizeof( DisjunctionQuery ) ); + (*q) = (DisjunctionQuery) { + .def = &DisjunctionQuery_def, + .index = -1, + .size = n, + .disjuncts = (query**) malloc( n * sizeof( query* ) ), + }; + int i; + va_start( args, n ); + for ( i = 0; i < n; i++ ) { + q->disjuncts[i] = va_arg( args, query* ); + } + va_end( args ); + return (query*) q; +} + +/* ==================== Relation access ==================== */ +typedef struct { + struct query_callbacks *def; + Relation *rel; + VectorIndex index; + tuple *names; + tuple *values; + tuple *saved; +} RelationQuery; + +// Release any memory. +static void RelationQuery_reclaim(query *this) { + RelationQuery *q = (RelationQuery*) this; + free( q->saved ); + free( this ); +} + +// Make names have given values and return 1. If any name has a +// different value then return 0. Values are strings. +static int RelationQuery_next( + query *this,BindingTable *bt,enum next_state state) { + RelationQuery *q = (RelationQuery*) this; + int arity = ((TupleSchema*) q->rel->content.type)->arity; + VectorIndex index = q->index + 1; + switch ( state ) { + case initial: + if ( q->saved == 0 ) { + q->saved = (tuple*) malloc( arity * sizeof( void* ) ); + } + memcpy( q->saved, q->names, arity * sizeof( void* ) ); + BindingTable_deref( bt, arity, q->saved ); + index = 0; + // Fall through + case subsequent: + for ( ; index < q->rel->content.table.size; index++ ) { + tuple *values = relation_next( q->rel, &index, q->values ); + if ( values ) { + q->index = index; + AssignmentQuery_assign( bt, arity, q->names, values, 1 ); + return 1; + } + } + case restore: + if ( q->saved ) { + AssignmentQuery_assign( bt, arity, q->names, q->saved, 1 ); + free( q->saved ); + q->saved = 0; + } + return 0; + } + return 0; +} + +static struct query_callbacks RelationQuery_def = { + .reclaim = RelationQuery_reclaim, + .next = RelationQuery_next +}; + +/** + * Return a query object representing an Relation of one or more + * variables. + */ +query *query_Relation(Relation *r,tuple *names,tuple *values) { + RelationQuery *q = (RelationQuery*) malloc( sizeof( RelationQuery ) ); + (*q) = (RelationQuery) { + .def = &RelationQuery_def, + .rel = r, + .names = names, + .values = values, + .saved = 0 + }; + return (query*) q; +} diff --git a/vector/Query.h b/vector/Query.h new file mode 100644 index 0000000..17bbe38 --- /dev/null +++ b/vector/Query.h @@ -0,0 +1,53 @@ +#ifndef query_H +#define query_H + +#include +#include + +struct query_callbacks; + +/** + * A query is an implementation of a generic ABI over relations. It's + * more or less a "virtual Relation" representing a logical composite + * access of actual relations, and as such its active part is held in + * a separate \ref querytype record similar to how \ref ItemKeyFun + * records define valye types. + */ +typedef struct { + struct query_callbacks *def; +} query; + +/** + * \brief Creates an assignment query. + * + * \param arity is the assignment tuple arity. + * + * \param names is the (char*) names to assign. + * + * \param values is the (void*) values to asign. + * + * The two tuples must have the same arity for assigning names[i] to + * values[i]. This query makes the given names have the given values, + * once for each (*next) call following a (*reset) call. + * + * \related query + */ +extern query *query_assign(int arity,tuple *names,tuple *values); + + + +typedef struct { + query base; + Relation *r; + tuple *names; + tuple *query; +} lookup_query; + +/** + * \brief Create a lookup_query record for lookup data in a Relation. + */ +extern query *query_lookup(Relation *r,...); + + + +#endif diff --git a/vector/Relation.c b/vector/Relation.c new file mode 100644 index 0000000..f72f198 --- /dev/null +++ b/vector/Relation.c @@ -0,0 +1,143 @@ +#include +#include +#include + +Relation *relation_create(TupleSchema *schema) { + Relation *r = (Relation *) malloc( sizeof( Relation ) ); + (*r) = (Relation) { + .content = (HashVector) { + .table = (Vector) { + .variant = Nibble_index_levels, + .size = 16, + .entries = 0 + }, + .fill = 0, .holes = 0, .type = (ItemKeyFun*) schema + }, + .constraints = (Vector) { + .variant = single_index_level, + .size = 0, + .entries = 0 + }, + }; + return r; +} + +#define COPYA(T,P,N) (T*) memcpy( malloc( N * sizeof(T) ), P, N * sizeof( T ) ) +#define COPY(T,P) COPYA(T,P,1) + +// Add an indexing HashVector to the Relation using the given column +// flags with 1 indicating key column and 0 indicating value column. +int relation_add_constraint(Relation *r,...) { + va_list ap; + TupleSchema *ts = (TupleSchema *) r->content.type; + tuple *columns = (tuple*) calloc( ts->arity, sizeof( void* ) ); + int i = 0; + va_start( ap, r ); + for ( ; i < ts->arity; i++ ) { + if ( va_arg( ap, int ) ) { + (*columns)[i] = ts->columns[i]; + } + } + va_end( ap ); + ts = TupleSchema_create( ts->arity, columns ); + i = (int) r->constraints.size; + Vector_append( + &r->constraints, + HashVector_create( Nibble_index_levels, (ItemKeyFun*) ts ) ); + return i; +} + +//============== Adding an item ============= +// Iteration context for adding or querying a Relation +typedef struct { + Relation *rel; + HashVector knockouts; + void *item; +} Knockout; + +// Determine matches to ((Knockout*)data)->key in +// (HashVector*)item, optionally using ((Knockout*)data)->columns +// for ignoring full matches to the key tuple. +static int knockout_check(VectorIndex index,void *item,void *data) { + Knockout *kod = (Knockout*) data; + VectorIndex i = 0; + for ( ; i < ((HashVector*) item)->table.size; i++ ) { + void *old = HashVector_next( (HashVector*) item, &i, kod->item ); + if ( old ) { + HashVector_add( &kod->knockouts, old ); + } + } + return 0; +} + +// delete the (tuple*)item from the (HashVector*)data +static int knockout_delete(VectorIndex index,void *item,void *data) { + HashVector_delete( (HashVector*) item, data ); + return 0; +} + +// add the (tuple*)data to the (HashVector*)item +static int knockout_add(VectorIndex index,void *item,void *data) { + HashVector_add( (HashVector*)item, data ); + return 0; +} + +// Find and remove all collisions for a query, unless "add" is +// non-zero in which case the function aborts if there is any match in +// the main content. +static int knockout_clear(Knockout *this,Relation *r,tuple *item,int add) { + (*this) = (Knockout) { + .rel = r, + .knockouts = { + .table = { + .variant = Nibble_index_levels, .size = 16, .entries = 0 + }, + .fill = 0, .holes = 0, .type = r->content.type, + }, + .item = item + }; + knockout_check( 0, &r->content, this ); + if ( add ) { + if ( this->knockouts.fill > 0 ) { + return 0; + } + // Find all constraint knockouts for addition + Vector_iterate( &r->constraints, 0, knockout_check, this ); + } + if ( this->knockouts.fill > 0 ) { + // Delete them from all tables + VectorIndex i; + for ( i = 0; i < this->knockouts.table.size; i++ ) { + void *t = HashVector_next( &this->knockouts, &i, 0 ); + if ( t ) { + HashVector_delete( &r->content, t ); + Vector_iterate( &r->constraints, 0, knockout_delete, t ); + } + } + } + return 1; +} + +// add a tuple to a Relation and return a Vector of knocked out +// tuples, if any, or 0 otherwise. +Vector *relation_add(Relation *r,tuple *item) { + Knockout data; + if ( knockout_clear( &data, r, item, 1 ) ) { + // Add the new tuple + HashVector_add( &r->content, item ); + Vector_iterate( &r->constraints, 0, knockout_add, item ); + return HashVector_contents( &data.knockouts, single_index_level, 0 ); + } + return 0; +} + +Vector *relation_delete(Relation *r,tuple *item) { + Knockout data; + (void) knockout_clear( &data, r, item, 0 ); + return HashVector_contents( &data.knockouts, single_index_level, 0 ); +} + +void *relation_next(Relation *r,VectorIndex *index,tuple *query) { + return HashVector_next( &r->content, index, query ); +} + diff --git a/vector/Relation.h b/vector/Relation.h new file mode 100644 index 0000000..b3c2be7 --- /dev/null +++ b/vector/Relation.h @@ -0,0 +1,164 @@ +#ifndef relation_H +#define relation_H + +#include +#include + +/** + * A Relation is an implementation of a tuple set with (optional) key + * constraints. The store is a \ref HashVector whose \b type is a \ref + * TupleSchema that defines the columns. The key constraints are + * represented as additional \ref HashVector "HashVectors" whose \ref + * TupleSchema "TupleSchemas" are clones of the column schema with + * some columns excluded. + * + * \extends HashVector + */ +typedef struct { + /** + * This is the primary content store for the Relation. Its type + * should be a TupleSchema declaring the "item types" for the + * Relation columns. + */ + HashVector content; + + /** + * This is a collection of relational constraints, if any, which + * are represented as HashVectors whose TupleSchemas are clones of + * the content TupleSchema with some columns excluded. + */ + Vector constraints; +} Relation; + +/** + * \brief Create a Relation for the given TupleSchema. + * + * \param schema is the column schema + * + * \returns the allocated Relation record. + * + * The given TupleSchema is set up as the type of the content + * HashVector, which also is initialised as a Nibble_index_levels + * variant Vector. + * + * \related Relation + */ +extern Relation *relation_create(TupleSchema *schema); + +/** + * \brief Add a key constraint to a \ref Relation. + * + * \param r is the Relation concerned. + * + * \param ... are the column flags indicating key (1) or value (0) + * column for all columns. + * + * \returns the index into the constraints \ref Vector for the added + * constraint. + * + * This function adds a \ref HashVector with a \ref TupleSchema as its + * item type cloned from the content type and then modified to + * represent the constraint. Namely that the key columns have their + * "column type" set while value columsn are reset. + * + * The \b constraint \ref HashVectors are used when \ref tuple + * "tuples" are added to the \ref Relation so as to identify the + * already contained \ref tuple "tuples" that contradict the addition + * by means of having the same constraint key. The already contained + * \ref tuple "tuples" are then "knocked out" from the Relation by the + * new addition. + * + * \see relation_add + * \related Relation + */ +extern int relation_add_constraint(Relation *r,...); + +/** + * \brief Add the tuple to the Relation. + * + * \param r is the \ref Relation concerned. + * + * \param t is the \ref tuple to add. + * + * \returns a Vector of all knocked out tuples. + * + * This function adds the \ref tuple \b t to the \ref Relation \b r, + * and it returns a \ref Vector (single_index_level variant) of all + * same-key constraint tuples. The returned Vector is malloc-ed and it + * must be free-ed by the caller. If the tuple is already contained or + * there are no other same-key tuples knocked out, then \b 0 is + * returned. + * + * \related Relation + */ +extern Vector *relation_add(Relation *r,tuple *t); + +/** + * \brief Delete all tuples matching to the query \ref tuple fromt the + * \ref Relation. + * + * \param r is the \ref Relation concerned. + * + * \param t is the \ref tuple to delete. + * + * \returns a \Vector Vector of all knocked out tuples, i.e. the + * same-key tuples, if any, contained in the Relation + * + * Note that deletion uses a "query" tuple, which means that some + * columns may be null to mark that them match to any value. + * + * \related Relation + */ +extern Vector *relation_delete(Relation *r,tuple *query); + +/** + * \brief Return the next \ref tuple in the \ref Relation that matches + * to the query \ref tuple, at or after the index. + * + * \param r is the \ref Relation concerned. + * + * \param index is a pointer to the \ref Vector index to update. + * + * \param query is a query \tuple tuple for selection of certain + * column values. + * + * \returns any such matching \tuple tuple and an updateed *index. + * + * \related Relation + */ +extern void *relation_next(Relation *r,VectorIndex *index,tuple *query); + +/** + * \brief Lay out a dynamic \ref Relation initializer for a Relation + * wth the given column "types". + * + * This defines a \ref Relation intializer that creates the \ref + * TupleSchema for the given columns. + * + * \note The initializer cannot be used statically. + * + * The \b content \ref HashVector is a \ref Nibble_index_level variant + * with an initial size of 16 slots. + * + * The constraints \ref Vector is a \ref BitPair_index_level variant + * with initial size 0. + * + * The \b content \ref HashVector \b type is set up with an allocated + * \ref TupleSchema that has an allocated \ref tuple that declares the + * column "types" view the given \ref ItemKeyFun pointers. Any add + * constraints will need to clone that \ref TupleSchema and then clear + * the column slots for the constraint value columns, typically by + * using \ref TupleSchema_mask for this. + * + * \related Relation + */ +#define RELATION(...) (Relation) { \ + .content = { \ + .table = { .variant = Nibble_index_levels, .size=16, .entries=0 }, \ + .fill = 0, .holes = 0, \ + .type = (ItemKeyFun*) TUPLESCHEMA( __VA_ARGS__ ) \ + }, \ + .constraints = { .variant = BitPair_index_levels, .size=0, .entries=0 } \ +} + +#endif diff --git a/vector/TupleSchema.c b/vector/TupleSchema.c new file mode 100644 index 0000000..378ac9a --- /dev/null +++ b/vector/TupleSchema.c @@ -0,0 +1,185 @@ +#include +#include +#include +#include + +#define COLUMN def->columns + +/** + * This callback function returns the hashcode of a key. + * + * \param this is a pointer to the ItemKeyFun record from where this + * callback got invoked + * + * \param key is the key to produce a hascode for + * + * \returns the hashcode which is a VectorIndex (i.e. unsigned long) + * + * The hashcode is used for indexing into the backing Vector for + * finding the an item via its key. The same key must map consistently + * to the same hashcode while the hashtable contains an item with that + * key. Different keys map map to the same hashcode, in which case the + * Vector placement is made at the first empty or hole slot following + * the hashcode index. + */ +static unsigned long TupleSchema_hashcode(void *this,void *key) { + TupleSchema *def = (TupleSchema *) this; + tuple *kp = (tuple*) key; + int i = 0; + unsigned long value = 5381; + for ( ; i < def->arity; i++ ) { + if ( COLUMN[i] ) { + value <<= 3; + if ( (*kp)[i] ) { + value += COLUMN[i]->hashcode( COLUMN[i], (*kp)[i] ); + } + } + value += 17; + } + return value; +} + +/** + * This callback function determines whether an item has a + * given key or not. + */ +static int TupleSchema_haskey(void *this,void *item,void *key) { + TupleSchema *def = (TupleSchema *) this; + tuple *kp = (tuple*) key; + tuple *tp = (tuple*) item; + int i = 0; + for ( ; i < def->arity; i++ ) { + if ( COLUMN[i] && (*kp)[i] ) { + if ( COLUMN[i]->haskey( COLUMN[i], (*tp)[i], (*kp)[i] ) == 0 ) { + return 0; + } + } + } + return 1; +} + + +/** + * This callback function returns the key of an item by considering + * the arity and mask. + */ +static void *tupleitem_itemkey(void *this,void *item) { + TupleSchema *def = (TupleSchema *) this; + tuple *tp = (tuple*) item; + int i; + int keylen = def->arity; + void **parts = calloc( keylen, sizeof( void* ) ); + for ( i = 0; i < def->arity; i++ ) { + if ( COLUMN[i] ) { + parts[i] = COLUMN[i]->itemkey( COLUMN[i], (*tp)[i] ); + } + } + return (void*) parts; +} + +/** + * This callback function handles a key obtained from the itemkey + * callback function to reclaim temporary allocation. + */ +static void tupleitem_releasekey(void *this,void *key) { + TupleSchema *def = (TupleSchema *) this; + tuple *kp = (tuple*) key; + int i; + for ( i = 0; i < def->arity; i++ ) { + if ( COLUMN[i] ) { + COLUMN[i]->releasekey( COLUMN[i], (*kp)[i] ); + } + } + free( key ); +} + +#define OUT(X) a = X; if ( a > limit ) return 0; buffer += a; limit -= a + +/** + * This callback function writes a representation of an item into + * a character buffer. + */ +static int tupleitem_tostring(void *this,void *item,char *buffer,int limit) { + TupleSchema *def = (TupleSchema *) this; + tuple *t = (tuple*) item; + char *x = "<"; + int a, i; + for ( i = 0; i < def->arity; i++ ) { + OUT( snprintf( buffer, limit, x ) ); + x = ","; + OUT( def->columns[i]->tostring( + def->columns[i], (*t)[i], buffer, limit ) ); + } + OUT( snprintf( buffer, limit, ">" ) ); + return a; +} + + +// Allocate +tuple *tuple_create(int arity,...) { + va_list ap; + int i; + tuple *t = (tuple *)malloc( arity * sizeof( void* ) ); + va_start( ap, arity ); + for ( i = 0; i < arity; i++ ) { + (*t)[i] = va_arg( ap, void* ); + } + va_end( ap ); + return t; +} + +tuple *tuple_clone(int arity,tuple *t) { + tuple *ct = (tuple *)malloc( arity * sizeof( void* ) ); + memcpy( ct, t, arity * sizeof( void* ) ); + return ct; +} + +ItemKeyFun TupleSchema_callbacks = { + .hashcode = TupleSchema_hashcode, + .haskey = TupleSchema_haskey, + .itemkey = tupleitem_itemkey, + .releasekey = tupleitem_releasekey, + .tostring = tupleitem_tostring +}; + +TupleSchema *TupleSchema_create(int arity,tuple *columns) { + TupleSchema *ts = (TupleSchema*) malloc( sizeof( TupleSchema ) ); + (*ts) = (TupleSchema) { + .base = TupleSchema_callbacks, + .arity = arity, + .columns = (ItemKeyFun**) columns + }; + return ts; +} + +#define COPYA(T,P,N) (T*) memcpy( malloc( N * sizeof(T) ), P, N * sizeof( T ) ) +#define COPY(T,P) COPYA(T,P,1) + +// Duplicate a TupleSchema with optionally some columns reset. +TupleSchema *TupleSchema_mask(TupleSchema *schema,...) { + TupleSchema *masked = COPY(TupleSchema,schema); + masked->columns = COPYA( ItemKeyFun*, schema->columns, schema->arity ); + va_list ap; + int i; + va_start( ap, schema ); + for ( ;; ) { + i = va_arg( ap, int ); + if ( i < 0 || i >= schema->arity ) { + break; + } + masked->columns[i] = 0; + }; + va_end( ap ); + return masked; +} + +unsigned long tuple_mask(int arity,tuple *t) { + unsigned long mask = 0; + while ( arity-- > 0 ) { + mask <<= 1; + if ( (*t)[ arity ] ) { + mask++; + } + } + return mask; +} diff --git a/vector/TupleSchema.h b/vector/TupleSchema.h new file mode 100644 index 0000000..4f5a544 --- /dev/null +++ b/vector/TupleSchema.h @@ -0,0 +1,100 @@ +#ifndef tupleitem_H +#define tupleitem_H + +#include + +/** + * A tuple is an array of void* + */ +typedef void *tuple[]; + +/** + * A TupleSchema record declares the ItemKeyFun functions for tuple + * items together with applicable arity and domain combinations. + * Records are created dynamically via the \ref TupleSchema_create + * function or the \ref TUPLESCHEMA convenience macro. + * + * \extends ItemKeyFun + */ +typedef struct { + /** + * These are the ItemKeyFun callback functions to support + * HashVector use for tuple items. The functions expects their + * ItemKeyFun pointer to be within a TupleSchema record so as to + * provide the handling of the tuple columns. + */ + ItemKeyFun base; + + /** + * This is the number of columns in a tuple. + */ + int arity; + + /** + * This points to an array of pointers to the tuple item domains + * as represented by their associated ItemKeyFun records. + */ + ItemKeyFun **columns; +} TupleSchema; + +/** + * Create a tuples with given values. + * + * \related TupleSchema + */ +extern tuple *tuple_create(int arity,...); + +/** + * Create a tuples with given values. + * + * \related TupleSchema + */ +extern TupleSchema *TupleSchema_create(int arity,tuple *columns); + +/** + * Copy the given TupleSchema into a new TupleSchema with some columns + * masked. This represents a sub-index type using the unmasked columns + * for indexing. + * + * \related TupleSchema + */ +extern TupleSchema *TupleSchema_mask(TupleSchema *schema,...); + +/** + * \brief Return 1/0 to indicate whether the query matches the item. + * + * \related TupleSchema + */ +extern int TupleSchema_match(TupleSchema *def,tuple *query,tuple *item); + +/** + * \brief Generic macro to determine the number of expressions in a + * __VA_ARGS__ + * + * \related TupleSchema + */ +#define NUMARGS(...) (sizeof((void*[]){__VA_ARGS__})/sizeof(void*)) + +/** + * \brief Create a tuple with the given values. + * + * This invokes \ref tuple_create to allocate and assign a void* + * array with the given values. + * + * \related TupleSchema + */ +#define TUPLE(...) tuple_create( NUMARGS(__VA_ARGS__), __VA_ARGS__ ) + +/** + * \brief Create a \ref TupleSchema with the given column "types". + * + * This invokes \ref TupleSchema_create to allocate and initialize a + * \ref TupleSchema for the given columns via the \ref TUPLE macro. + * + * \related TupleSchema + */ +#define TUPLESCHEMA(...) \ + TupleSchema_create( NUMARGS( __VA_ARGS__ ), TUPLE( __VA_ARGS__ ) ) + + +#endif diff --git a/vector/Vector.c b/vector/Vector.c new file mode 100644 index 0000000..31b4065 --- /dev/null +++ b/vector/Vector.c @@ -0,0 +1,710 @@ +#include +#include +#include +#include + +/** + * Representing a Vector of void* accessible via an indexing structure + * as levels of same-size pages. A "VectorPage" is a contiguous array + * void*, and an index is "unsigned long" (64 bits). + */ + +/** ============================================================ **/ + +static int VECTOR_BITS[4] = { 8, 4, 2, 64 }; + +typedef struct { + unsigned int a:2; + unsigned int b:2; + unsigned int c:2; + unsigned int d:2; +} BitPair; + +typedef struct { + unsigned int a:4; + unsigned int b:4; +} Nibble; + +typedef struct { + unsigned int a:8; +} Byte; + +/** + * Return the index part for the given level of the Vector's leveling + * variant. + * + * The Vector variant indicates whether indexing uses 8, 4, or 2 bits + * per level. + */ +unsigned long VECTOR_INDEX_PART(Vector *pv,VectorIndex *index, int level) { + unsigned char *px = (unsigned char *) index; + switch ( pv->variant ) { + case byte_index_levels: { + Byte *pp = (Byte*)(px + level); + return pp->a; + } + case Nibble_index_levels: { + Nibble *pp = (Nibble*)( px + ( level / 2 ) ); + switch ( level & 1 ) { + case 0: return pp->a; + case 1: return pp->b; + } + break; + } + case BitPair_index_levels: { + BitPair *pp = (BitPair*)( px + ( level / 4 ) ); + switch ( level & 3 ) { + case 0: return pp->a; + case 1: return pp->b; + case 2: return pp->c; + case 3: return pp->d; + } + break; + } + case single_index_level: + return (*index); + } + return 0; +} + +/** + * Increment the index part at the indivated level, cyclic but not + * carrying over to the upper level. Returns the new level index. + */ +static unsigned long VECTOR_INDEX_PART_INC( + Vector *pv,VectorIndex *index, int level) +{ + unsigned char *px = (unsigned char *) index; + switch ( pv->variant ) { + case byte_index_levels: { + Byte *pp = (Byte*)( px + level ); + return ++(pp->a); + } + case Nibble_index_levels: { + Nibble *pp = (Nibble*)( px + ( level / 2 ) ); + switch ( level & 1 ) { + case 0: return ++(pp->a); + case 1: return ++(pp->b); + } + break; + } + case BitPair_index_levels: { + BitPair *pp = (BitPair*)( px + level / 4 ); + switch ( level & 3 ) { + case 0: return ++(pp->a); + case 1: return ++(pp->b); + case 2: return ++(pp->c); + case 3: return ++(pp->d); + } + break; + } + case single_index_level: + return ++(*index); + } + return 0; +} + +/** + * Decrement the index part at the indicated level, cyclic but not + * carrying over to the upper level. Returns the prior level index. + */ +static unsigned long VECTOR_INDEX_PART_DEC( + Vector *pv,VectorIndex *index, int level) +{ + unsigned char *px = (unsigned char *) index; + switch ( pv->variant ) { + case byte_index_levels: { + Byte *pp = (Byte*)( px + level ); + return (pp->a)--; + } + case Nibble_index_levels: { + Nibble *pp = (Nibble*)( px + ( level / 2 ) ); + switch ( level & 1 ) { + case 0: return (pp->a)--; + case 1: return (pp->b)--; + } + break; + } + case BitPair_index_levels: { + BitPair *pp = (BitPair*)( px + level / 4 ); + switch ( level & 0xf ) { + case 0: return (pp->a)--; + case 1: return (pp->b)--; + case 2: return (pp->c)--; + case 3: return (pp->d)--; + } + break; + } + case single_index_level: + return (*index)--; + } + return 0; +} + +#define ONES (~((VectorIndex) 0)) + +// Set index to first value for all index parts at level and lower. +static void VECTOR_INDEX_FIRST(Vector *pv,VectorIndex *index, int level) { + (*index) &= ONES << ( VECTOR_BITS[ pv->variant ] * level ); +} + +// Set index to last value for all index parts at level and lower. +static void VECTOR_INDEX_LAST(Vector *pv,VectorIndex *index, int level) { + static unsigned long ones[] = { 255, 15, 3 }; + unsigned long x = ones[ pv->variant ]; + while ( level-- ) { + (*index) |= x; + x <<= VECTOR_BITS[ pv->variant ]; + } + // 255, 25, 3 + //(*index) |= ONES >> ( 64 - ( VECTOR_BITS[ pv->variant ] * level ) ); +} + +// Return number of slots for a Vector variant. +unsigned long VECTOR_SLOTS(Vector *pv) { + switch ( pv->variant ) { + case byte_index_levels: return 256; + case Nibble_index_levels: return 16; + case BitPair_index_levels: return 4; + case single_index_level: return pv->size; + } + return 0; +} + +// The number of levels to span Vector pv wrt its size and variant +static unsigned int Vector_levels(Vector *pv,unsigned int size) { + if ( size < 4 ) { + return 1; + } + switch ( pv->variant ) { + case byte_index_levels: return ((int)(log2( size - 1 ) / 8)) + 1; + case Nibble_index_levels: return ((int)(log2( size - 1 ) / 4)) + 1; + case BitPair_index_levels: return ((int)(log2( size - 1 ) / 2)) + 1; + case single_index_level: return 1; + } + return 0; +} + +/** ============================================================ **/ + +/** + * Advances a Vector index to the next used slot at or below the + * given level, starting from the indexed entry (inclusive) and up. + * The function will free any empty pages it discovers, and then + * update the index slots accordingly. The given index is advanced + * cyclically to match the found slot. The function returns a slot + * pointer to the used slot, if any, and 0 otherwise. + * The last parameter is a flag that gets set when the scanning is + * partial (i.e. not the whole index page). + */ +static void **Vector_level_next_used( + Vector *pv, + VectorPage *page, + VectorIndex *index, + int level, + int *partial ) +{ + void **p = (void**)&(*page)[ VECTOR_INDEX_PART( pv, index, level ) ]; + if ( VECTOR_INDEX_PART( pv, index, level ) != 0 ) { + *partial = 1; + } + for( ; *index < pv->size; p++ ) { + if ( *p ) { + if ( level == 0 ) { + return p; // This is a used entry + } + // *p is an index that needs to be inspected recursively + int w = 0; + void **x = Vector_level_next_used( pv, *p, index, level - 1, &w ); + if ( x ) { + return x; // Used slot was found; return it. + } + // If the page *p is all empty, so can/should be reclaimed. + if ( w == 0 ) { + free( *p ); + *p = 0; + } else { + *partial = 1; + } + } else { + if ( level > 0 ) { + VECTOR_INDEX_FIRST( pv, index, level - 1 ); + } + } + if ( VECTOR_INDEX_PART_INC( pv, index, level ) == 0 ) { + break; // cycling this level => nothing found + } + } + return 0; +} + +// Find the next used slot at given index or later. Returns pointer to +// the slot. This allows for a reclaim function that may reclaim slot +// items on the way to next used slot. +void **Vector_next_used(Vector *pv,VectorIndex *index) { + if ( pv->entries == 0 || *index >= pv->size ) { + *index = pv->size; + return 0; + } + int levels = Vector_levels( pv, pv->size ); + int partial = 0; + do { + void **slot = Vector_level_next_used( + pv, pv->entries, index, levels - 1, &partial ) ; + if ( slot == 0 ) { + break; // reached the end of the Vector + } + if ( *slot ) { + return slot; + } + } while ( ++(*index) < pv->size ); + if ( partial == 0 ) { + free( pv->entries ); + pv->entries = 0; + } + *index = pv->size; // reached the end of the Vector + return 0; +} + +/** + * Advances a Vector index to the prior used slot at or below the + * given level, starting from the indexed entry (inclusive) and down. + * The function will free any empty pages it discovers, and then + * update the index slots accordingly. The given index is advanced + * cyclically to match the found slot. The function returns a slot + * pointer to the used slot, if any, and 0 otherwise. + * The last parameter is a flag that gets set when the scanning is + * partial (i.e. not the whole index page). + */ +static void **Vector_level_prev_used( + Vector *pv, + VectorPage *page, + VectorIndex *index, + int level, + int *partial ) +{ + void **p = (void**)&(*page)[ VECTOR_INDEX_PART( pv, index, level ) ]; + if ( VECTOR_INDEX_PART( pv, index, level ) != VECTOR_SLOTS( pv ) - 1 ) { + *partial = 1; + } + do { + if ( *p ) { + if ( level == 0 ) { + return p; // This is a used entry + } + // *p is an index that needs to be inspected recursively + int w = 0; + void **x = Vector_level_prev_used( pv, *p, index, level - 1, &w ); + if ( x ) { + return x; // Used slot was found; return it. + } + // If the page *p is all empty, it can/should be reclaimed. + if ( w == 0 ) { + free( *p ); + *p = 0; + } else { + *partial = 1; + } + } else { + if ( level > 0 ) { + VECTOR_INDEX_LAST( pv, index, level ); + } + } + p--; + } while ( VECTOR_INDEX_PART_DEC( pv, index, level ) != 0 ); + return 0; +} + +// Find the next used slot at given index or later. Returns pointer to +// the slot. This allows for a reclaim function that may reclaim slot +// items on the way to next used slot. +void **Vector_prev_used(Vector *pv,VectorIndex *index) { + if ( pv->entries == 0 || *index >= pv->size ) { + *index = pv->size; + return 0; + } + int levels = Vector_levels( pv, pv->size ); + int partial = 0; + do { + void **slot = Vector_level_prev_used( + pv, pv->entries, index, levels - 1, &partial ) ; + if ( slot == 0 ) { + break; // reached the end of the Vector + } + if ( *slot ) { + return slot; + } + } while ( (*index)-- != 0 ); + if ( partial == 0 ) { + free( pv->entries ); + pv->entries = 0; + } + *index = pv->size; // reached the end of the Vector + return 0; +} + +// Reclaim tree of unused pages for a given level +static void Vector_reclaim(Vector *pv,VectorPage *page,unsigned int level) { + int i = 0; + if ( level > 0 ) { + for ( ; i < VECTOR_SLOTS( pv ); i++ ) { + if ( (*page)[i] ) { + Vector_reclaim( pv, (VectorPage *) (*page)[i], level - 1 ); + } + } + } + free( page ); +} + +// Resize Vector, using the reclaim function as needed, to handle any +// excess items or to veto the resize. Returns the index of the veto, +// if any, or <0 otherwise, with -1 indicating success and -2 +// indicating OOM +// +// Note that resizing may result in the introduction/removal of +// indexing levels and pages, so as to keep the leveling accurate for +// the size. +int Vector_resize( + Vector *pv,VectorIndex new_size, + int (*reclaim)(Vector *pv,VectorIndex index,void *item,void *data), + void *data ) +{ + struct { + int old; + int new; + } level = { + Vector_levels( pv, pv->size ), + Vector_levels( pv, new_size ) + }; + VectorPage *entries = 0; + if ( pv->entries == 0 ) { + pv->size = new_size; + return 0; + } + // A shrinking Vector might be veto-ed + if ( new_size < pv->size ) { + VectorIndex index = new_size; + void **slot = Vector_next_used( pv, &index ); + while ( slot ) { + if ( *slot && reclaim && reclaim( pv, index, *slot, data ) == 0 ) { + *slot = 0; + slot = Vector_next_used( pv, &index ); + continue; + } + return index; + } + // At this point we know that there are no slots used after + // the new_size size, so now it's time to remove and reclaim + // any superflouous top level pages. + if ( pv->variant == single_index_level ) { + // Follow Vector size using realloc + if ( new_size > 0 ) { + entries = (VectorPage*) + realloc( pv->entries, new_size * sizeof( void* ) ); + if ( entries == 0 ) { + return -2; // OOM + } + } + pv->entries = entries; + } else { + VectorPage **pp = &pv->entries; + int i = level.old; + while ( i-- > level.new ) { + if ( pp ) { + pp = (VectorPage **)(*pp); + } + } + if ( pp != &pv->entries ) { + entries = pv->entries; + if ( pp ) { + pv->entries = *pp; + *pp = 0; // Detach subtree + } else { + pv->entries = 0; + } + Vector_reclaim( pv, entries, level.old - 1 ); + } + if ( new_size == 0 && pv->entries ) { + free( pv->entries ); + pv->entries = 0; + } + } + } else { + // Vector is growing. Maybe insert levels. + if ( pv->variant == single_index_level ) { + // Follow Vector size using realloc + entries = (VectorPage *)realloc( + pv->entries, new_size * sizeof( void* ) ); + if ( entries == 0 ) { + return -2; // OOM + } + pv->entries = entries; + memset( &(*entries)[ pv->size ], 0, + ( new_size - pv->size ) * sizeof( void* ) ); + } else { + for ( ; level.old < level.new; level.old++ ) { + VectorPage *p = (VectorPage *) + calloc( VECTOR_SLOTS( pv ), sizeof( void* ) ); + if ( p == 0 ) { + return -2; // OOM + } + (*p)[0] = pv->entries; + pv->entries = p; + // Should maybe change the size to match the level? + // otherwise recovery from OOM is impossible + } + } + } + pv->size = new_size; + return -1; +} + +// Return pointer to the indexed page slot at the requested level, and +// adding intermediate index pages if so requested. Returns 0 if +// addition fails (OOM), or if not requested and page is missing. +void **Vector_access(Vector *pv,VectorIndex index,int level,int add) { + if ( index >= pv->size ) { + return 0; + } + void **page = (void**) &pv->entries; + int i = Vector_levels( pv, pv->size ); + while ( i-- > level ) { + if ( add && (*page) == 0 ) { + (*page) = calloc( VECTOR_SLOTS( pv ), sizeof( void* ) ); + } + page = (*page); + if ( page == 0 ) { + return 0; + } + page += VECTOR_INDEX_PART( pv, &index, i ); + } + return page; +} + +// Map index into a value slot +void **Vector_entry(Vector *pv,VectorIndex index) { + return Vector_access( pv, index, 0, 1 ); +} + +inline void Vector_set(Vector *pv,VectorIndex index,void *value) { + void **p = Vector_entry( pv, index ); + *p = value; +} + +// Set value at index but return the old value +void *Vector_get_set(Vector *pv,VectorIndex index,void *value) { + void **p = Vector_entry( pv, index ); + void *old = *p; + *p = value; + return old; +} + +inline void *Vector_get(Vector *pv,VectorIndex index) { + void **p = Vector_entry( pv, index ); + return *p; +} + +int Vector_free_any(Vector *pv,VectorIndex ix,void *item,void *data) { + free( item ); + return 0; +} + +int Vector_clear_any(Vector *pv,VectorIndex ix,void *item,void *data) { + return 0; +} + +void Vector_append(Vector *pv,void *value) { + Vector_resize( pv, pv->size + 1, 0, 0 ); + Vector_set( pv, pv->size - 1, value ); +} + +// copy block of n items from src[si] to dst[di] +// no efficiency hacks +void Vector_copy(Vector *dst,VectorIndex di, + Vector *src,VectorIndex si,VectorIndex n) { + if ( dst != src || di < si ) { + while ( n-- != 0 ) { + Vector_set( dst, di++, Vector_get( src, si++ ) ); + } + } else if ( di > si ){ + di += n - 1; + si += n - 1; + while ( n-- != 0 ) { + Vector_set( dst, di--, Vector_get( src, si-- ) ); + } + } +} + +Vector *Vector_clone(enum VectorVariant variant,Vector *src) { + Vector *dst = (Vector*) malloc( sizeof( Vector ) ); + (*dst) = (Vector) { .variant = variant, .size = 0, .entries = 0 }; + Vector_resize( dst, src->size, 0, 0 ); + Vector_copy( dst, 0, src, 0, src->size ); + return dst; +} + +void Vector_dump(Vector *pv, + void (*itemdump)(const VectorIndex,const void *)) { + VectorIndex index = 0; + for ( ; index < pv->size; index++ ) { + void **slot = Vector_next_used( pv, &index ); + if ( slot == 0 ) { + break; + } + itemdump( index, *slot ); + } +} + +//// Quicksort + +// Returns 1 for "in order", 0 for equal, and -1 for "wrong order" +typedef int (*ComparFn)(const void *,const void *); + +static void Vector_qsort_part( + Vector *pv,ComparFn compar, + VectorIndex low,VectorIndex high) +{ + if ( low >= high ) { + return; + } + VectorIndex lo = low; + VectorIndex m = high - 1; + + if ( lo >= m ) { + return; + } + + VectorIndex hi = m - 1; + void **mp = Vector_entry( pv, m ); + void **lop, **hip; + for ( ;; ) { + // Find index of first item "above" mp scanning from lo and up + for ( ; lo < m; lo++ ) { + lop = Vector_entry( pv, lo ); + if ( compar( *lop, *mp ) < 0 ) { + break; + } + } + // if lo == m, then lop is wrong!! + // Find index of first item "below" mp scanning from hi and down + for ( ; hi > lo; hi-- ) { + hip = Vector_entry( pv, hi ); + if ( compar( *mp, *hip ) < 0 ) { + break; + } + } + if ( lo >= hi ) { + if ( lo < m ) { + void *x = *lop; + *lop = *mp; + *mp = x; + m = lo; + } + break; + } + void *x = *lop; + *lop = *hip; + *hip = x; + } + Vector_qsort_part( pv, compar, low, m ); + Vector_qsort_part( pv, compar, m+1, high ); +} + +void Vector_qsort(Vector *pv,ComparFn compar) { + Vector_qsort_part( pv, compar, 0, pv->size ); +} + +void Vector_iterate(Vector *pv, + VectorIndex start, + int (*itemfn)(VectorIndex,void*,void*), + void *data ) +{ + VectorIndex index = start; + while ( index < pv->size ) { + int end = VECTOR_SLOTS( pv ); + int i = index & ( end - 1 ); + for ( ; i < end && index < pv->size; i++, index++ ) { + void **slot = Vector_access( pv, index, 0, 0 ); + if ( itemfn( index, slot? *slot: 0, data ) ) { + return; + } + } + } +} + +struct find_data { + void *value; + VectorIndex index; +}; + +static int Vector_find_item(VectorIndex index, void *item,void *data) { + struct find_data *fd = (struct find_data*)data; + if ( fd->value != item ) { + return 0; + } + fd->index = index; + return 1; +} + +VectorIndex Vector_find(Vector *pv,void *value) { + struct find_data fd = { + .value = value, + .index = pv->size + }; + Vector_iterate( pv, 0, Vector_find_item, &fd ); + return fd.index; +} + +// Find surrounding indexes for a given item key in a sparse Vector +void *Vector_bsearch(Vector *pv,VectorIndex *index,const void *key, + int (*compare)(const void *key, const void *item)) { + VectorIndex lo = 0; + VectorIndex hi = pv->size; + if ( hi-- == 0 || Vector_prev_used( pv, &hi ) == 0 ) { + return 0; + } + hi++; + while ( lo < hi ) { + VectorIndex m = lo + ( hi - lo ) / 2; + void **slot = Vector_next_used( pv, &m ); + int c = compare( key, *slot ); + if ( c == 0 ) { + *index = m; + return *slot; + } + if ( c < 0 ) { + hi = m; + } else { + lo = m + 1; + } + } + return 0; +} + +// Iterator callback. +static int checkunused(VectorIndex index,void *item,void *data) { + VectorIndex *last = (VectorIndex*) data; + if ( item == 0 ) { + (*last) = index; + return 1; + } + if ( *last > index ) { + // Only on the first iteration, with *last = Vector_sie + if ( index == 0 ) { + (*last) = 1; + return 0; + } + *last = 0; + } else if ( index == (*last) ) { + (*last)++; + return 0; + } + return 1; +} + +// Scan forward for the next unused Vector slot +VectorIndex Vector_next_unused(Vector *pv,VectorIndex index) { + VectorIndex unused = Vector_size( pv ); + Vector_iterate( pv, index, checkunused, &unused ); + return unused; +} diff --git a/vector/Vector.h b/vector/Vector.h new file mode 100644 index 0000000..7c9286c --- /dev/null +++ b/vector/Vector.h @@ -0,0 +1,396 @@ +#ifndef Vector_H +#define Vector_H + +/** \file + * + * A Vector is a dynamic pointer array implemented as an access tree + * of index pages. The indexing is done using "unsigned long" indexes, + * and the level 0 index corresponds to actual items. + * + * Actual Vectors are assigned a leveling variant which defines the + * index page size for the Vector. This must not be changed for a + * Vector with entries. + */ + +/** + * \brief This is the general indexing used for Vector access. + * + * \related Vector + */ +typedef unsigned long VectorIndex; + +/** + * \brief A VectorPage is an array of void* items. Its size depends + * on the applicable Vector variant. + * + * \related Vector + */ +typedef void* VectorPage[]; + +/** + * \brief The implementation variants. + * + * - byte_index_levels (0) has 8-bit indexing parts and index pages + * with 256 pointers, + * + * - Nibble_index_levels (1) has 4-bit indexing parts and index pages + * with 16 pointers, + * + * - BitPair_index_levels (2) has 2-bit indexing parts and index pages + * with 4 pointers, + * + * - single_index_level (3) has a single page that is sized as the + * Vector. + * + * The first three variants are managed by adding/removing full pages + * of the indexing tree upon resize and access. The single_index_level + * variant is managed by using realloc upon resize. In all cases + * shrinking a Vector might mean to reclaim items about to be "lost", + * if any, via a provided item reclaim callback function. The callback + * function may then also veto the shrinking. + * + * \related Vector + */ +enum VectorVariant { + byte_index_levels = 0, + Nibble_index_levels = 1, + BitPair_index_levels = 2, + single_index_level = 3 +}; + +/** + * A Vector struct is the "foot part" of a representation that + * constitutes the implementation variant for a Vector abstraction. It + * holds the variant indicator, the intended slot size and a root + * pointer for the indexing structure, which consist of indexing pages + * according to the variant. + */ +typedef struct { + /** + * The indexing variant. + */ + enum VectorVariant variant; + + /** + * The size of the Vector. + */ + VectorIndex size; + + /** + * The root page of the indexing tree. + */ + VectorPage *entries; +} Vector; + +/** + * \brief Return the number of slots spanned by an index level for the + * given Vector variant. + * + * - 0 indicates 8-bit index parts, and 256 page slots + * - 1 indicates 4-bit index parts, and 16 page slots + * - 2 indicates 2-bit index parts, and 4 page slots + * - 3 indicates 64-bit index parts, and 1 page level following the size + * + * The type 3 Vector is managed by using realloc. + * \related Vector + */ +extern unsigned long VECTOR_SLOTS(Vector *pv); + +/** + * \brief Find the nearest used (non-null) slot at given or higher + * index. + * + * \param pv is the Vector concerned. + * + * \param index is the index to change. + * + * \returns a pointer to the first non-null Vector slot from the given + * index, and *index set accordingly. If no non-null slot is found, + * the 0 is returned and *index is set to the Vector size. + * + * \related Vector + */ +extern void **Vector_next_used(Vector *pv,VectorIndex *index); + +/** + * \brief Find the nearest used (non-null) slot at given or lower + * index. + * + * \param pv is the Vector concerned. + * + * \param index is the index to change. + * + * \returns a pointer to the first non-null Vector slot from the given + * index, and *index set accordingly. If no non-null slot is found, + * the 0 is returned and *index is set to the Vector size. + * + * \related Vector + */ +extern void **Vector_prev_used(Vector *pv,VectorIndex *index); + +/** + * \brief Resize a Vector. + * + * \param pv is the Vector concerned. + * + * \param new_size is the new size it should have, + * + * \param reclaim is used upon shrinking in size for handling any + * current items above the new size, or vetoing the attempted resize. + * + * \param data is passed on the the reclaim function to use as context + * as needed. + * + * \returns the index of a resizing veto any, or <0 otherwise, with -1 + * indicating success and -2 indicating OOM. + * + * This function attempts to resize the given Vector to a new size. + * This may result in the introduction or removal of indexing pages, + * so that the index tree leveling is consistent with the Vector size. + * Thus, if it grows into a new level, then one or more new upper + * level pages are inserted as needed. If it shrinks below the current + * level, then top-level pages are removed. + * + * Also, if the new size is smaller than currently, then the now + * excess tail of entries is scanned for any used slots and the given + * reclaim function is invoked successively for these. The reclaim + * function must, in addition to memory-managing the entry, return 0 + * upon success, or non-zero to veto the attempted Vector size change. + * The data argument is passed on to the reclaim function as given. + * + * \related Vector + */ +extern int Vector_resize( + Vector *pv, VectorIndex new_size, + int (*reclaim)(Vector *pv,VectorIndex index,void *item,void *data), + void *data ); + +/** + * \brief Return pointer to the indexed page slot at the requested + * level, and adding intermediate index pages if so requested. + * + * \param pv is the Vector concerned. + * + * \param index is the slot index. + * + * \param level is the indexing level to access. Level 0 is the leaf + * level that holds the slots for the items; level 1 is one level up, + * for Vectors larger than 256 items; ans so on. + * + * \param add is a flag to indicate (with 1) that missing index pages + * should be added, or (with 0) that the function should simply return + * null if an index page to access at any level is missing. + * + * \returns a pointer to the slot for the indexed item (level 0), or + * (for higher levels) the slot for the index page on the access path + * to the indexed item. The function returns 0 if the access path is + * broken by a missing index page, or (with add==1) the allocation of + * a new index page fails. + * + * \note The index tree for the Vector is populated on demand only + * where access has been requested. + * + * \related Vector + */ +extern void **Vector_access(Vector *pv,VectorIndex index,int level,int add); + +/** + * \brief Return the slot value at the given index. + * + * \param pv is the Vector concerned. + * + * \param index is the slot index. + * + * \returns a direct pointer to the slot of the given index in the + * array, or 0 if the index is beyond the array limits (0-limit). + * + * \note Note that slot pointers are only valid while the Vector size + * is unchanged. + * + * \related Vector + */ +extern void **Vector_entry(Vector *pv,VectorIndex index); + +/** + * \param pv - the Vector concerned + * \returns the size of the Vector. + * \related Vector + */ +#define Vector_size(pv) ((VectorIndex) (pv)->size) + +/** + * \brief Set the Vector value at the given index. + * + * \param pv is the Vector concerned + * \param index is the index for the slot to assign + * \param value is the new slot value + * + * \note An assignment of 0 will be treated as an unused slot. + * + * \related Vector + */ +extern void Vector_set(Vector *pv,VectorIndex index,void *value); + +/** + * \brief Set the Vector value at the given index and return the prior + * value. + * + * \param pv is the Vector concerned + * \param index is the index for the slot to assign + * \param value is the new slot value + * + * \note An assignment of 0 will be treated as an unused slot. + * + * \related Vector + */ +extern void *Vector_get_set(Vector *pv,VectorIndex index,void *value); + +/** + * \brief Get the Vector value at the given index. + * + * \param pv is the Vector concerned + * \param index is the index for the slot to assign + * + * \note This function will allocate all traversed indeex tree pages + * even for accessing an unassigned slot. + * + * \related Vector + */ +extern void *Vector_get(Vector *pv,VectorIndex index); + +/** + * \brief Grow the Vector by one and assign the added slot. + * + * \param pv is the Vector concerned + * \param value is the new slot value + * + * \related Vector + */ +extern void Vector_append(Vector *pv,void *value); + +/** + * \brief Copy a consecutive region from one Vector into another, or + * possibly the same Vector. + * + * \param pv is the Vector concerned + * \param value is the new slot value + * + * \note This transfers all slots from the source region to the + * destination region, including zero slots. The Vectors must be large + * enough for the transfer, which is carried out from lowest to + * highest or highest to lowest index depending on wther the move is + * to higher index or to lower index respectively. + * + * \related Vector + */ +extern void Vector_copy( + Vector *dst,VectorIndex di, + Vector *src,VectorIndex si, + VectorIndex n); + +/** + * \brief Allocate a copy of a Vector into one in the given variant. + */ +extern Vector *Vector_clone(enum VectorVariant variant,Vector *src); + +/** + * \brief Utility function that invokes the itemdump function for all + * used (non-null) slots. + * + * \related Vector + * \seealso Vector_iterate + */ +extern void Vector_dump( + Vector *pv, + void (*itemdump)(const VectorIndex ,const void *)); + +/** + * \brief Sort a Vector with quicksort using the provided ordering + * collback function. + * + * \related Vector + */ +extern void Vector_qsort(Vector *pv,int (*compar)(const void *,const void *)); + +/** + * Steps through the Vector item by item invoking the given function + * for each. Continues stepping while the item function returns 0. + * + * \related Vector + */ +extern void Vector_iterate( + Vector *pv, VectorIndex start, + int (*itemfn)(VectorIndex,void *item,void *data), + void *data); + +/** + * \brief Binary search in a sorted Vector for an item of the given + * key, with a callback function providing the sorting order. + * + * \param pv is the Vector concerned. + * + * \param index is a VectorIndex pointer for returning the index of + * the found item. + * + * \param key is the lookup key to find. + * + * \param compare is a callback function that should return the search + * direction given a key and an item. It should return 0 if the key is + * a match for the item, <0 if the sought item is expected at a higher + * index, and >0 if the sought item is expected at a lower index. + * + * \return a pointer to the found item and *index set to its index. If + * there is no matching item, then 0 is returned, and the index is set + * to the Vector size. + * + * \related Vector + */ +extern void *Vector_bsearch( + Vector *pv, VectorIndex *index, const void *key, + int (*compare)(const void *key, const void *item)); + +/** + * \brief Find the index for a given value + * + * \param pv is the Vector concerned + * \param is the value to find + * + * This function scans the Vector for the first, if any, occurrence of + * the value, or returns pv->size if not found. + * + * \related Vector + */ +extern VectorIndex Vector_find(Vector *pv,void *value); + +/** + * \brief Find the next used slot at or after the given index. + * + * \param pv the Vector concerned. + * \param index pointer to the index to advance. + * \return the new index, or the Vector size if no unused slot is + * found. + * + * Scans forward in the Vector for the first unused (null) Vector slot + * at or after the given index. Returns pv->size if full. + * + * \related Vector + */ +extern VectorIndex Vector_next_unused(Vector *pv,VectorIndex index); + +/** + * \brief Convenience callback function for Vector shrinking to free + * any existing slot assignment. + * + * \related Vector + */ +extern int Vector_free_any(Vector *pv,VectorIndex ix,void *item,void *data); + +/** + * \brief Convenience callback function for Vector shrinking to ignore + * any existing slot assignment (without free-ing them). + * + * \related Vector + */ +extern int Vector_clear_any(Vector *pv,VectorIndex ix,void *item,void *data); + +#endif diff --git a/vector/hashvector.c b/vector/hashvector.c deleted file mode 100644 index 8b88135..0000000 --- a/vector/hashvector.c +++ /dev/null @@ -1,209 +0,0 @@ -#include -#include - -#define SELF hv->type - -// Find the slot for the keyed element, and return pointer to it, or -// to the first of holes encountered while considering collisions. -// Returns a pointer to the place for the item, or 0 in case of OOM or -// overfull hashvector (i.e. 0 shouldn't happen). -// If itemkey is set, then the itemkey callback function is used for -// obtaining a temporary key from the item. -static void **hashvector_find_slot( - hashvector *hv, void *key, unsigned long *i, int itemkey ) -{ - if ( itemkey ) { - // Get actual key from keying item - key = hv->type->itemkey( SELF, key ); - } - unsigned long index = hv->type->hashcode( SELF, key ) % hv->table.size; - *i = index; - void **hole = 0; - void **p = 0; - for ( ;; ) { - p = vector_entry( &hv->table, (*i) ); - if ( p == 0 ) { - if ( itemkey ) { - hv->type->releasekey( SELF, key ); - } - return 0; // This basically means OOM, and is a failure condition. - } - if ( (*p) == 0 ) { - if ( itemkey ) { - hv->type->releasekey( SELF, key ); - } - return ( hole )? hole : p; // Not found; it's place is here. - } - if ( (*p) == HV_HOLE ) { - if ( hole == 0 ) { - hole = p; // Remember the first hole - } - } else if ( hv->type->haskey( SELF, (*p), key ) ) { - if ( itemkey ) { - hv->type->releasekey( SELF, key ); - } - return p; // Found - } - if ( ++(*i) == hv->table.size ) { - (*i) = 0; // Roll-around the table - } - if ( (*i) == index ) { - if ( itemkey ) { - hv->type->releasekey( SELF, key ); - } - return 0; // Overfull hashvector! - } - } -} - -// Find the keyed element at or after the index. Update index and -// return item. -void *hashvector_next(hashvector *hv,vector_index *index,void *key) { - unsigned long i = index? *index : 0; - for ( ; i < hv->table.size; i++ ) { - void **p = vector_next_used( &hv->table, &i ); - if ( p == 0 ) { - break; - } - if ( *p && *p != HV_HOLE ) { - if ( key && hv->type->haskey( hv->type, *p, key ) == 0 ) { - continue; - } - if ( index ) { - (*index) = i; - } - return *p; - } - } - if ( index ) { - (*index) = hv->table.size; - } - return 0; -} - -static int capture_item(vector *pv,unsigned long ix,void *item,void *data) { - if ( item && item != HV_HOLE ) { - hashvector_add( (hashvector *) data, item ); - } - return 0; -} - -static int iter_item(unsigned long ix,void *item,void *data) { - if ( item && item != HV_HOLE ) { - hashvector_add( (hashvector *) data, item ); - } - return 0; -} - -static void hashvector_resize(hashvector *hv,unsigned long new_size) { - hashvector tmp = *hv; - hv->table.size = new_size; - hv->table.entries = 0; - hv->fill = 0; - hv->holes = 0; - if ( new_size < hv->table.size ) { - vector_resize( &tmp.table, 0, capture_item, hv ); - } else { - vector_iterate( &tmp.table, 0, iter_item, hv ); - } -} - -// Add the given element. -int hashvector_add(hashvector *hv,void *item) { - unsigned long i; - void **p = hashvector_find_slot( hv, item, &i, 1 ); - if ( p == 0 ) { - return -1; // OOM or overfull hashvector - } - if ( *p ) { - if ( *p != HV_HOLE ) { - return 0; // Already added. - } - hv->holes--; // about to reuse a hole - } - *p = item; - hv->fill++; - if ( hv->fill + hv->holes > hv->table.size / 2 ) { - hashvector_resize( hv, hv->table.size * 2 ); - } - return 1; -} - -// Delete the given item -int hashvector_delete(hashvector *hv,void *item) { - unsigned long i; - void **p = hashvector_find_slot( hv, item, &i, 1 ); - if ( p == 0 ) { - return -1; - } - if ( *p != item ) { - return 0; - } - // Check if subsequent slot is occupied - if ( ++i >= hv->table.size ) { - i = 0; - } - void **q = vector_access( &hv->table, i, 0, 0 ); - if ( q && (*q) ) { - *p = HV_HOLE; - hv->holes++; - } else { - *p = 0; - } - hv->fill--; - if ( hv->table.size > VECTOR_SLOTS( &hv->table ) && - hv->fill < hv->table.size / 4 ) { - hashvector_resize( hv, hv->table.size / 2 ); - } - return 1; -} - -vector *hashvector_contents( - hashvector *hv,enum vector_variant variant,vector *v) -{ - if ( v == 0 ) { - if ( hv->fill == 0 ) { - return 0; - } - v = (vector*) malloc( sizeof( vector ) ); - } else { - vector_resize( v, 0, vector_clear_any, 0 ); - } - (*v) = (vector) { .variant = variant, 0, 0 }; - if ( hv->fill == 0 ) { - return v; - } - vector_resize( v, hv->fill, 0, 0 ); - vector_index i; - vector_index j = 0; - for ( i = 0; i < v->size; i++, j++ ) { - vector_set( v, i, hashvector_next( hv, &j, 0 ) ); - } - return v; -} - -// A simple binary hashcode, (before modulo table size) -unsigned long hashvector_hashcode(unsigned char *key,unsigned long n) { - unsigned long value = 5381; - while ( n-- ) { - value += ( value << 5 ) + *(key++); - } - return value; -} - - -hashvector *hashvector_create(enum vector_variant variant,itemkeyfun *type) { - hashvector *hv = (hashvector*) malloc( sizeof( hashvector ) ); - (*hv) = (hashvector) { - .table = (vector) { - .variant = variant, - .size = 16, - .entries = 0 - }, - .fill = 0, - .holes = 0, - .type = type - }; - return hv; -} - diff --git a/vector/hashvector.h b/vector/hashvector.h deleted file mode 100644 index b57aede..0000000 --- a/vector/hashvector.h +++ /dev/null @@ -1,188 +0,0 @@ -#ifndef hashvector_H -#define hashvector_H - -#include -#include - -/** - * \file hashvector.h - * - * A hashvector is a use of \ref vector as a hashtable. The hashvector - * includes three functions to, respectively, obtain the hashcode of a - * given key, to obtain the key of a given item, and to tell whether - * or not a given item has a given key. The hashvector manipulations - * uses those for locating and placing items into the vector; the - * hascode is the primary place for an item, but then scanning upwards - * with a rolling index in case of collisions. - * - * The vector holds (void*)0 pointers for free slots, (void*)1 - * pointers (aka \ref HV_HOLE) to indicate slots of deleted items, and - * otherwise item pointers. Thus, deleting an item results in av - * HV_HOLE et the item's slot. - * - * The hashvector grows to double in size, with rehashing, when an - * item is added to make the sum of fill and holes exceed 50% of the - * size, and the hashvector shrinks to half size when an item is - * deleted to make fill reduce below 25% of the size. - */ - -/** - * \struct hashvector - * - * hashvector combines a \ref vector (for contents) with fill and hole - * counters, and \ref itemkeyfun callback functions for abstracting - * items as being pairs of key and payload. - * - * \extends vector - */ -typedef struct { - /** - * This is the backing \ref vector for the hashvector. Items are - * placed in the vector by means of their key hashcodes, at the - * first unused slot cyclically after the hashcode index. - */ - vector table; // the table space for items - /** - * This is the fill count, i.e., how many key-distinct items there - * are in the backing vector. - */ - unsigned long fill; // current number of contained items - /** - * This is a count of \ref HV_HOLE markers in the backing vector, - * which hold the position of deleted items so as to not break the - * lookup sequence for items placed cyclically after their - * hashcode index due to hash collisions. - */ - unsigned long holes; // current number of slots marking deleted items - /** - * This is a pointer to the \ref itemkeyfun record that provides - * the key-and-payload abstraction for items. - */ - itemkeyfun *type; // the key type for the items -} hashvector; - -/** - * HV_HOLE is the representation of a deleted item. This supports the - * hashtable algoritm where hashcode collisions are resolved by - * placing later items compactly cyclically after their hashcode - * index. When an item is removed from the table, its slot is set as a - * HV_HOLE slot instead of being cleared. - * - * \related hashvector - */ -#define HV_HOLE ((void*) 1) - -/** - * \brief Find next keyed item at or after the given index. - * - * \param hv is the \ref hashvector concerned. - * - * \param index is a pointer to the index to advance. - * \ - * \param key is the query key - * - * \returns the next matching item, or \b 0 if none, with the index - * updated. - * - * \related hashvector - */ -extern void *hashvector_next(hashvector *hv,vector_index *i,void *key); - -/** - * \brief Add the given item into the \ref hashvector, growing it as - * needed to ensure that its fill+holes is no more than half the size. - * - * \param hv is the \ref hashvector concerned. - * - * \param item is the item to add. - * - * \returns \b 1 when an item is added, \b 0 when the item key is - * already present, and \b -1 upon OOM or ovefull \ref hashvector. - * - * Note that this function first locates any item of the same key, and - * then doesn't add if there is already an item that has the same key. - * - * \related hashvector - */ -extern int hashvector_add(hashvector *hv,void *item); - -/** - * \brief Delete the given item, and shrink the hashvector so that its - * size is at most double its fill, though at minimum a single index - * page corresponding to the \ref vector_variant in use. - * - * \param hv is the \ref hashvector concerned. - * - * \param item is the item to delete. - * - * \returns \b 1 when the item gets deleted (by replacing its slot - * with a \ref HV_HOLE value), \b 0 if the hashvector has either no - * item or a different item for the item key, and \b -1 in case of OOM - * or overfull hashvector. - * - * Note that the contained item that has the same key as the provided - * item is deleted. - * - * \see itemkeyfun - * - * \related hashvector - */ -extern int hashvector_delete(hashvector *hv,void *item); - -/** - * \brief Copy content items into a given empty \ref vector. - * - * \param hv is the \ref hashvector concerned. - * - * \param variant is the desired vector variant. - * - * \param pv is the optional \ref vector to copy the content into. - * - * \returns the \ref vector with the \ref hashvector content - * - * If \pv is null, then a new vector is allocated and handed out for - * the caller to reclaim unless the hashvector is empty in which case - * this function returns 0. - * - * If \b pv is not null, it is first cleared, and then populated with - * the content from the hashvector. The populated vector is returned. - * - * \related hashvector - */ -extern vector *hashvector_contents( - hashvector *hv,enum vector_variant variant,vector *pv); - -/** - * \brief This is a utility function to compute and return a hashcode - * for a block of bytes. - * - * \param key is the start of the block. - * - * \param n is the byte size of the block. - * - * \related hashvector - */ -extern unsigned long hashvector_hashcode(unsigned char *key,unsigned long n); - -/** - * \brief Create a \ref hashvector of given \ref vector_variant - * for given \ref itemkeyfun. - * - * \param variant is the \ref vector_variant to use. - * - * \param type is the \ref itemkeyfun to use. - * - * \returns the initialised \ref hashvector. - * - * The hashvector will be initialized with a single \ref vector_page. - * - * \note The \ref hashvector record is allocated with malloc and the - * caller is responsible for free-ing the memory block when it's no - * longer needed. - * - * \related hashvector - */ -extern hashvector *hashvector_create( - enum vector_variant variant,itemkeyfun *type); - -#endif diff --git a/vector/integeritem.c b/vector/integeritem.c index ae4db92..221fb70 100644 --- a/vector/integeritem.c +++ b/vector/integeritem.c @@ -2,11 +2,11 @@ /** * This callback function returns the hashcode of a key. The hashcode - * is used for indexing into the backing vector for finding the an + * is used for indexing into the backing Vector for finding the an * item via its key. The same key must map consistently to the same * hashcode while the hashtable contains an item with that key. * Different keys map map to the same hashcode, in which case the - * vector placement is made at the first empty or hole slot following + * Vector placement is made at the first empty or hole slot following * the hashcode index. */ static unsigned long integeritem_hashcode(void *this,void *key) { @@ -44,7 +44,7 @@ static int integeritem_tostring(void *this,void *item,char *buffer,int limit) { return snprintf( buffer, limit, "%lld", (long long) item ); } -itemkeyfun integeritem = { +ItemKeyFun integeritem = { .hashcode = integeritem_hashcode, .haskey = integeritem_haskey, .itemkey = integeritem_itemkey, diff --git a/vector/integeritem.h b/vector/integeritem.h index 7518b26..efa5323 100644 --- a/vector/integeritem.h +++ b/vector/integeritem.h @@ -1,12 +1,12 @@ #ifndef integeritem_H #define integeritem_H -#include +#include /** - * The stringitem record declares the itemkeyfun functions for integer + * The stringitem record declares the ItemKeyFun functions for integer * items. */ -extern itemkeyfun integeritem; +extern ItemKeyFun integeritem; #endif diff --git a/vector/itemkeyfun.h b/vector/itemkeyfun.h deleted file mode 100644 index 7bbe625..0000000 --- a/vector/itemkeyfun.h +++ /dev/null @@ -1,62 +0,0 @@ -#ifndef itemkeyfun_H -#define itemkeyfun_H - -#include - -/** - * \struct itemkeyfun - * - * itemkeyfun provides a meta-level representation for abstracting - * items as pairs of keys and payload, and having a hashcode mapping - * for keys. The key part is used for identifying items under the idea - * that all items that have the same key are the same; distinct - * representations of the same abstract entity. The hashcode scrambles - * the key part in a consistent way into an ideal table index for the - * items that have that key. Different keys may yield the same - * hashcode. - */ -typedef struct { - -#define SELF void *this - /** - * This callback function should return the hashcode of a key. The - * hashcode is used for indexing into the backing vector for - * finding the an item via its key. The same key must map - * consistently to the same hashcode while the hashtable contains - * an item with that key. Different keys map map to the same - * hashcode, in which case the vector placement is made at the - * first empty or hole slot following the hashcode index. - */ - unsigned long (*hashcode)(SELF,void *key); - - /** - * This callback function should determine whether an item has a - * given key or not. - */ - int (*haskey)(SELF,void *item,void *key); - - /** - * This callback function should return a (possibly temporary) key - * of an item. It can be anything (i.e., with or without internal - * structure) as it is treated as an identifier that other - * callbacks recognize. It is merely understood as a value in an - * identity domain. - */ - void *(*itemkey)(SELF,void *item); - - /** - * This callback function should handle a key obtained from the - * itemkey callback function, e.g., reclaim temporary allocation. - */ - void (*releasekey)(SELF,void *key); - - /** - * This callback function writes a representation of an item into - * a character buffer. - */ - int (*tostring)(SELF,void *item,char *buffer,int limit); - -#undef SELF -} itemkeyfun; - -#endif diff --git a/vector/relation.c b/vector/relation.c deleted file mode 100644 index 945c991..0000000 --- a/vector/relation.c +++ /dev/null @@ -1,143 +0,0 @@ -#include -#include -#include - -relation *relation_create(tupleschema *schema) { - relation *r = (relation *) malloc( sizeof( relation ) ); - (*r) = (relation) { - .content = (hashvector) { - .table = (vector) { - .variant = nibble_index_levels, - .size = 16, - .entries = 0 - }, - .fill = 0, .holes = 0, .type = (itemkeyfun*) schema - }, - .constraints = (vector) { - .variant = single_index_level, - .size = 0, - .entries = 0 - }, - }; - return r; -} - -#define COPYA(T,P,N) (T*) memcpy( malloc( N * sizeof(T) ), P, N * sizeof( T ) ) -#define COPY(T,P) COPYA(T,P,1) - -// Add an indexing hashvector to the relation using the given column -// flags with 1 indicating key column and 0 indicating value column. -int relation_add_constraint(relation *r,...) { - va_list ap; - tupleschema *ts = (tupleschema *) r->content.type; - tuple *columns = (tuple*) calloc( ts->arity, sizeof( void* ) ); - int i = 0; - va_start( ap, r ); - for ( ; i < ts->arity; i++ ) { - if ( va_arg( ap, int ) ) { - (*columns)[i] = ts->columns[i]; - } - } - va_end( ap ); - ts = tupleschema_create( ts->arity, columns ); - i = (int) r->constraints.size; - vector_append( - &r->constraints, - hashvector_create( nibble_index_levels, (itemkeyfun*) ts ) ); - return i; -} - -//============== Adding an item ============= -// Iteration context for adding or querying a relation -typedef struct { - relation *rel; - hashvector knockouts; - void *item; -} knockout; - -// Determine matches to ((knockout*)data)->key in -// (hashvector*)item, optionally using ((knockout*)data)->columns -// for ignoring full matches to the key tuple. -static int knockout_check(vector_index index,void *item,void *data) { - knockout *kod = (knockout*) data; - vector_index i = 0; - for ( ; i < ((hashvector*) item)->table.size; i++ ) { - void *old = hashvector_next( (hashvector*) item, &i, kod->item ); - if ( old ) { - hashvector_add( &kod->knockouts, old ); - } - } - return 0; -} - -// delete the (tuple*)item from the (hashvector*)data -static int knockout_delete(vector_index index,void *item,void *data) { - hashvector_delete( (hashvector*) item, data ); - return 0; -} - -// add the (tuple*)data to the (hashvector*)item -static int knockout_add(vector_index index,void *item,void *data) { - hashvector_add( (hashvector*)item, data ); - return 0; -} - -// Find and remove all collisions for a query, unless "add" is -// non-zero in which case the function aborts if there is any match in -// the main content. -static int knockout_clear(knockout *this,relation *r,tuple *item,int add) { - (*this) = (knockout) { - .rel = r, - .knockouts = { - .table = { - .variant = nibble_index_levels, .size = 16, .entries = 0 - }, - .fill = 0, .holes = 0, .type = r->content.type, - }, - .item = item - }; - knockout_check( 0, &r->content, this ); - if ( add ) { - if ( this->knockouts.fill > 0 ) { - return 0; - } - // Find all constraint knockouts for addition - vector_iterate( &r->constraints, 0, knockout_check, this ); - } - if ( this->knockouts.fill > 0 ) { - // Delete them from all tables - vector_index i; - for ( i = 0; i < this->knockouts.table.size; i++ ) { - void *t = hashvector_next( &this->knockouts, &i, 0 ); - if ( t ) { - hashvector_delete( &r->content, t ); - vector_iterate( &r->constraints, 0, knockout_delete, t ); - } - } - } - return 1; -} - -// add a tuple to a relation and return a vector of knocked out -// tuples, if any, or 0 otherwise. -vector *relation_add(relation *r,tuple *item) { - knockout data; - if ( knockout_clear( &data, r, item, 1 ) ) { - // Add the new tuple - hashvector_add( &r->content, item ); - vector_iterate( &r->constraints, 0, knockout_add, item ); - return hashvector_contents( &data.knockouts, single_index_level, 0 ); - } - return 0; -} - -vector *relation_delete(relation *r,tuple *item) { - knockout data; - (void) knockout_clear( &data, r, item, 0 ); - return hashvector_contents( &data.knockouts, single_index_level, 0 ); -} - -void *relation_next(relation *r,vector_index *index,tuple *query) { - return hashvector_next( &r->content, index, query ); -} - diff --git a/vector/relation.h b/vector/relation.h deleted file mode 100644 index c2e5293..0000000 --- a/vector/relation.h +++ /dev/null @@ -1,164 +0,0 @@ -#ifndef relation_H -#define relation_H - -#include -#include - -/** - * A relation is an implementation of a tuple set with (optional) key - * constraints. The store is a \ref hashvector whose \b type is a \ref - * tupleschema that defines the columns. The key constraints are - * represented as additional \ref hashvector "hashvectors" whose \ref - * tupleschema "tupleschemas" are clones of the column schema with - * some columns excluded. - * - * \extends hashvector - */ -typedef struct { - /** - * This is the primary content store for the relation. Its type - * should be a tupleschema declaring the "item types" for the - * relation columns. - */ - hashvector content; - - /** - * This is a collection of relational constraints, if any, which - * are represented as hashvectors whose tupleschemas are clones of - * the content tupleschema with some columns excluded. - */ - vector constraints; -} relation; - -/** - * \brief Create a relation for the given tupleschema. - * - * \param schema is the column schema - * - * \returns the allocated relation record. - * - * The given tupleschema is set up as the type of the content - * hashvector, which also is initialised as a nibble_index_levels - * variant vector. - * - * \related relation - */ -extern relation *relation_create(tupleschema *schema); - -/** - * \brief Add a key constraint to a \ref relation. - * - * \param r is the relation concerned. - * - * \param ... are the column flags indicating key (1) or value (0) - * column for all columns. - * - * \returns the index into the constraints \ref vector for the added - * constraint. - * - * This function adds a \ref hashvector with a \ref tupleschema as its - * item type cloned from the content type and then modified to - * represent the constraint. Namely that the key columns have their - * "column type" set while value columsn are reset. - * - * The \b constraint \ref hashvectors are used when \ref tuple - * "tuples" are added to the \ref relation so as to identify the - * already contained \ref tuple "tuples" that contradict the addition - * by means of having the same constraint key. The already contained - * \ref tuple "tuples" are then "knocked out" from the relation by the - * new addition. - * - * \see relation_add - * \related relation - */ -extern int relation_add_constraint(relation *r,...); - -/** - * \brief Add the tuple to the relation. - * - * \param r is the \ref relation concerned. - * - * \param t is the \ref tuple to add. - * - * \returns a vector of all knocked out tuples. - * - * This function adds the \ref tuple \b t to the \ref relation \b r, - * and it returns a \ref vector (single_index_level variant) of all - * same-key constraint tuples. The returned vector is malloc-ed and it - * must be free-ed by the caller. If the tuple is already contained or - * there are no other same-key tuples knocked out, then \b 0 is - * returned. - * - * \related relation - */ -extern vector *relation_add(relation *r,tuple *t); - -/** - * \brief Delete all tuples matching to the query \ref tuple fromt the - * \ref relation. - * - * \param r is the \ref relation concerned. - * - * \param t is the \ref tuple to delete. - * - * \returns a \vector vector of all knocked out tuples, i.e. the - * same-key tuples, if any, contained in the relation - * - * Note that deletion uses a "query" tuple, which means that some - * columns may be null to mark that them match to any value. - * - * \related relation - */ -extern vector *relation_delete(relation *r,tuple *query); - -/** - * \brief Return the next \ref tuple in the \ref relation that matches - * to the query \ref tuple, at or after the index. - * - * \param r is the \ref relation concerned. - * - * \param index is a pointer to the \ref vector index to update. - * - * \param query is a query \tuple tuple for selection of certain - * column values. - * - * \returns any such matching \tuple tuple and an updateed *index. - * - * \related relation - */ -extern void *relation_next(relation *r,vector_index *index,tuple *query); - -/** - * \brief Lay out a dynamic \ref relation initializer for a relation - * wth the given column "types". - * - * This defines a \ref relation intializer that creates the \ref - * tupleschema for the given columns. - * - * \note The initializer cannot be used statically. - * - * The \b content \ref hashvector is a \ref nibble_index_level variant - * with an initial size of 16 slots. - * - * The constraints \ref vector is a \ref bitpair_index_level variant - * with initial size 0. - * - * The \b content \ref hashvector \b type is set up with an allocated - * \ref tupleschema that has an allocated \ref tuple that declares the - * column "types" view the given \ref itemkeyfun pointers. Any add - * constraints will need to clone that \ref tupleschema and then clear - * the column slots for the constraint value columns, typically by - * using \ref tupleschema_mask for this. - * - * \related relation - */ -#define RELATION(...) (relation) { \ - .content = { \ - .table = { .variant = nibble_index_levels, .size=16, .entries=0 }, \ - .fill = 0, .holes = 0, \ - .type = (itemkeyfun*) TUPLESCHEMA( __VA_ARGS__ ) \ - }, \ - .constraints = { .variant = bitpair_index_levels, .size=0, .entries=0 } \ -} - -#endif diff --git a/vector/stringitem.c b/vector/stringitem.c index d53576d..ee8f2cb 100644 --- a/vector/stringitem.c +++ b/vector/stringitem.c @@ -1,18 +1,18 @@ #include #include -#include +#include /** * This callback function returns the hashcode of a key. The hashcode - * is used for indexing into the backing vector for finding the an + * is used for indexing into the backing Vector for finding the an * item via its key. The same key must map consistently to the same * hashcode while the hashtable contains an item with that key. * Different keys map map to the same hashcode, in which case the - * vector placement is made at the first empty or hole slot following + * Vector placement is made at the first empty or hole slot following * the hashcode index. */ static unsigned long stringitem_hashcode(void *this,void *key) { - return hashvector_hashcode( (unsigned char*)key, strlen( (char*)key ) ); + return HashVector_hashcode( (unsigned char*)key, strlen( (char*)key ) ); } /** @@ -49,7 +49,7 @@ static int stringitem_tostring(void *this,void *item,char *buffer,int limit) { return snprintf( buffer, limit, "(null)" ); } -itemkeyfun stringitem = { +ItemKeyFun stringitem = { .hashcode = stringitem_hashcode, .haskey = stringitem_haskey, .itemkey = stringitem_itemkey, diff --git a/vector/stringitem.h b/vector/stringitem.h index cf1d0f7..5cc1a32 100644 --- a/vector/stringitem.h +++ b/vector/stringitem.h @@ -1,12 +1,12 @@ #ifndef stringitem_H #define stringitem_H -#include +#include /** - * The stringitem record declares the itemkeyfun functions for string + * The stringitem record declares the ItemKeyFun functions for string * items. */ -extern itemkeyfun stringitem; +extern ItemKeyFun stringitem; #endif diff --git a/vector/tupleitem.c b/vector/tupleitem.c deleted file mode 100644 index 2a4a01a..0000000 --- a/vector/tupleitem.c +++ /dev/null @@ -1,179 +0,0 @@ -#include -#include -#include -#include - -#define COLUMN def->columns - -/** - * This callback function returns the hashcode of a key. - * - * \param this is a pointer to the itemkeyfun record from where this - * callback got invoked - * - * \param key is the key to produce a hascode for - * - * \returns the hashcode which is a vector_index (i.e. unsigned long) - * - * The hashcode is used for indexing into the backing vector for - * finding the an item via its key. The same key must map consistently - * to the same hashcode while the hashtable contains an item with that - * key. Different keys map map to the same hashcode, in which case the - * vector placement is made at the first empty or hole slot following - * the hashcode index. - */ -static unsigned long tupleitem_hashcode(void *this,void *key) { - tupleschema *def = (tupleschema *) this; - tuple *kp = (tuple*) key; - int i = 0; - unsigned long value = 5381; - for ( ; i < def->arity; i++ ) { - if ( COLUMN[i] ) { - value <<= 3; - if ( (*kp)[i] ) { - value += COLUMN[i]->hashcode( COLUMN[i], (*kp)[i] ); - } - } - value += 17; - } - return value; -} - -/** - * This callback function determines whether an item has a - * given key or not. - */ -static int tupleitem_haskey(void *this,void *item,void *key) { - tupleschema *def = (tupleschema *) this; - tuple *kp = (tuple*) key; - tuple *tp = (tuple*) item; - int i = 0; - for ( ; i < def->arity; i++ ) { - if ( COLUMN[i] && (*kp)[i] ) { - if ( COLUMN[i]->haskey( COLUMN[i], (*tp)[i], (*kp)[i] ) == 0 ) { - return 0; - } - } - } - return 1; -} - - -/** - * This callback function returns the key of an item by considering - * the arity and mask. - */ -static void *tupleitem_itemkey(void *this,void *item) { - tupleschema *def = (tupleschema *) this; - tuple *tp = (tuple*) item; - int i; - int keylen = def->arity; - void **parts = calloc( keylen, sizeof( void* ) ); - for ( i = 0; i < def->arity; i++ ) { - if ( COLUMN[i] ) { - parts[i] = COLUMN[i]->itemkey( COLUMN[i], (*tp)[i] ); - } - } - return (void*) parts; -} - -/** - * This callback function handles a key obtained from the itemkey - * callback function to reclaim temporary allocation. - */ -static void tupleitem_releasekey(void *this,void *key) { - tupleschema *def = (tupleschema *) this; - tuple *kp = (tuple*) key; - int i; - for ( i = 0; i < def->arity; i++ ) { - if ( COLUMN[i] ) { - COLUMN[i]->releasekey( COLUMN[i], (*kp)[i] ); - } - } - free( key ); -} - -#define OUT(X) a = X; if ( a > limit ) return 0; buffer += a; limit -= a - -/** - * This callback function writes a representation of an item into - * a character buffer. - */ -static int tupleitem_tostring(void *this,void *item,char *buffer,int limit) { - tupleschema *def = (tupleschema *) this; - tuple *t = (tuple*) item; - char *x = "<"; - int a, i; - for ( i = 0; i < def->arity; i++ ) { - OUT( snprintf( buffer, limit, x ) ); - x = ","; - OUT( def->columns[i]->tostring( - def->columns[i], (*t)[i], buffer, limit ) ); - } - OUT( snprintf( buffer, limit, ">" ) ); - return a; -} - - -// Allocate -tuple *tuple_create(int arity,...) { - va_list ap; - int i; - tuple *t = (tuple *)malloc( arity * sizeof( void* ) ); - va_start( ap, arity ); - for ( i = 0; i < arity; i++ ) { - (*t)[i] = va_arg( ap, void* ); - } - va_end( ap ); - return t; -} - -itemkeyfun tupleschema_callbacks = { - .hashcode = tupleitem_hashcode, - .haskey = tupleitem_haskey, - .itemkey = tupleitem_itemkey, - .releasekey = tupleitem_releasekey, - .tostring = tupleitem_tostring -}; - -tupleschema *tupleschema_create(int arity,tuple *columns) { - tupleschema *ts = (tupleschema*) malloc( sizeof( tupleschema ) ); - (*ts) = (tupleschema) { - .base = tupleschema_callbacks, - .arity = arity, - .columns = (itemkeyfun**) columns - }; - return ts; -} - -#define COPYA(T,P,N) (T*) memcpy( malloc( N * sizeof(T) ), P, N * sizeof( T ) ) -#define COPY(T,P) COPYA(T,P,1) - -// Duplicate a tupleschema with optionally some columns reset. -tupleschema *tupleschema_mask(tupleschema *schema,...) { - tupleschema *masked = COPY(tupleschema,schema); - masked->columns = COPYA( itemkeyfun*, schema->columns, schema->arity ); - va_list ap; - int i; - va_start( ap, schema ); - for ( ;; ) { - i = va_arg( ap, int ); - if ( i < 0 || i >= schema->arity ) { - break; - } - masked->columns[i] = 0; - }; - va_end( ap ); - return masked; -} - -unsigned long tuple_mask(int arity,tuple *t) { - unsigned long mask = 0; - while ( arity-- > 0 ) { - mask <<= 1; - if ( (*t)[ arity ] ) { - mask++; - } - } - return mask; -} diff --git a/vector/tupleitem.h b/vector/tupleitem.h deleted file mode 100644 index 3917c9b..0000000 --- a/vector/tupleitem.h +++ /dev/null @@ -1,100 +0,0 @@ -#ifndef tupleitem_H -#define tupleitem_H - -#include - -/** - * A tuple is an array of void* - */ -typedef void *tuple[]; - -/** - * A tupleschema record declares the itemkeyfun functions for tuple - * items together with applicable arity and domain combinations. - * Records are created dynamically via the \ref tupleschema_create - * function or the \ref TUPLESCHEMA convenience macro. - * - * \extends itemkeyfun - */ -typedef struct { - /** - * These are the itemkeyfun callback functions to support - * hashvector use for tuple items. The functions expects their - * itemkeyfun pointer to be within a tupleschema record so as to - * provide the handling of the tuple columns. - */ - itemkeyfun base; - - /** - * This is the number of columns in a tuple. - */ - int arity; - - /** - * This points to an array of pointers to the tuple item domains - * as represented by their associated itemkeyfun records. - */ - itemkeyfun **columns; -} tupleschema; - -/** - * Create a tuples with given values. - * - * \related tupleschema - */ -extern tuple *tuple_create(int arity,...); - -/** - * Create a tuples with given values. - * - * \related tupleschema - */ -extern tupleschema *tupleschema_create(int arity,tuple *columns); - -/** - * Copy the given tupleschema into a new tupleschema with some columns - * masked. This represents a sub-index type using the unmasked columns - * for indexing. - * - * \related tupleschema - */ -extern tupleschema *tupleschema_mask(tupleschema *schema,...); - -/** - * \brief Return 1/0 to indicate whether the query matches the item. - * - * \related tupleschema - */ -extern int tupleschema_match(tupleschema *def,tuple *query,tuple *item); - -/** - * \brief Generic macro to determine the number of expressions in a - * __VA_ARGS__ - * - * \related tupleschema - */ -#define NUMARGS(...) (sizeof((void*[]){__VA_ARGS__})/sizeof(void*)) - -/** - * \brief Create a tuple with the given values. - * - * This invokes \ref tuple_create to allocate and assign a void* - * array with the given values. - * - * \related tupleschema - */ -#define TUPLE(...) tuple_create( NUMARGS(__VA_ARGS__), __VA_ARGS__ ) - -/** - * \brief Create a \ref tupleschema with the given column "types". - * - * This invokes \ref tupleschema_create to allocate and initialize a - * \ref tupleschema for the given columns via the \ref TUPLE macro. - * - * \related tupleschema - */ -#define TUPLESCHEMA(...) \ - tupleschema_create( NUMARGS( __VA_ARGS__ ), TUPLE( __VA_ARGS__ ) ) - - -#endif diff --git a/vector/vector.c b/vector/vector.c deleted file mode 100644 index c450a60..0000000 --- a/vector/vector.c +++ /dev/null @@ -1,710 +0,0 @@ -#include -#include -#include -#include "vector.h" - -/** - * Representing a vector of void* accessible via an indexing structure - * as levels of same-size pages. A "vector_page" is a contiguous array - * void*, and an index is "unsigned long" (64 bits). - */ - -/** ============================================================ **/ - -static int VECTOR_BITS[4] = { 8, 4, 2, 64 }; - -typedef struct { - unsigned int a:2; - unsigned int b:2; - unsigned int c:2; - unsigned int d:2; -} bitpair; - -typedef struct { - unsigned int a:4; - unsigned int b:4; -} nibble; - -typedef struct { - unsigned int a:8; -} byte; - -/** - * Return the index part for the given level of the vector's leveling - * variant. - * - * The vector variant indicates whether indexing uses 8, 4, or 2 bits - * per level. - */ -unsigned long VECTOR_INDEX_PART(vector *pv,vector_index *index, int level) { - unsigned char *px = (unsigned char *) index; - switch ( pv->variant ) { - case byte_index_levels: { - byte *pp = (byte*)(px + level); - return pp->a; - } - case nibble_index_levels: { - nibble *pp = (nibble*)( px + ( level / 2 ) ); - switch ( level & 1 ) { - case 0: return pp->a; - case 1: return pp->b; - } - break; - } - case bitpair_index_levels: { - bitpair *pp = (bitpair*)( px + ( level / 4 ) ); - switch ( level & 3 ) { - case 0: return pp->a; - case 1: return pp->b; - case 2: return pp->c; - case 3: return pp->d; - } - break; - } - case single_index_level: - return (*index); - } - return 0; -} - -/** - * Increment the index part at the indivated level, cyclic but not - * carrying over to the upper level. Returns the new level index. - */ -static unsigned long VECTOR_INDEX_PART_INC( - vector *pv,vector_index *index, int level) -{ - unsigned char *px = (unsigned char *) index; - switch ( pv->variant ) { - case byte_index_levels: { - byte *pp = (byte*)( px + level ); - return ++(pp->a); - } - case nibble_index_levels: { - nibble *pp = (nibble*)( px + ( level / 2 ) ); - switch ( level & 1 ) { - case 0: return ++(pp->a); - case 1: return ++(pp->b); - } - break; - } - case bitpair_index_levels: { - bitpair *pp = (bitpair*)( px + level / 4 ); - switch ( level & 3 ) { - case 0: return ++(pp->a); - case 1: return ++(pp->b); - case 2: return ++(pp->c); - case 3: return ++(pp->d); - } - break; - } - case single_index_level: - return ++(*index); - } - return 0; -} - -/** - * Decrement the index part at the indicated level, cyclic but not - * carrying over to the upper level. Returns the prior level index. - */ -static unsigned long VECTOR_INDEX_PART_DEC( - vector *pv,vector_index *index, int level) -{ - unsigned char *px = (unsigned char *) index; - switch ( pv->variant ) { - case byte_index_levels: { - byte *pp = (byte*)( px + level ); - return (pp->a)--; - } - case nibble_index_levels: { - nibble *pp = (nibble*)( px + ( level / 2 ) ); - switch ( level & 1 ) { - case 0: return (pp->a)--; - case 1: return (pp->b)--; - } - break; - } - case bitpair_index_levels: { - bitpair *pp = (bitpair*)( px + level / 4 ); - switch ( level & 0xf ) { - case 0: return (pp->a)--; - case 1: return (pp->b)--; - case 2: return (pp->c)--; - case 3: return (pp->d)--; - } - break; - } - case single_index_level: - return (*index)--; - } - return 0; -} - -#define ONES (~((vector_index) 0)) - -// Set index to first value for all index parts at level and lower. -static void VECTOR_INDEX_FIRST(vector *pv,vector_index *index, int level) { - (*index) &= ONES << ( VECTOR_BITS[ pv->variant ] * level ); -} - -// Set index to last value for all index parts at level and lower. -static void VECTOR_INDEX_LAST(vector *pv,vector_index *index, int level) { - static unsigned long ones[] = { 255, 15, 3 }; - unsigned long x = ones[ pv->variant ]; - while ( level-- ) { - (*index) |= x; - x <<= VECTOR_BITS[ pv->variant ]; - } - // 255, 25, 3 - //(*index) |= ONES >> ( 64 - ( VECTOR_BITS[ pv->variant ] * level ) ); -} - -// Return number of slots for a vector variant. -unsigned long VECTOR_SLOTS(vector *pv) { - switch ( pv->variant ) { - case byte_index_levels: return 256; - case nibble_index_levels: return 16; - case bitpair_index_levels: return 4; - case single_index_level: return pv->size; - } - return 0; -} - -// The number of levels to span vector pv wrt its size and variant -static unsigned int vector_levels(vector *pv,unsigned int size) { - if ( size < 4 ) { - return 1; - } - switch ( pv->variant ) { - case byte_index_levels: return ((int)(log2( size - 1 ) / 8)) + 1; - case nibble_index_levels: return ((int)(log2( size - 1 ) / 4)) + 1; - case bitpair_index_levels: return ((int)(log2( size - 1 ) / 2)) + 1; - case single_index_level: return 1; - } - return 0; -} - -/** ============================================================ **/ - -/** - * Advances a vector index to the next used slot at or below the - * given level, starting from the indexed entry (inclusive) and up. - * The function will free any empty pages it discovers, and then - * update the index slots accordingly. The given index is advanced - * cyclically to match the found slot. The function returns a slot - * pointer to the used slot, if any, and 0 otherwise. - * The last parameter is a flag that gets set when the scanning is - * partial (i.e. not the whole index page). - */ -static void **vector_level_next_used( - vector *pv, - vector_page *page, - vector_index *index, - int level, - int *partial ) -{ - void **p = (void**)&(*page)[ VECTOR_INDEX_PART( pv, index, level ) ]; - if ( VECTOR_INDEX_PART( pv, index, level ) != 0 ) { - *partial = 1; - } - for( ; *index < pv->size; p++ ) { - if ( *p ) { - if ( level == 0 ) { - return p; // This is a used entry - } - // *p is an index that needs to be inspected recursively - int w = 0; - void **x = vector_level_next_used( pv, *p, index, level - 1, &w ); - if ( x ) { - return x; // Used slot was found; return it. - } - // If the page *p is all empty, so can/should be reclaimed. - if ( w == 0 ) { - free( *p ); - *p = 0; - } else { - *partial = 1; - } - } else { - if ( level > 0 ) { - VECTOR_INDEX_FIRST( pv, index, level - 1 ); - } - } - if ( VECTOR_INDEX_PART_INC( pv, index, level ) == 0 ) { - break; // cycling this level => nothing found - } - } - return 0; -} - -// Find the next used slot at given index or later. Returns pointer to -// the slot. This allows for a reclaim function that may reclaim slot -// items on the way to next used slot. -void **vector_next_used(vector *pv,vector_index *index) { - if ( pv->entries == 0 || *index >= pv->size ) { - *index = pv->size; - return 0; - } - int levels = vector_levels( pv, pv->size ); - int partial = 0; - do { - void **slot = vector_level_next_used( - pv, pv->entries, index, levels - 1, &partial ) ; - if ( slot == 0 ) { - break; // reached the end of the vector - } - if ( *slot ) { - return slot; - } - } while ( ++(*index) < pv->size ); - if ( partial == 0 ) { - free( pv->entries ); - pv->entries = 0; - } - *index = pv->size; // reached the end of the vector - return 0; -} - -/** - * Advances a vector index to the prior used slot at or below the - * given level, starting from the indexed entry (inclusive) and down. - * The function will free any empty pages it discovers, and then - * update the index slots accordingly. The given index is advanced - * cyclically to match the found slot. The function returns a slot - * pointer to the used slot, if any, and 0 otherwise. - * The last parameter is a flag that gets set when the scanning is - * partial (i.e. not the whole index page). - */ -static void **vector_level_prev_used( - vector *pv, - vector_page *page, - vector_index *index, - int level, - int *partial ) -{ - void **p = (void**)&(*page)[ VECTOR_INDEX_PART( pv, index, level ) ]; - if ( VECTOR_INDEX_PART( pv, index, level ) != VECTOR_SLOTS( pv ) - 1 ) { - *partial = 1; - } - do { - if ( *p ) { - if ( level == 0 ) { - return p; // This is a used entry - } - // *p is an index that needs to be inspected recursively - int w = 0; - void **x = vector_level_prev_used( pv, *p, index, level - 1, &w ); - if ( x ) { - return x; // Used slot was found; return it. - } - // If the page *p is all empty, it can/should be reclaimed. - if ( w == 0 ) { - free( *p ); - *p = 0; - } else { - *partial = 1; - } - } else { - if ( level > 0 ) { - VECTOR_INDEX_LAST( pv, index, level ); - } - } - p--; - } while ( VECTOR_INDEX_PART_DEC( pv, index, level ) != 0 ); - return 0; -} - -// Find the next used slot at given index or later. Returns pointer to -// the slot. This allows for a reclaim function that may reclaim slot -// items on the way to next used slot. -void **vector_prev_used(vector *pv,vector_index *index) { - if ( pv->entries == 0 || *index >= pv->size ) { - *index = pv->size; - return 0; - } - int levels = vector_levels( pv, pv->size ); - int partial = 0; - do { - void **slot = vector_level_prev_used( - pv, pv->entries, index, levels - 1, &partial ) ; - if ( slot == 0 ) { - break; // reached the end of the vector - } - if ( *slot ) { - return slot; - } - } while ( (*index)-- != 0 ); - if ( partial == 0 ) { - free( pv->entries ); - pv->entries = 0; - } - *index = pv->size; // reached the end of the vector - return 0; -} - -// Reclaim tree of unused pages for a given level -static void vector_reclaim(vector *pv,vector_page *page,unsigned int level) { - int i = 0; - if ( level > 0 ) { - for ( ; i < VECTOR_SLOTS( pv ); i++ ) { - if ( (*page)[i] ) { - vector_reclaim( pv, (vector_page *) (*page)[i], level - 1 ); - } - } - } - free( page ); -} - -// Resize vector, using the reclaim function as needed, to handle any -// excess items or to veto the resize. Returns the index of the veto, -// if any, or <0 otherwise, with -1 indicating success and -2 -// indicating OOM -// -// Note that resizing may result in the introduction/removal of -// indexing levels and pages, so as to keep the leveling accurate for -// the size. -int vector_resize( - vector *pv,vector_index new_size, - int (*reclaim)(vector *pv,vector_index index,void *item,void *data), - void *data ) -{ - struct { - int old; - int new; - } level = { - vector_levels( pv, pv->size ), - vector_levels( pv, new_size ) - }; - vector_page *entries = 0; - if ( pv->entries == 0 ) { - pv->size = new_size; - return 0; - } - // A shrinking vector might be veto-ed - if ( new_size < pv->size ) { - vector_index index = new_size; - void **slot = vector_next_used( pv, &index ); - while ( slot ) { - if ( *slot && reclaim && reclaim( pv, index, *slot, data ) == 0 ) { - *slot = 0; - slot = vector_next_used( pv, &index ); - continue; - } - return index; - } - // At this point we know that there are no slots used after - // the new_size size, so now it's time to remove and reclaim - // any superflouous top level pages. - if ( pv->variant == single_index_level ) { - // Follow vector size using realloc - if ( new_size > 0 ) { - entries = (vector_page*) - realloc( pv->entries, new_size * sizeof( void* ) ); - if ( entries == 0 ) { - return -2; // OOM - } - } - pv->entries = entries; - } else { - vector_page **pp = &pv->entries; - int i = level.old; - while ( i-- > level.new ) { - if ( pp ) { - pp = (vector_page **)(*pp); - } - } - if ( pp != &pv->entries ) { - entries = pv->entries; - if ( pp ) { - pv->entries = *pp; - *pp = 0; // Detach subtree - } else { - pv->entries = 0; - } - vector_reclaim( pv, entries, level.old - 1 ); - } - if ( new_size == 0 && pv->entries ) { - free( pv->entries ); - pv->entries = 0; - } - } - } else { - // vector is growing. Maybe insert levels. - if ( pv->variant == single_index_level ) { - // Follow vector size using realloc - entries = (vector_page *)realloc( - pv->entries, new_size * sizeof( void* ) ); - if ( entries == 0 ) { - return -2; // OOM - } - pv->entries = entries; - memset( &(*entries)[ pv->size ], 0, - ( new_size - pv->size ) * sizeof( void* ) ); - } else { - for ( ; level.old < level.new; level.old++ ) { - vector_page *p = (vector_page *) - calloc( VECTOR_SLOTS( pv ), sizeof( void* ) ); - if ( p == 0 ) { - return -2; // OOM - } - (*p)[0] = pv->entries; - pv->entries = p; - // Should maybe change the size to match the level? - // otherwise recovery from OOM is impossible - } - } - } - pv->size = new_size; - return -1; -} - -// Return pointer to the indexed page slot at the requested level, and -// adding intermediate index pages if so requested. Returns 0 if -// addition fails (OOM), or if not requested and page is missing. -void **vector_access(vector *pv,vector_index index,int level,int add) { - if ( index >= pv->size ) { - return 0; - } - void **page = (void**) &pv->entries; - int i = vector_levels( pv, pv->size ); - while ( i-- > level ) { - if ( add && (*page) == 0 ) { - (*page) = calloc( VECTOR_SLOTS( pv ), sizeof( void* ) ); - } - page = (*page); - if ( page == 0 ) { - return 0; - } - page += VECTOR_INDEX_PART( pv, &index, i ); - } - return page; -} - -// Map index into a value slot -void **vector_entry(vector *pv,vector_index index) { - return vector_access( pv, index, 0, 1 ); -} - -inline void vector_set(vector *pv,vector_index index,void *value) { - void **p = vector_entry( pv, index ); - *p = value; -} - -// Set value at index but return the old value -void *vector_get_set(vector *pv,vector_index index,void *value) { - void **p = vector_entry( pv, index ); - void *old = *p; - *p = value; - return old; -} - -inline void *vector_get(vector *pv,vector_index index) { - void **p = vector_entry( pv, index ); - return *p; -} - -int vector_free_any(vector *pv,vector_index ix,void *item,void *data) { - free( item ); - return 0; -} - -int vector_clear_any(vector *pv,vector_index ix,void *item,void *data) { - return 0; -} - -void vector_append(vector *pv,void *value) { - vector_resize( pv, pv->size + 1, 0, 0 ); - vector_set( pv, pv->size - 1, value ); -} - -// copy block of n items from src[si] to dst[di] -// no efficiency hacks -void vector_copy(vector *dst,vector_index di, - vector *src,vector_index si,vector_index n) { - if ( dst != src || di < si ) { - while ( n-- != 0 ) { - vector_set( dst, di++, vector_get( src, si++ ) ); - } - } else if ( di > si ){ - di += n - 1; - si += n - 1; - while ( n-- != 0 ) { - vector_set( dst, di--, vector_get( src, si-- ) ); - } - } -} - -vector *vector_clone(enum vector_variant variant,vector *src) { - vector *dst = (vector*) malloc( sizeof( vector ) ); - (*dst) = (vector) { .variant = variant, .size = 0, .entries = 0 }; - vector_resize( dst, src->size, 0, 0 ); - vector_copy( dst, 0, src, 0, src->size ); - return dst; -} - -void vector_dump(vector *pv, - void (*itemdump)(const vector_index,const void *)) { - vector_index index = 0; - for ( ; index < pv->size; index++ ) { - void **slot = vector_next_used( pv, &index ); - if ( slot == 0 ) { - break; - } - itemdump( index, *slot ); - } -} - -//// Quicksort - -// Returns 1 for "in order", 0 for equal, and -1 for "wrong order" - typedef int (*comparfn)(const void *,const void *); - -static void vector_qsort_part( - vector *pv,comparfn compar, - vector_index low,vector_index high) -{ - if ( low >= high ) { - return; - } - vector_index lo = low; - vector_index m = high - 1; - - if ( lo >= m ) { - return; - } - - vector_index hi = m - 1; - void **mp = vector_entry( pv, m ); - void **lop, **hip; - for ( ;; ) { - // Find index of first item "above" mp scanning from lo and up - for ( ; lo < m; lo++ ) { - lop = vector_entry( pv, lo ); - if ( compar( *lop, *mp ) < 0 ) { - break; - } - } - // if lo == m, then lop is wrong!! - // Find index of first item "below" mp scanning from hi and down - for ( ; hi > lo; hi-- ) { - hip = vector_entry( pv, hi ); - if ( compar( *mp, *hip ) < 0 ) { - break; - } - } - if ( lo >= hi ) { - if ( lo < m ) { - void *x = *lop; - *lop = *mp; - *mp = x; - m = lo; - } - break; - } - void *x = *lop; - *lop = *hip; - *hip = x; - } - vector_qsort_part( pv, compar, low, m ); - vector_qsort_part( pv, compar, m+1, high ); -} - -void vector_qsort(vector *pv,comparfn compar) { - vector_qsort_part( pv, compar, 0, pv->size ); -} - -void vector_iterate(vector *pv, - vector_index start, - int (*itemfn)(vector_index,void*,void*), - void *data ) -{ - vector_index index = start; - while ( index < pv->size ) { - int end = VECTOR_SLOTS( pv ); - int i = index & ( end - 1 ); - for ( ; i < end && index < pv->size; i++, index++ ) { - void **slot = vector_access( pv, index, 0, 0 ); - if ( itemfn( index, slot? *slot: 0, data ) ) { - return; - } - } - } -} - -struct find_data { - void *value; - vector_index index; -}; - -static int vector_find_item(vector_index index, void *item,void *data) { - struct find_data *fd = (struct find_data*)data; - if ( fd->value != item ) { - return 0; - } - fd->index = index; - return 1; -} - -vector_index vector_find(vector *pv,void *value) { - struct find_data fd = { - .value = value, - .index = pv->size - }; - vector_iterate( pv, 0, vector_find_item, &fd ); - return fd.index; -} - -// Find surrounding indexes for a given item key in a sparse vector -void *vector_bsearch(vector *pv,vector_index *index,const void *key, - int (*compare)(const void *key, const void *item)) { - vector_index lo = 0; - vector_index hi = pv->size; - if ( hi-- == 0 || vector_prev_used( pv, &hi ) == 0 ) { - return 0; - } - hi++; - while ( lo < hi ) { - vector_index m = lo + ( hi - lo ) / 2; - void **slot = vector_next_used( pv, &m ); - int c = compare( key, *slot ); - if ( c == 0 ) { - *index = m; - return *slot; - } - if ( c < 0 ) { - hi = m; - } else { - lo = m + 1; - } - } - return 0; -} - -// Iterator callback. -static int checkunused(vector_index index,void *item,void *data) { - vector_index *last = (vector_index*) data; - if ( item == 0 ) { - (*last) = index; - return 1; - } - if ( *last > index ) { - // Only on the first iteration, with *last = vector_sie - if ( index == 0 ) { - (*last) = 1; - return 0; - } - *last = 0; - } else if ( index == (*last) ) { - (*last)++; - return 0; - } - return 1; -} - -// Scan forward for the next unused vector slot -vector_index vector_next_unused(vector *pv,vector_index index) { - vector_index unused = vector_size( pv ); - vector_iterate( pv, index, checkunused, &unused ); - return unused; -} diff --git a/vector/vector.h b/vector/vector.h deleted file mode 100644 index 4dc61f8..0000000 --- a/vector/vector.h +++ /dev/null @@ -1,396 +0,0 @@ -#ifndef vector_H -#define vector_H - -/** \file - * - * A vector is a dynamic pointer array implemented as an access tree - * of index pages. The indexing is done using "unsigned long" indexes, - * and the level 0 index corresponds to actual items. - * - * Actual vectors are assigned a leveling variant which defines the - * index page size for the vector. This must not be changed for a - * vector with entries. - */ - -/** - * \brief This is the general indexing used for vector access. - * - * \related vector - */ -typedef unsigned long vector_index; - -/** - * \brief A vector_page is an array of void* items. Its size depends - * on the applicable vector variant. - * - * \related vector - */ -typedef void* vector_page[]; - -/** - * \brief The implementation variants. - * - * - byte_index_levels (0) has 8-bit indexing parts and index pages - * with 256 pointers, - * - * - nibble_index_levels (1) has 4-bit indexing parts and index pages - * with 16 pointers, - * - * - bitpair_index_levels (2) has 2-bit indexing parts and index pages - * with 4 pointers, - * - * - single_index_level (3) has a single page that is sized as the - * vector. - * - * The first three variants are managed by adding/removing full pages - * of the indexing tree upon resize and access. The single_index_level - * variant is managed by using realloc upon resize. In all cases - * shrinking a vector might mean to reclaim items about to be "lost", - * if any, via a provided item reclaim callback function. The callback - * function may then also veto the shrinking. - * - * \related vector - */ -enum vector_variant { - byte_index_levels = 0, - nibble_index_levels = 1, - bitpair_index_levels = 2, - single_index_level = 3 -}; - -/** - * A vector struct is the "foot part" of a representation that - * constitutes the implementation variant for a vector abstraction. It - * holds the variant indicator, the intended slot size and a root - * pointer for the indexing structure, which consist of indexing pages - * according to the variant. - */ -typedef struct { - /** - * The indexing variant. - */ - enum vector_variant variant; - - /** - * The size of the vector. - */ - vector_index size; - - /** - * The root page of the indexing tree. - */ - vector_page *entries; -} vector; - -/** - * \brief Return the number of slots spanned by an index level for the - * given vector variant. - * - * - 0 indicates 8-bit index parts, and 256 page slots - * - 1 indicates 4-bit index parts, and 16 page slots - * - 2 indicates 2-bit index parts, and 4 page slots - * - 3 indicates 64-bit index parts, and 1 page level following the size - * - * The type 3 vector is managed by using realloc. - * \related vector - */ -extern unsigned long VECTOR_SLOTS(vector *pv); - -/** - * \brief Find the nearest used (non-null) slot at given or higher - * index. - * - * \param pv is the vector concerned. - * - * \param index is the index to change. - * - * \returns a pointer to the first non-null vector slot from the given - * index, and *index set accordingly. If no non-null slot is found, - * the 0 is returned and *index is set to the vector size. - * - * \related vector - */ -extern void **vector_next_used(vector *pv,vector_index *index); - -/** - * \brief Find the nearest used (non-null) slot at given or lower - * index. - * - * \param pv is the vector concerned. - * - * \param index is the index to change. - * - * \returns a pointer to the first non-null vector slot from the given - * index, and *index set accordingly. If no non-null slot is found, - * the 0 is returned and *index is set to the vector size. - * - * \related vector - */ -extern void **vector_prev_used(vector *pv,vector_index *index); - -/** - * \brief Resize a vector. - * - * \param pv is the vector concerned. - * - * \param new_size is the new size it should have, - * - * \param reclaim is used upon shrinking in size for handling any - * current items above the new size, or vetoing the attempted resize. - * - * \param data is passed on the the reclaim function to use as context - * as needed. - * - * \returns the index of a resizing veto any, or <0 otherwise, with -1 - * indicating success and -2 indicating OOM. - * - * This function attempts to resize the given vector to a new size. - * This may result in the introduction or removal of indexing pages, - * so that the index tree leveling is consistent with the vector size. - * Thus, if it grows into a new level, then one or more new upper - * level pages are inserted as needed. If it shrinks below the current - * level, then top-level pages are removed. - * - * Also, if the new size is smaller than currently, then the now - * excess tail of entries is scanned for any used slots and the given - * reclaim function is invoked successively for these. The reclaim - * function must, in addition to memory-managing the entry, return 0 - * upon success, or non-zero to veto the attempted vector size change. - * The data argument is passed on to the reclaim function as given. - * - * \related vector - */ -extern int vector_resize( - vector *pv, vector_index new_size, - int (*reclaim)(vector *pv,vector_index index,void *item,void *data), - void *data ); - -/** - * \brief Return pointer to the indexed page slot at the requested - * level, and adding intermediate index pages if so requested. - * - * \param pv is the vector concerned. - * - * \param index is the slot index. - * - * \param level is the indexing level to access. Level 0 is the leaf - * level that holds the slots for the items; level 1 is one level up, - * for vectors larger than 256 items; ans so on. - * - * \param add is a flag to indicate (with 1) that missing index pages - * should be added, or (with 0) that the function should simply return - * null if an index page to access at any level is missing. - * - * \returns a pointer to the slot for the indexed item (level 0), or - * (for higher levels) the slot for the index page on the access path - * to the indexed item. The function returns 0 if the access path is - * broken by a missing index page, or (with add==1) the allocation of - * a new index page fails. - * - * \note The index tree for the vector is populated on demand only - * where access has been requested. - * - * \related vector - */ -extern void **vector_access(vector *pv,vector_index index,int level,int add); - -/** - * \brief Return the slot value at the given index. - * - * \param pv is the vector concerned. - * - * \param index is the slot index. - * - * \returns a direct pointer to the slot of the given index in the - * array, or 0 if the index is beyond the array limits (0-limit). - * - * \note Note that slot pointers are only valid while the vector size - * is unchanged. - * - * \related vector - */ -extern void **vector_entry(vector *pv,vector_index index); - -/** - * \param pv - the vector concerned - * \returns the size of the vector. - * \related vector - */ -#define vector_size(pv) ((vector_index) (pv)->size) - -/** - * \brief Set the vector value at the given index. - * - * \param pv is the vector concerned - * \param index is the index for the slot to assign - * \param value is the new slot value - * - * \note An assignment of 0 will be treated as an unused slot. - * - * \related vector - */ -extern void vector_set(vector *pv,vector_index index,void *value); - -/** - * \brief Set the vector value at the given index and return the prior - * value. - * - * \param pv is the vector concerned - * \param index is the index for the slot to assign - * \param value is the new slot value - * - * \note An assignment of 0 will be treated as an unused slot. - * - * \related vector - */ -extern void *vector_get_set(vector *pv,vector_index index,void *value); - -/** - * \brief Get the vector value at the given index. - * - * \param pv is the vector concerned - * \param index is the index for the slot to assign - * - * \note This function will allocate all traversed indeex tree pages - * even for accessing an unassigned slot. - * - * \related vector - */ -extern void *vector_get(vector *pv,vector_index index); - -/** - * \brief Grow the vector by one and assign the added slot. - * - * \param pv is the vector concerned - * \param value is the new slot value - * - * \related vector - */ -extern void vector_append(vector *pv,void *value); - -/** - * \brief Copy a consecutive region from one vector into another, or - * possibly the same vector. - * - * \param pv is the vector concerned - * \param value is the new slot value - * - * \note This transfers all slots from the source region to the - * destination region, including zero slots. The vectors must be large - * enough for the transfer, which is carried out from lowest to - * highest or highest to lowest index depending on wther the move is - * to higher index or to lower index respectively. - * - * \related vector - */ -extern void vector_copy( - vector *dst,vector_index di, - vector *src,vector_index si, - vector_index n); - -/** - * \brief Allocate a copy of a vector into one in the given variant. - */ -extern vector *vector_clone(enum vector_variant variant,vector *src); - -/** - * \brief Utility function that invokes the itemdump function for all - * used (non-null) slots. - * - * \related vector - * \seealso vector_iterate - */ -extern void vector_dump( - vector *pv, - void (*itemdump)(const vector_index ,const void *)); - -/** - * \brief Sort a vector with quicksort using the provided ordering - * collback function. - * - * \related vector - */ -extern void vector_qsort(vector *pv,int (*compar)(const void *,const void *)); - -/** - * Steps through the vector item by item invoking the given function - * for each. Continues stepping while the item function returns 0. - * - * \related vector - */ -extern void vector_iterate( - vector *pv, vector_index start, - int (*itemfn)(vector_index,void *item,void *data), - void *data); - -/** - * \brief Binary search in a sorted vector for an item of the given - * key, with a callback function providing the sorting order. - * - * \param pv is the vector concerned. - * - * \param index is a vector_index pointer for returning the index of - * the found item. - * - * \param key is the lookup key to find. - * - * \param compare is a callback function that should return the search - * direction given a key and an item. It should return 0 if the key is - * a match for the item, <0 if the sought item is expected at a higher - * index, and >0 if the sought item is expected at a lower index. - * - * \return a pointer to the found item and *index set to its index. If - * there is no matching item, then 0 is returned, and the index is set - * to the vector size. - * - * \related vector - */ -extern void *vector_bsearch( - vector *pv, vector_index *index, const void *key, - int (*compare)(const void *key, const void *item)); - -/** - * \brief Find the index for a given value - * - * \param pv is the vector concerned - * \param is the value to find - * - * This function scans the vector for the first, if any, occurrence of - * the value, or returns pv->size if not found. - * - * \related vector - */ -extern vector_index vector_find(vector *pv,void *value); - -/** - * \brief Find the next used slot at or after the given index. - * - * \param pv the vector concerned. - * \param index pointer to the index to advance. - * \return the new index, or the vector size if no unused slot is - * found. - * - * Scans forward in the vector for the first unused (null) vector slot - * at or after the given index. Returns pv->size if full. - * - * \related vector - */ -extern vector_index vector_next_unused(vector *pv,vector_index index); - -/** - * \brief Convenience callback function for vector shrinking to free - * any existing slot assignment. - * - * \related vector - */ -extern int vector_free_any(vector *pv,vector_index ix,void *item,void *data); - -/** - * \brief Convenience callback function for vector shrinking to ignore - * any existing slot assignment (without free-ing them). - * - * \related vector - */ -extern int vector_clear_any(vector *pv,vector_index ix,void *item,void *data); - -#endif