From efd06144cd7b5ab1720ef2cda59bd7568f31d034 Mon Sep 17 00:00:00 2001 From: Ralph Ronnquist Date: Tue, 21 Jun 2022 13:22:55 +1000 Subject: [PATCH] cleanup and commenting --- pvector/hashvector.c | 113 ++++++++++++++++++++++++------------------- pvector/hashvector.h | 101 +++++++++++++++++++++++++++++--------- 2 files changed, 141 insertions(+), 73 deletions(-) diff --git a/pvector/hashvector.c b/pvector/hashvector.c index 35dcd2e..5efbd8a 100644 --- a/pvector/hashvector.c +++ b/pvector/hashvector.c @@ -1,34 +1,37 @@ #include "hashvector.h" -// Find the slot for the keyed element, and return pointer to it. +// Find the slot for the keyed element, and return pointer to it, or +// to the first of holes encountered while considering collisions. +// Returns a pointer to the place for the item, or 0 in case of OOM or +// overfull hashvector (i.e. 0 shouldn't happen). static void **hashvector_find_slot(hashvector *hv,void *key) { - unsigned int index = hv->keyhashcode( key ) % hv->table.size; - unsigned int i = index; + unsigned long index = hv->keyhashcode( key ) % hv->table.size; + unsigned long i = index; void **hole = 0; + void **p = 0; for ( ;; ) { - void **p = pvector_entry(&hv->table, i); + p = pvector_entry(&hv->table, i); if ( p == 0 ) { - return 0; + return 0; // This basically means OOM, and is a failure condition. } - if ( *p ) { - if ( (*p) != HV_HOLE ) { - if ( hv->haskey( *p, key ) ) { - return p; - } - } else { - if ( hole == 0 ) { - hole = p; - } - if ( ++i == hv->table.size ) { - i = 0; - } - if ( i != index ) { - continue; - } + if ( *p == 0 ) { + break; // Not found + } + if ( (*p) == HV_HOLE ) { + if ( hole == 0 ) { + hole = p; // Remember the first hole } + } else if ( hv->haskey( *p, key ) ) { + return p; // Found + } + if ( ++i == hv->table.size ) { + i = 0; // Roll-around the table + } + if ( i == index ) { + return 0; // Overfull hashvector! } - return ( hole )? hole : p; } + return ( hole )? hole : p; } // Find the keyed element, and assign the x pointer, or assign 0. @@ -49,7 +52,7 @@ static int capture_item(pvector *pv,unsigned long ix,void *item,void *data) { return 0; } -static void hashvector_resize(hashvector *hv,unsigned int new_size) { +static void hashvector_resize(hashvector *hv,unsigned long new_size) { hashvector tmp = *hv; hv->table.size = new_size; hv->table.entries = 0; @@ -59,44 +62,46 @@ static void hashvector_resize(hashvector *hv,unsigned int new_size) { } // Add the given element. -void hashvector_add(hashvector *hv,void *item) { +int hashvector_add(hashvector *hv,void *item) { void **p = hashvector_find_slot( hv, hv->itemkey( item ) ); - if ( p ) { - if ( *p ) { - if ( *p != HV_HOLE ) { - return; - } - hv->holes--; - } - *p = item; - hv->fill++; - if ( hv->fill + hv->holes > hv->table.size / 2 ) { - hashvector_resize( hv, hv->table.size * 2 ); + if ( p == 0 ) { + return -1; // OOM or overfull hashvector + } + if ( *p ) { + if ( *p != HV_HOLE ) { + return 0; // Already added. } + hv->holes--; // about to reuse a hole + } + *p = item; + hv->fill++; + if ( hv->fill + hv->holes > hv->table.size / 2 ) { + hashvector_resize( hv, hv->table.size * 2 ); } + return 1; } -// Return the next element starting at i, or 0 if there are no more. -// Also increment the index to be of the element + 1, or -1 if there -// are no more elements. -//unsigned char *htnext(htable *table,int *i); - -// Delete the given element. -void hashvector_delete(hashvector *hv,void *item) { +// Delete the given item +int hashvector_delete(hashvector *hv,void *item) { void **p = hashvector_find_slot( hv, hv->itemkey( item ) ); - if ( p && *p && *p != HV_HOLE ) { - *p = HV_HOLE; - hv->holes++; - if ( hv->table.size > 256 ) { - if ( hv->fill < hv->table.size / 2 ) { - hashvector_resize( hv, hv->table.size / 2 ); - } + if ( p == 0 ) { + return -1; + } + if ( *p != item ) { + return 0; + } + *p = HV_HOLE; + hv->holes++; + if ( hv->table.size > 256 ) { + if ( hv->fill < hv->table.size / 2 ) { + hashvector_resize( hv, hv->table.size / 2 ); } } + return 1; } // Copy items into a pvector. Returns 0 on success and -1 on failure. -int hashvector_pack(hashvector *hv,pvector *pv) { +int hashvector_contents(hashvector *hv,pvector *pv) { if ( pvector_resize( pv, hv->fill, 0, 0 ) ) { return -1; } @@ -115,3 +120,13 @@ int hashvector_pack(hashvector *hv,pvector *pv) { } return 0; } + +// A simple binary hashcode, (before modulo table size) +unsigned long hashvector_hashcode(unsigned char *key,unsigned long n) { + unsigned long value = 5381; + while ( n-- ) { + value += ( value << 5 ) + *(key++); + } + return value; +} + diff --git a/pvector/hashvector.h b/pvector/hashvector.h index b886660..a5b426b 100644 --- a/pvector/hashvector.h +++ b/pvector/hashvector.h @@ -1,43 +1,96 @@ #ifndef hashvector_H #define hashvector_H -#ifdef USE_PTHREAD -#define __USE_GNU 1 -#include -#endif +/** + * hashvector is a use of pvector as a hashtable. The hashvector + * includes three functions to, respectively, obtain the hashcode of a + * given key, to obtain the key of a given item, and to tell whether + * or not a given item has a given key. The hashvector manipulations + * uses those for locating and placing items into the vector; the + * hascode is the primary place for an item, but then scanning upwards + * with a rolling index in case of collisions. + * + * The vector holds 0 pointers for free slots, (void*)1 pointers (aka + * HV_HOLE) to indicate slots of deleted items, and otherwise item + * pointers. Thus, deleting an item results in av HV_HOLE et the + * item's slot. + * + * The hashvector grows to double in size, with rehashing, when the + * sum of items and holes exceeds 50% fill, and it shrinks to half + * size when item numbers reduces below 50%. + */ #include "pvector.h" +/*! + * Type: hashvector + * This combines a pvector (for contents) with fill and hole counters + * and the three functions. + */ typedef struct _hashvector { pvector table; - unsigned int fill; // number of added elements - unsigned int holes; // number of deleted - int (*keyhashcode)(void *key); - void *(*itemkey)(void *item); - int (*haskey)(void *item,void *key); -#ifdef USE_PTHREAD - pthread_mutex_t lock; -#endif + unsigned long fill; // number of added elements + unsigned long holes; // number of deleted + unsigned long (*keyhashcode)(void *key); // The hashcode of a key + void *(*itemkey)(void *item); // the key of ain item + int (*haskey)(void *item,void *key); // whether an item has a given } hashvector; +/*! + * Macro: HV_HOLE + * The representation of a deleted item. + */ #define HV_HOLE ((void*) 1) -// Find the keyed element, and assign the x pointer, or assign 0. -// Returns 1 if element is found and 0 otherwise. +/*! + * Function: int hashvector_find(hashvector *hv,void *key,void **x) + * + * Find the item, if any, with the given key and assign *x with its + * address. Returns 1 on success and 0 on failure to find the keyed + * item. Note that when the function returns 0, *x is unchanged. + */ int hashvector_find(hashvector *hv,void *key,void **x); -// Add the given element. -void hashvector_add(hashvector *hv,void *p); +/*! + * Function: void hashvector_add(hashvector *hv,void *item) + * + * Add the given item into the hashvector, growing the hashvector as + * needed to ensure that its fill+holes is is no more than half the + * size. Note that this function first locates any item of the same + * key, and then doesn't add if there is already an item that has the + * same key. Returns 1 when an item is added, 0 when the item key + * is already present, and -1 upon OOM or ovefull hashvector. + */ +int hashvector_add(hashvector *hv,void *item); -// Return the next element starting at i, or 0 if there are no more. -// Also increment the index to be of the element + 1, or -1 if there -// are no more elements. -//unsigned char *htnext(htable *table,int *i); +/*! + * Function: void hashvector_delete(hashvector *hv,void *item) + * + * Delete the given item, and shrink the hashvector so that its size + * is at most double its fill, though at least 256 slots. Returns 1 + * when the item gets deleted (by replacing its slot with a HV_HOLE + * value, 0 if the hashvector has either no item or a different item + * for the item key, and -1 in case of OOM or overfull hashvector. + */ +int hashvector_delete(hashvector *hv,void *item); -// Delete the given element. -void hashvector_delete(hashvector *hv,void *p); +/*! + * Function: int hashvector_contents(hashvector *hv,pvector *pv) + * + * Copy content items into a pvector, which must be empty. The + * function returns -1 if the resizing of the pvector to the + * hashvector fill fails, otherwise 0 after having copied the + * hashvector items in their internal order of appearance into the + * pvector. + */ +int hashvector_contents(hashvector *hv,pvector *pv); -// Copy content items into pvector, which must be empty. -int hashvector_pack(hashvector *hv,pvector *pv); +/*! + * Function unsigned long hashvector_hashcode( + * unsigned char *key,unsigned long n) + * + * Computes and returns a hashcode for a block of bytes. + */ +unsigned long hashvector_hashcode(unsigned char *key,unsigned long n); #endif -- 2.39.2