hashtable

hash table macros for creating hash table functions, it supports any type key, value, hash
git clone https://noulin.net/git/hashtable.git
Log | Files | Refs | LICENSE

commit 47453e95da039afb276e06787c6a72dc590cc2ea
parent b37e1c2848569620ade41639f406d6dc56a525d6
Author: Remy Noulin <loader2x@gmail.com>
Date:   Sun, 20 May 2018 00:25:25 +0200

hashtable macros

hashtable.c     |   18 +
hashtable.h     | 1310 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
hashtableMain.c |  223 ++++++++++
package.yml     |   19 +
4 files changed, 1570 insertions(+)

Diffstat:
Ahashtable.c | 18++++++++++++++++++
Ahashtable.h | 1310+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
AhashtableMain.c | 223+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apackage.yml | 19+++++++++++++++++++
4 files changed, 1570 insertions(+), 0 deletions(-)

diff --git a/hashtable.c b/hashtable.c @@ -0,0 +1,18 @@ + +#include "libsheepyObject.h" + +u32 ilog2Hashtable(u32 value) { + //u32 n = (__builtin_clz(value) ^ 31) + 1; + if (!value) return 4; + // compute log2 of value + u32 n; + asm ("bsrl %1, %0" : "=r" (n) : "r" (value)); + n++; + // keep size at 16 for values under 17 + if (n <= 4) n = 4; + // if value is a power of 2, keep the size down + else if ((value & (value-1)) == 0) n--; + /*else if (value == (1<<(n-1))) n--;*/ + return n; +} + diff --git a/hashtable.h b/hashtable.h @@ -0,0 +1,1310 @@ +#pragma once + +/** + * \file + * + * Chained hash table + * + * NOTE: The macros in this file are for creating hash table functions wrapping the macros. + * I don't advise using these directly because of the risk of memory leaks and + * wrong computations. + * The leaks and wrong computations happen because the parameters to the macros + * are evaluated multiple times. + * Example: + * hashTbAdd(ahashtable, strdup("akey"), value) causes a leak because strdup is + * evaluted twice. + * + * NOTE: This package doesn't provide the hash functions, install the hashfunctions package to + * get some hash functions. + * + * This hash table has 2 single linked lists in each bucket, + * the performance is still good with a load factor of 2. + * + * The maximum number of buckets is 2^32 and and the maximum number (key, value) nodes 2^32 ( > 40 GB RAM) + * + * The size (bucket count) increases in power of 2. + * + * The status of the macros is saved (hashtableVar).res and is set to true when the operation is success and + * false when the operation is failure. + * + * Use hashmap when there are more than 50 elements in the dictionary + * + * Hash table memory usage (for key type char* and value type u32): + * Empty hashmap size: 2744 bytes + * Size depending on element count (with load factor 2): 184 + 42 * count + * + * It takes in average 4 times more memory than smallDict + * hashmap = 4 * smallDict RAM + * + * Lookup time: + * fnv hash get: 440ns (300 000 000 keys in hash table - kaby lake CPU) + * + * This hash table has better performance than smallDict with 50 elements + * + * + * The types handled by the hash table (key, value and hash types) + * are defined with hashTbT or hashNodeT and hashtableT. + * + * The hashTb* are short and convenient macros for the hashtable* macros. + * With hashTb* macros there is no need to specify the hash, cmp and free functions. + * + * Before using the hashTb* macros, define HASHFUNC CMPFUNC and FREEFUNC. + * + * *Node Macros: + * The *Node macros handle the nodes in hash table and save time by not running the hash function. + * These macros are useful when the hash function is expensive (like siphash). + * + * The hash table has to be defined with hashNodeT and hashtableT to be able to use the *Node macros. + * The *Node macros use the context type defined by hashNodeT, the context type is named: + * + * context_typeName + * + * The key and value in the node is accessed with: + * node.key + * node.value + * + * HASHFUNC is a function with prototype: + * hashType hashFunc(keyType key); + * + * CMPFUNC compares keys and is a function with prototype: + * int compare(keyType k1, keyType k2); + * Return 0 when the keys are equal, 1 otherwise + * + * FREEFUNC frees key and value is a function with prototype: + * void freeKV(keyType* key, valueType* value); + * NOTE: the free function gets a pointer to the key and value which is useful when iterating on the nodes + * (for example, when keyType is int, set key to -1, iterate on the aHashnode dArray and ignore the node + * when the key is -1) + * + * See the 'node macros' section below for details on usage scenarios of the *Node macros. + * + * Example: + * hashTbT(hasht, char*, u32, u32); + * + * #define HASHFUNC strHashFNV1A + * #define CMPFUNC strcmp + * #define FREEFUNC freeNode + * + * void freeNode(char **key, u32* UNUSED value) { + * //logI("free key %x\n", key); + * free(key); + * } + * + * hasht h; + * hashtableInit(h, 128, 2, 1); + * + * hashTbAdd(h, "a key", 12); + * + * u32 result = 0; + * hashTbGet(h, "a key", result); + * + * hashTbDel(h, "a key"); + * + * hashTbFree(h); + */ + + +#include "libsheepyObject.h" + +/** + * function declarations for tailored hash table + * + * - define hash table types + * - declare prototypes + * + * this macro is placed most of the time in a header file + * + * the Hash table type is defined as: + * 'typeName't + * + * the node in the hash table is defined as: + * 'typeName'Nodet + * + * the node context is defined as: + * context_'typeName'Nodet + * + * the function names are: + * function'suffix' + * + * the scope parameter is the function scope and can be 'empty' for global functions, 'static' for local functions, ... + * + * the declared functions are: + * init, empty, free, add, addOrFind, find, get, getKey, del, seed, addOrFindNode + * setKeyNode, getNode, updateNodeContext, unlink, freeUnlinked, delNode + * + * + * RETURN VALUES: + * add, setKeyNode, updateNodeContext: true success, false failure + * addOrFind, addOrFindNode: non NULL success, NULL failure. + * *isnew is set to true when the returned value pointer is in a new node. + * find, getNode, unlink: non NULL success, NULL failure + * + * the ctx pointer parameters must be allocated before the function calls + * the isnew pointer must point to an existing bool variable + * + * + * Example: + * hashTbDef(, m3232, M32, u32, u32, u32); // declare hash table type m3232t with u32 keys, u32 values and u32 hash + * + * m3232Nodet *node; + * context_m3232Nodet ctx; + * bool isnew; + * + * m3232t h; // declares a hash table + * + */ +#define hashTbDef(scope, typeName, suffix, keyType, valueType, hashType) \ + hashNodeT(typeName##Nodet, keyType, valueType, hashType);\ + hashtableT(typeName##t, typeName##Nodet);\ + scope void init##suffix(typeName##t *map);\ + scope void empty##suffix(typeName##t *map);\ + scope void free##suffix(typeName##t *map);\ + scope bool add##suffix(typeName##t *map, keyType key, valueType value);\ + scope valueType* addOrFind##suffix(typeName##t *map, keyType key, bool *isnew);\ + scope valueType* find##suffix(typeName##t *map, keyType key);\ + scope valueType get##suffix(typeName##t *map, keyType key);\ + scope keyType getKey##suffix(typeName##t *map, keyType key);\ + scope void del##suffix(typeName##t *map, keyType key);\ + scope u8* seed##suffix(typeName##t *map);\ + scope typeName##Nodet* addOrFindNode##suffix(typeName##t *map, keyType key, bool *isnew, context_##typeName##Nodet *ctx);\ + scope bool setKeyNode##suffix(typeName##t *map, context_##typeName##Nodet *ctx);\ + scope typeName##Nodet *getNode##suffix(typeName##t *map, keyType key, context_##typeName##Nodet *ctx);\ + scope bool updateNodeContext##suffix(typeName##t *map, context_##typeName##Nodet *ctx);\ + scope typeName##Nodet* unlink##suffix(typeName##t *map, keyType key);\ + scope void freeUnlinked##suffix(typeName##t *map, typeName##Nodet *node);\ + scope void delNode##suffix(typeName##t *map, context_##typeName##Nodet *ctx) + +/** + * function implementations + * + * this macros implements the hash table functions: + * init, empty, free, add, addOrFind, find, get, getKey, del, seed, addOrFindNode + * setKeyNode, getNode, updateNodeContext, unlink, freeUnlinked, delNode + * + * this macro is placed most of the time in a source file + * + * the hash function, compare function and free function must be implemented before this macro + * HASHFUNC, CMPFUNC and FREEFUNC must be defined before this macro + * + * Example: + * hashTbDef(, m3232, M32, u32, u32, u32); // declare hash table type m3232t with u32 keys, u32 values and u32 hash + * + * int cmp(u32 k1, u32 k2) { + * return k1 == k2 ? 0 : 1; + * } + * + * void freeM(u32* k, u32* v) { + * } + * + * #define HASHFUNC u32Hash // hash function from the hashfunctions spm package + * #define CMPFUNC cmp + * #define FREEFUNC freeM + * + * hashTbFunctions(, m3232, M32, u32, u32); // implements global scope m3232t functions: *M32() + * + * #undef HASHFUNC + * #undef CMPFUNC + * #undef FREEFUNC + * + * m3232t h; // declares a hash table + * + * initM32(&h); + * addM32(&h, 1, 1); + * + */ +#define hashTbFunctions(scope, typeName, suffix, keyType, valueType)\ + scope void init##suffix(typeName##t *map) {\ + hashTbInit(*map, 64, 2, 1);\ + }\ + scope void empty##suffix(typeName##t *map) {\ + hashTbEmpty(*map);\ + }\ + scope void free##suffix(typeName##t *map) {\ + hashTbFree(*map);\ + }\ + scope bool add##suffix(typeName##t *map, keyType key, valueType value) {\ + hashTbAdd(*map, key, value);\ + return map->res;\ + }\ + scope valueType* addOrFind##suffix(typeName##t *map, keyType key, bool *isnew) {\ + valueType* r = NULL;\ + hashTbAddOrFind(*map, key, r, *isnew);\ + return r;\ + }\ + scope valueType* find##suffix(typeName##t *map, keyType key) {\ + valueType *r = NULL;\ + hashTbFind(*map, key, r);\ + return r;\ + }\ + scope valueType get##suffix(typeName##t *map, keyType key) {\ + valueType r;\ + hashTbGet(*map, key, r);\ + return r;\ + }\ + scope keyType getKey##suffix(typeName##t *map, keyType key) {\ + keyType r;\ + hashTbGetKey(*map, key, r);\ + return r;\ + }\ + scope void del##suffix(typeName##t *map, keyType key) {\ + hashTbDel(*map, key);\ + }\ + scope u8* seed##suffix(typeName##t *map) {\ + u8 *r = hashTbSeed(*map);\ + return r;\ + }\ + scope typeName##Nodet* addOrFindNode##suffix(typeName##t *map, keyType key, bool *isnew, context_##typeName##Nodet *ctx) {\ + typeName##Nodet* r;\ + hashTbAddOrFindNode(*map, key, r, *ctx, *isnew);\ + return r;\ + }\ + scope bool setKeyNode##suffix(typeName##t *map, context_##typeName##Nodet *ctx) {\ + hashTbSetKeyNode(*map, *ctx);\ + return map->res;\ + }\ + scope typeName##Nodet *getNode##suffix(typeName##t *map, keyType key, context_##typeName##Nodet *ctx) {\ + typeName##Nodet *r = NULL;\ + hashTbGetNode(*map , key, r, *ctx);\ + return r;\ + }\ + scope bool updateNodeContext##suffix(typeName##t *map, context_##typeName##Nodet *ctx) {\ + hashTbUpdateNodeContext(*map, *ctx);\ + return map->res;\ + }\ + scope typeName##Nodet* unlink##suffix(typeName##t *map, keyType key) {\ + typeName##Nodet *r = NULL;\ + hashTbUnlinkNode(*map, key, r);\ + return r;\ + }\ + scope void freeUnlinked##suffix(typeName##t *map, typeName##Nodet *node) {\ + hashTbFreeUnlinkedNode(*map, node);\ + }\ + scope void delNode##suffix(typeName##t *map, context_##typeName##Nodet *ctx) {\ + hashTbDelNode(*map, *ctx);\ + } + + +/** initialize hash table */ +#define hashTbInit(HT, SIZE, NUMERATOR, DENOMINATOR) hashtableInit(HT, SIZE, NUMERATOR, DENOMINATOR) + +/** remove all key,value pairs in hash table */ +#define hashTbEmpty(HT) hashtableEmpty(HT, FREEFUNC) + +/** free hash table */ +#define hashTbFree(HT) hashtableFree(HT, FREEFUNC) + +/** insert new (key,value), fail when the key already exists */ +#define hashTbAdd(HT, K, V) hashtableAdd(HT, K, V, HASHFUNC, CMPFUNC) + +/** insert/find key, return value address in VPOINTER and ISNEW */ +#define hashTbAddOrFind(HT, K, VPOINTER, ISNEW) hashtableAddOrFind(HT, K, VPOINTER, ISNEW, HASHFUNC, CMPFUNC) + +/** find key, return value address in VPOINTER */ +#define hashTbFind(HT, K, VPOINTER) hashtableFind(HT, K, VPOINTER, HASHFUNC, CMPFUNC) + +/** get value */ +#define hashTbGet(HT, K, R) hashtableGet(HT, K, R, HASHFUNC, CMPFUNC) + +/** get key */ +#define hashTbGetKey(HT, K, R) hashtableGetKey(HT, K, R, HASHFUNC, CMPFUNC) + +/** remove key, value */ +#define hashTbDel(HT, K) hashtableDel(HT, K, HASHFUNC, CMPFUNC, FREEFUNC) + +/** get random seed address */ +#define hashTbSeed(HT) hashtableSeed(HT) + +/** create or reuse an exists node, return NODEPOINTER, NODECONTEXT and ISNEW */ +#define hashTbAddOrFindNode(HT, K, NODEPOINTER, NODECONTEXT, ISNEW) hashtableAddOrFindNode(HT, K, NODEPOINTER, NODECONTEXT, ISNEW, HASHFUNC, CMPFUNC) + +/** change the key in a node */ +#define hashTbSetKeyNode(HT, NODECONTEXT) hashtableSetKeyNode(HT, NODECONTEXT, HASHFUNC, CMPFUNC) + +/** get a node, return NODE and NODECONTEXT */ +#define hashTbGetNode(HT, K, NODE, NODECONTEXT) hashtableGetNode(HT, K, NODE, NODECONTEXT, HASHFUNC, CMPFUNC) + +/** key in node */ +#define hashTbNodeKey(NODE) (NODE).key + +/** value in node */ +#define hashTbNodeValue(NODE) (NODE).value + +/** update node context after a rehash, return NODECONTEXT */ +#define hashTbUpdateNodeContext(HT, NODECONTEXT) hashtableUpdateNodeContext(HT, NODECONTEXT, HASHFUNC, CMPFUNC) + +/** return a node and remove it from the hashtable, return NODE */ +#define hashTbUnlinkNode(HT, K, NODE) hashtableUnlinkNode(HT, K, NODE, HASHFUNC, CMPFUNC) + +/** free an unlinked node */ +#define hashTbFreeUnlinkedNode(HT, NODE) hashtableFreeUnlinkedNode(HT, NODE, FREEFUNC) + +/** delete a node */ +#define hashTbDelNode(HT, NODECONTEXT) hashtableDelNode(HT, NODECONTEXT, FREEFUNC) + +/** + * node definition for bucket single linked lists + * + * keyType, valueType and hashType can any valid type. + * + * context_typeName is defined and is needed for *Node macros + * + * In general, hashType is an uint between 32bits and 256bits + */ +#define hashNodeT(typeName, keyType, valueType, hashType)\ + typedef struct typeName typeName;\ + struct typeName {\ + keyType key;\ + typeName *next;\ + hashType hash;\ + u32 index;\ + valueType value;\ + };\ + typedef struct {\ + typeName *node;\ + typeName *prev;\ + u32 mhash;\ + u8 moreOrLess;\ + u32 size;\ + } TOKENPASTE(context_, typeName) /* concatenate strings with preprocessor */ + +/** + * hash table definition + * typeName is the type of hash table defined. + * + * hashNodeT(nodeType) has to defined before this macro. + * + * Example: + * hashNodeT(hashSipNodet, char*, u32, u64); + * hashtableT(hashsipt, hashSipNodet); + * + * context_hashSipNodet is the node context type. + */ +#define hashtableT(typeName, nodeType)\ + dArrayT(UNIQVAR(aHashnodet), nodeType);\ + dArrayT(UNIQVAR(HNFreet), u32);\ + typedef struct {\ + nodeType* (*list)[][2]; /* buckets, 2 single linked lists */\ + nodeType node;\ + size_t count; /* node count */\ + u32 size; /* bucket count */\ + u32 szMask; /* mask for bucket indexing */\ + UNIQVAR(aHashnodet) aHashnode; /* node dynamic array */\ + UNIQVAR(HNFreet) HNFree; /* list of free nodes in aHashnode */\ + u8 hashseed[16]; /* random seed for siphash */\ + u32 loadfactor_numerator;\ + u32 loadfactor_denominator;\ + bool res; /* return value for macros */\ + bool newNodeInDArray; /* add, addOrFind and addOrFindNode set this flag: true an new node is pushed in aHashnode, false an existing array element is recycled */\ + } typeName + +/** + * hash table and hash node definition + * typeName is the type of hash table defined. + * + * keyType, valueType and hashType can any valid type. + * + * In general, hashType is an uint between 32bits and 256bits + */ +#define hashTbT(typeName, keyType, valueType, hashType)\ + hashNodeT(UNIQVAR(hashnodet), keyType, valueType, hashType);\ + hashtableT(typeName, UNIQVAR(hashnodet)) + +/** + * fast log2 function for computing the bucket list size and mask + */ +u32 ilog2Hashtable(u32 value); + +/** + * initialize the hash table 'name' + * + * sz is the initial size + * numerator and denominator are the load factor + * + * the random hash seed is generated + * + * this macro must be called before using the hash table + * + * \return + * (name).res true success, false failed + */ +#define hashtableInit(name, sz, numerator, denominator) do{\ + (name).size = sz;\ + if (sz <= 0) { (name).res = false; break;}\ + (name).loadfactor_numerator = numerator;\ + (name).loadfactor_denominator = denominator;\ + dArrayInit(&(name).aHashnode);\ + dArrayInit(&(name).HNFree);\ + setSoftwareRandom();\ + range(UNIQVAR(i),16) {\ + (name).hashseed[UNIQVAR(i)] = randomChoice(256);\ + }\ + (name).size = (1<<ilog2Hashtable(sz));\ + (name).szMask = (name).size - 1;\ + (name).list = malloc((name).size * 2 * sizeof(typeof((name).node)*));\ + if (!(name).list) /* alloc failed */ { (name).size = 0; (name).res = false; break;}\ + memset((name).list, 0, (name).size * 2 * sizeof(typeof((name).node)*));\ + (name).count = 0;\ + (name).res = true;\ + } while(0) + +/** Internal - for testing the hash table + * show the content of the buckets + */ +#define hashtableTest(name) do{\ + range(UNIQVAR(i), (name).size) {\ + /* free first linked list */\ + typeof((name).node) *UNIQVAR(node) = (*(name).list)[UNIQVAR(i)][0];\ + while (UNIQVAR(node)) {\ + logI("bucket %3d list 0 key %s", UNIQVAR(i), UNIQVAR(node)->key);\ + typeof((name).node) *UNIQVAR(next) = UNIQVAR(node)->next;\ + UNIQVAR(node) = UNIQVAR(next);\ + }\ + /* free second linked list */\ + UNIQVAR(node) = (*(name).list)[UNIQVAR(i)][1];\ + while (UNIQVAR(node)) {\ + logI("bucket %3d list 1 key %s", UNIQVAR(i), UNIQVAR(node)->key);\ + typeof((name).node) *UNIQVAR(next) = UNIQVAR(node)->next;\ + UNIQVAR(node) = UNIQVAR(next);\ + }\ + }\ + } while(0); + +/** + * remove all key,value pairs in hash table + */ +#define hashtableEmpty(name, freeNodeFunc) do{\ + range(UNIQVAR(i), (name).size) {\ + /* free first linked list */\ + typeof((name).node) *UNIQVAR(node) = (*(name).list)[UNIQVAR(i)][0];\ + while (UNIQVAR(node)) {\ + typeof((name).node) *UNIQVAR(next) = UNIQVAR(node)->next;\ + freeNodeFunc(&UNIQVAR(node)->key, &UNIQVAR(node)->value);\ + UNIQVAR(node) = UNIQVAR(next);\ + }\ + /* free second linked list */\ + UNIQVAR(node) = (*(name).list)[UNIQVAR(i)][1];\ + while (UNIQVAR(node)) {\ + typeof((name).node) *UNIQVAR(next) = UNIQVAR(node)->next;\ + freeNodeFunc(&UNIQVAR(node)->key, &UNIQVAR(node)->value);\ + UNIQVAR(node) = UNIQVAR(next);\ + }\ + (*(name).list)[UNIQVAR(i)][0] = NULL;\ + (*(name).list)[UNIQVAR(i)][1] = NULL;\ + }\ + (name).count = 0;\ + } while(0); + +/** + * empty hash table and free all buffers + * + * \return + * (name).res true success, false failed + */ +#define hashtableFree(name, freeNodeFunc) do{\ + (name).res = false;\ + if (!(name).size) /* size=0 invalid */ break;\ + hashtableEmpty(name, freeNodeFunc);\ + free((name).list);\ + dArrayFree(&(name).aHashnode);\ + dArrayFree(&(name).HNFree);\ + (name).list = NULL;\ + (name).count = 0;\ + (name).size = 0;\ + (name).res = true;\ + } while(0) + +/** + * resize the hash table + * + * \return + * (name).res true success, false failed + */ +#define hashtableResize(name, sz) do{\ + u32 UNIQVAR(new_size) = (1<<ilog2Hashtable(sz));\ + (name).szMask = (name).size - 1;\ + if ((name).size == UNIQVAR(new_size)) goto UNIQVAR(msuccess);\ + typeof((name).node) *(*UNIQVAR(list))[][2] = malloc(UNIQVAR(new_size) * 2 * sizeof(typeof((name).node)*));\ + if (!UNIQVAR(list)) {(name).res = false; break;}\ + memset(UNIQVAR(list), 0, UNIQVAR(new_size) * 2 * sizeof(typeof((name).node)*));\ + range(UNIQVAR(i), (name).size) {\ + range(UNIQVAR(moreLessIdx), 2) {\ + for (typeof((name).node) *UNIQVAR(node) = (*(name).list)[UNIQVAR(i)][UNIQVAR(moreLessIdx)]; UNIQVAR(node);) {\ + typeof((name).node) *UNIQVAR(next) = UNIQVAR(node)->next;\ + u32 UNIQVAR(mhash) = UNIQVAR(node)->hash & (name).szMask;\ + typeof((name).node) *UNIQVAR(search) = (*UNIQVAR(list))[UNIQVAR(mhash)][0];\ + typeof((name).node) *UNIQVAR(prev) = NULL;\ + u8 UNIQVAR(moreOrLess) = 0;\ + if (UNIQVAR(search)) {\ + if (UNIQVAR(node)->hash >= UNIQVAR(search)->hash) {\ + UNIQVAR(moreOrLess) = 0;\ + while (UNIQVAR(search) && UNIQVAR(node)->hash >= UNIQVAR(search)->hash) {\ + UNIQVAR(prev) = UNIQVAR(search);\ + UNIQVAR(search) = UNIQVAR(search)->next;\ + }\ + }\ + else {\ + UNIQVAR(moreOrLess) = 1;\ + UNIQVAR(search) = (*UNIQVAR(list))[UNIQVAR(mhash)][1];\ + while (UNIQVAR(search) && UNIQVAR(node)->hash <= UNIQVAR(search)->hash) {\ + UNIQVAR(prev) = UNIQVAR(search);\ + UNIQVAR(search) = UNIQVAR(search)->next;\ + }\ + }\ + }\ + UNIQVAR(node)->next = UNIQVAR(search);\ + if (UNIQVAR(prev)) {\ + UNIQVAR(prev)->next = UNIQVAR(node);\ + }\ + else {\ + (*UNIQVAR(list))[UNIQVAR(mhash)][UNIQVAR(moreOrLess)] = UNIQVAR(node);\ + }\ + UNIQVAR(node) = UNIQVAR(next);\ + }\ + }\ + }\ + free((name).list);\ + (name).list = UNIQVAR(list);\ + (name).size = UNIQVAR(new_size);\ + UNIQVAR(msuccess):\ + (name).res = true;\ + } while(0) + +/** + * insert key, value + * + * fails when the key already exists + * + * \return + * (name).res true success, false failed + */ +#define hashtableAdd(name, k, v, hashFunc, cmpFunc) do{\ + if ((name).loadfactor_denominator * (name).count >= (name).loadfactor_numerator * (name).size) {\ + /* load factor too high */\ + hashtableResize(name, (name).count*2);\ + /*if (!(name).res) break; else (name).res = false;*/\ + }\ + const typeof((name).node.hash) UNIQVAR(hash) = hashFunc(k);\ + const u32 UNIQVAR(mhash) = UNIQVAR(hash) & (name).szMask;\ + typeof((name).node) *UNIQVAR(node) = (*(name).list)[UNIQVAR(mhash)][0];\ + typeof((name).node) *UNIQVAR(prev) = NULL;\ + typeof((name).node) *UNIQVAR(add);\ + u8 UNIQVAR(moreOrLess) = 0;\ + if (UNIQVAR(node)) {\ + if (UNIQVAR(hash) >= UNIQVAR(node)->hash) {\ + UNIQVAR(moreOrLess) = 0;\ + while (UNIQVAR(node) && UNIQVAR(hash) >= UNIQVAR(node)->hash) {\ + if (UNIQVAR(hash) == UNIQVAR(node)->hash && ((k == UNIQVAR(node)->key) || (cmpFunc(k, UNIQVAR(node)->key) == 0))) {\ + (name).newNodeInDArray = false;\ + (name).res = false; goto UNIQVAR(mreturn);\ + }\ + UNIQVAR(prev) = UNIQVAR(node);\ + UNIQVAR(node) = UNIQVAR(node)->next;\ + }\ + }\ + else {\ + UNIQVAR(moreOrLess) = 1;\ + UNIQVAR(node) = (*(name).list)[UNIQVAR(mhash)][1];\ + while (UNIQVAR(node) && UNIQVAR(hash) <= UNIQVAR(node)->hash) {\ + if (UNIQVAR(hash) == UNIQVAR(node)->hash && ((k == UNIQVAR(node)->key) || (cmpFunc(k, UNIQVAR(node)->key) == 0))) {\ + (name).newNodeInDArray = false;\ + (name).res = false; goto UNIQVAR(mreturn);\ + }\ + UNIQVAR(prev) = UNIQVAR(node);\ + UNIQVAR(node) = UNIQVAR(node)->next;\ + }\ + }\ + }\ + if (dArrayIsEmpty(&(name).HNFree)) {\ + dArrayPush(&(name).aHashnode);\ + UNIQVAR(add) = dArrayLastPtr(&(name).aHashnode);\ + UNIQVAR(add)->index = dArrayLastIndex(&(name).aHashnode);\ + (name).newNodeInDArray = true;\ + }\ + else {\ + u32 UNIQVAR(idx) = dArrayLast(&(name).HNFree);\ + dArrayDelLast(&(name).HNFree);\ + UNIQVAR(add) = dArrayPtr(&(name).aHashnode, UNIQVAR(idx));\ + (name).newNodeInDArray = false;\ + }\ + UNIQVAR(add)->key = k;\ + UNIQVAR(add)->value = v;\ + UNIQVAR(add)->hash = UNIQVAR(hash);\ + if (UNIQVAR(prev)) UNIQVAR(prev)->next = UNIQVAR(add);\ + else (*(name).list)[UNIQVAR(mhash)][UNIQVAR(moreOrLess)] = UNIQVAR(add);\ + UNIQVAR(add)->next = UNIQVAR(node);\ + (name).count++;\ + (name).res = true;\ + UNIQVAR(mreturn):;\ + } while(0) + +/** + * add or find key, get value pointer + * + * vPointer must be of type: + * valueType * + * + * isNew must be of type: + * bool + * + * \return + * vPointer address to value associated the key k + * isNew true when the node is new, false when the node already existed + * (name).res true success, false failed + */ +#define hashtableAddOrFind(name, k, vPointer, isNew, hashFunc, cmpFunc) do{\ + if ((name).loadfactor_denominator * (name).count >= (name).loadfactor_numerator * (name).size) {\ + /* load factor too high */\ + hashtableResize(name, (name).count*2);\ + /*if (!(name).res) break; else (name).res = false;*/\ + }\ + const typeof((name).node.hash) UNIQVAR(hash) = hashFunc(k);\ + const u32 UNIQVAR(mhash) = UNIQVAR(hash) & (name).szMask;\ + typeof((name).node) *UNIQVAR(node) = (*(name).list)[UNIQVAR(mhash)][0];\ + typeof((name).node) *UNIQVAR(prev) = NULL;\ + typeof((name).node) *UNIQVAR(add);\ + u8 UNIQVAR(moreOrLess) = 0;\ + if (UNIQVAR(node)) {\ + if (UNIQVAR(hash) >= UNIQVAR(node)->hash) {\ + UNIQVAR(moreOrLess) = 0;\ + while (UNIQVAR(node) && UNIQVAR(hash) >= UNIQVAR(node)->hash) {\ + if (UNIQVAR(hash) == UNIQVAR(node)->hash && ((k == UNIQVAR(node)->key) || (cmpFunc(k, UNIQVAR(node)->key) == 0))) {\ + vPointer = &(UNIQVAR(node)->value);\ + isNew = false;\ + (name).newNodeInDArray = false;\ + (name).res = true; goto UNIQVAR(mreturn);\ + }\ + UNIQVAR(prev) = UNIQVAR(node);\ + UNIQVAR(node) = UNIQVAR(node)->next;\ + }\ + }\ + else {\ + UNIQVAR(moreOrLess) = 1;\ + UNIQVAR(node) = (*(name).list)[UNIQVAR(mhash)][1];\ + while (UNIQVAR(node) && UNIQVAR(hash) <= UNIQVAR(node)->hash) {\ + if (UNIQVAR(hash) == UNIQVAR(node)->hash && ((k == UNIQVAR(node)->key) || (cmpFunc(k, UNIQVAR(node)->key) == 0))) {\ + vPointer = &(UNIQVAR(node)->value);\ + isNew = false;\ + (name).newNodeInDArray = false;\ + (name).res = true; goto UNIQVAR(mreturn);\ + }\ + UNIQVAR(prev) = UNIQVAR(node);\ + UNIQVAR(node) = UNIQVAR(node)->next;\ + }\ + }\ + }\ + isNew = true;\ + if (dArrayIsEmpty(&(name).HNFree)) {\ + dArrayPush(&(name).aHashnode);\ + UNIQVAR(add) = dArrayLastPtr(&(name).aHashnode);\ + UNIQVAR(add)->index = dArrayLastIndex(&(name).aHashnode);\ + (name).newNodeInDArray = true;\ + }\ + else {\ + u32 UNIQVAR(idx) = dArrayLast(&(name).HNFree);\ + dArrayDelLast(&(name).HNFree);\ + UNIQVAR(add) = dArrayPtr(&(name).aHashnode, UNIQVAR(idx));\ + (name).newNodeInDArray = false;\ + }\ + UNIQVAR(add)->key = k;\ + vPointer = &(UNIQVAR(add)->value);\ + UNIQVAR(add)->hash = UNIQVAR(hash);\ + if (UNIQVAR(prev)) UNIQVAR(prev)->next = UNIQVAR(add);\ + else (*(name).list)[UNIQVAR(mhash)][UNIQVAR(moreOrLess)] = UNIQVAR(add);\ + UNIQVAR(add)->next = UNIQVAR(node);\ + (name).count++;\ + (name).res = true;\ + UNIQVAR(mreturn):;\ + } while(0) + +/** + * find key, get value pointer + * + * vPointer must be of type: + * valueType * + * + * \return + * vPointer address to value associated the key k + * (name).res true success, false failed + */ +#define hashtableFind(name, k, vPointer, hashFunc, cmpFunc) do{\ + const typeof((name).node.hash) UNIQVAR(hash) = hashFunc(k);\ + const u32 UNIQVAR(mhash) = UNIQVAR(hash) & (name).szMask;\ + typeof((name).node) *UNIQVAR(node) = (*(name).list)[UNIQVAR(hash) & (name).szMask][0];\ + if (UNIQVAR(node)) {\ + if (UNIQVAR(hash) >= UNIQVAR(node)->hash) {\ + while (UNIQVAR(node) && UNIQVAR(hash) >= UNIQVAR(node)->hash) {\ + if (UNIQVAR(hash) == UNIQVAR(node)->hash && ((k == UNIQVAR(node)->key) || (cmpFunc(k, UNIQVAR(node)->key) == 0))) {\ + vPointer = &UNIQVAR(node)->value;\ + (name).res = true;\ + goto UNIQVAR(mreturn);\ + }\ + UNIQVAR(node) = UNIQVAR(node)->next;\ + }\ + }\ + else {\ + UNIQVAR(node) = (*(name).list)[UNIQVAR(mhash)][1];\ + while (UNIQVAR(node) && UNIQVAR(hash) <= UNIQVAR(node)->hash) {\ + if (UNIQVAR(hash) == UNIQVAR(node)->hash && ((k == UNIQVAR(node)->key) || (cmpFunc(k, UNIQVAR(node)->key) == 0))) {\ + vPointer = &UNIQVAR(node)->value;\ + (name).res = true;\ + goto UNIQVAR(mreturn);\ + }\ + UNIQVAR(node) = UNIQVAR(node)->next;\ + }\ + }\ + }\ + (name).res = false;\ + UNIQVAR(mreturn):;\ + } while(0) + +/** + * get value + * + * \return + * (name).res true success, false failed + */ +#define hashtableGet(name, k, result, hashFunc, cmpFunc) do{\ + const typeof((name).node.hash) UNIQVAR(hash) = hashFunc(k);\ + const u32 UNIQVAR(mhash) = UNIQVAR(hash) & (name).szMask;\ + typeof((name).node) *UNIQVAR(node) = (*(name).list)[UNIQVAR(hash) & (name).szMask][0];\ + if (UNIQVAR(node)) {\ + if (UNIQVAR(hash) >= UNIQVAR(node)->hash) {\ + while (UNIQVAR(node) && UNIQVAR(hash) >= UNIQVAR(node)->hash) {\ + if (UNIQVAR(hash) == UNIQVAR(node)->hash && ((k == UNIQVAR(node)->key) || (cmpFunc(k, UNIQVAR(node)->key) == 0))) {\ + result = UNIQVAR(node)->value;\ + (name).res = true;\ + goto UNIQVAR(mreturn);\ + }\ + UNIQVAR(node) = UNIQVAR(node)->next;\ + }\ + }\ + else {\ + UNIQVAR(node) = (*(name).list)[UNIQVAR(mhash)][1];\ + while (UNIQVAR(node) && UNIQVAR(hash) <= UNIQVAR(node)->hash) {\ + if (UNIQVAR(hash) == UNIQVAR(node)->hash && ((k == UNIQVAR(node)->key) || (cmpFunc(k, UNIQVAR(node)->key) == 0))) {\ + result = UNIQVAR(node)->value;\ + (name).res = true;\ + goto UNIQVAR(mreturn);\ + }\ + UNIQVAR(node) = UNIQVAR(node)->next;\ + }\ + }\ + }\ + (name).res = false;\ + UNIQVAR(mreturn):;\ + } while(0) + +/** + * get key + * + * \return + * (name).res true success, false failed + */ +#define hashtableGetKey(name, k, result, hashFunc, cmpFunc) do{\ + const typeof((name).node.hash) UNIQVAR(hash) = hashFunc(k);\ + const u32 UNIQVAR(mhash) = UNIQVAR(hash) & (name).szMask;\ + typeof((name).node) *UNIQVAR(node) = (*(name).list)[UNIQVAR(hash) & (name).szMask][0];\ + if (UNIQVAR(node)) {\ + if (UNIQVAR(hash) >= UNIQVAR(node)->hash) {\ + while (UNIQVAR(node) && UNIQVAR(hash) >= UNIQVAR(node)->hash) {\ + if (UNIQVAR(hash) == UNIQVAR(node)->hash && ((k == UNIQVAR(node)->key) || (cmpFunc(k, UNIQVAR(node)->key) == 0))) {\ + result = UNIQVAR(node)->key;\ + (name).res = true;\ + goto UNIQVAR(mreturn);\ + }\ + UNIQVAR(node) = UNIQVAR(node)->next;\ + }\ + }\ + else {\ + UNIQVAR(node) = (*(name).list)[UNIQVAR(mhash)][1];\ + while (UNIQVAR(node) && UNIQVAR(hash) <= UNIQVAR(node)->hash) {\ + if (UNIQVAR(hash) == UNIQVAR(node)->hash && ((k == UNIQVAR(node)->key) || (cmpFunc(k, UNIQVAR(node)->key) == 0))) {\ + result = UNIQVAR(node)->key;\ + (name).res = true;\ + goto UNIQVAR(mreturn);\ + }\ + UNIQVAR(node) = UNIQVAR(node)->next;\ + }\ + }\ + }\ + (name).res = false;\ + UNIQVAR(mreturn):;\ + } while(0) + +/** + * remove key, value + * + * \return + * (name).res true success, false failed + */ +#define hashtableDel(name, k, hashFunc, cmpFunc, freeNodeFunc) do{\ + const typeof((name).node.hash) UNIQVAR(hash) = hashFunc(k);\ + u32 UNIQVAR(mhash) = UNIQVAR(hash) & (name).szMask;\ + typeof((name).node) *UNIQVAR(node) = (*(name).list)[UNIQVAR(mhash)][0];\ + typeof((name).node) *UNIQVAR(prev) = NULL;\ + if (UNIQVAR(node)) {\ + if (UNIQVAR(hash) >= UNIQVAR(node)->hash) {\ + while (UNIQVAR(node) && UNIQVAR(hash) >= UNIQVAR(node)->hash) {\ + if (UNIQVAR(hash) == UNIQVAR(node)->hash && ((k == UNIQVAR(node)->key) || (cmpFunc(k, UNIQVAR(node)->key) == 0))) {\ + freeNodeFunc(&UNIQVAR(node)->key, &UNIQVAR(node)->value);\ + hashtableInternalDelNode(name, UNIQVAR(node), UNIQVAR(mhash), UNIQVAR(prev), 0);\ + (name).res = true;\ + goto UNIQVAR(mreturn);\ + }\ + UNIQVAR(prev) = UNIQVAR(node);\ + UNIQVAR(node) = UNIQVAR(node)->next;\ + }\ + }\ + else {\ + UNIQVAR(node) = (*(name).list)[UNIQVAR(mhash)][1];\ + while (UNIQVAR(node) && UNIQVAR(hash) <= UNIQVAR(node)->hash) {\ + if (UNIQVAR(hash) == UNIQVAR(node)->hash && ((k == UNIQVAR(node)->key) || (cmpFunc(k, UNIQVAR(node)->key) == 0))) {\ + freeNodeFunc(&UNIQVAR(node)->key, &UNIQVAR(node)->value);\ + hashtableInternalDelNode(name, UNIQVAR(node), UNIQVAR(mhash), UNIQVAR(prev), 1);\ + (name).res = true;\ + goto UNIQVAR(mreturn);\ + }\ + UNIQVAR(prev) = UNIQVAR(node);\ + UNIQVAR(node) = UNIQVAR(node)->next;\ + }\ + }\ + }\ + (name).res = false;\ + UNIQVAR(mreturn):;\ + } while(0) + +// Internal macro +#define hashtableInternalDelNode(name, node, mhash, prev, moreOrLess) do{\ + hashtableInternalUnlink(name, node, mhash, prev, moreOrLess);\ + dArrayAppend(&(name).HNFree, node->index);\ + (name).count--;\ + } while(0) + +/** + * get hash seed address for siphash + */ +#define hashtableSeed(name) ((name).hashseed) + + + +/*************************************** + * node macros + * + * Usage scenarios: + * + * The getNode allow processing node data before reinsert or delete and running the hash function + * only once. + * + * Example: + * getNode + * use node data, change key + * setKeyNode + * + * same as + * get + * use node data, change key + * del original key + * add new key, value + * + * The unlink macro allow using data in a node before deleting it. + * The same operation can be done with Get and Del, using unlink runs the hash function only once. + * + * Example: + * unlinkNode + * use node data + * freeUnlinkedNode + * + * same as + * get + * use value + * del + * + ***************************************/ + +/** + * add or find node + * + * nodePointer must be a pointer to hash node type + * hashNode *result; + * + * nodeContext must be a struct of type + * context_hashNode ctx; + * + * \return + * (name).res true success, false failed + */ +#define hashtableAddOrFindNode(name, k, nodePointer, nodeContext, isNew, hashFunc, cmpFunc) do{\ + if ((name).loadfactor_denominator * (name).count >= (name).loadfactor_numerator * (name).size) {\ + /* load factor too high */\ + hashtableResize(name, (name).count*2);\ + /*if (!(name).res) break; else (name).res = false;*/\ + }\ + const typeof((name).node.hash) UNIQVAR(hash) = hashFunc(k);\ + const u32 UNIQVAR(mhash) = UNIQVAR(hash) & (name).szMask;\ + (nodeContext).node->hash = UNIQVAR(hash);\ + typeof((name).node) *UNIQVAR(node) = (*(name).list)[UNIQVAR(mhash)][0];\ + typeof((name).node) *UNIQVAR(prev) = NULL;\ + u8 UNIQVAR(moreOrLess) = 0;\ + if (UNIQVAR(node)) {\ + if (UNIQVAR(hash) >= UNIQVAR(node)->hash) {\ + UNIQVAR(moreOrLess) = 0;\ + while (UNIQVAR(node) && UNIQVAR(hash) >= UNIQVAR(node)->hash) {\ + if (UNIQVAR(hash) == UNIQVAR(node)->hash && ((k == UNIQVAR(node)->key) || (cmpFunc(k, UNIQVAR(node)->key) == 0))) {\ + nodePointer = UNIQVAR(node);\ + (nodeContext).node = nodePointer;\ + (nodeContext).prev = UNIQVAR(prev);\ + (nodeContext).mhash = UNIQVAR(mhash);\ + (nodeContext).moreOrLess = UNIQVAR(moreOrLess);\ + (nodeContext).size = (name).size;\ + isNew = false;\ + (name).newNodeInDArray = false;\ + (name).res = true; goto UNIQVAR(mreturn);\ + }\ + UNIQVAR(prev) = UNIQVAR(node);\ + UNIQVAR(node) = UNIQVAR(node)->next;\ + }\ + }\ + else {\ + UNIQVAR(moreOrLess) = 1;\ + UNIQVAR(node) = (*(name).list)[UNIQVAR(mhash)][1];\ + while (UNIQVAR(node) && UNIQVAR(hash) <= UNIQVAR(node)->hash) {\ + if (UNIQVAR(hash) == UNIQVAR(node)->hash && ((k == UNIQVAR(node)->key) || (cmpFunc(k, UNIQVAR(node)->key) == 0))) {\ + nodePointer = UNIQVAR(node);\ + (nodeContext).node = nodePointer;\ + (nodeContext).prev = UNIQVAR(prev);\ + (nodeContext).mhash = UNIQVAR(mhash);\ + (nodeContext).moreOrLess = UNIQVAR(moreOrLess);\ + (nodeContext).size = (name).size;\ + isNew = false;\ + (name).newNodeInDArray = false;\ + (name).res = true; goto UNIQVAR(mreturn);\ + }\ + UNIQVAR(prev) = UNIQVAR(node);\ + UNIQVAR(node) = UNIQVAR(node)->next;\ + }\ + }\ + }\ + isNew = true;\ + if (dArrayIsEmpty(&(name).HNFree)) {\ + dArrayPush(&(name).aHashnode);\ + nodePointer = dArrayLastPtr(&(name).aHashnode);\ + nodePointer->index = dArrayLastIndex(&(name).aHashnode);\ + (name).newNodeInDArray = true;\ + }\ + else {\ + u32 UNIQVAR(idx) = dArrayLast(&(name).HNFree);\ + dArrayDelLast(&(name).HNFree);\ + nodePointer = dArrayPtr(&(name).aHashnode, UNIQVAR(idx));\ + (name).newNodeInDArray = false;\ + }\ + nodePointer->key = k;\ + if (UNIQVAR(prev)) UNIQVAR(prev)->next = nodePointer;\ + else (*(name).list)[UNIQVAR(mhash)][UNIQVAR(moreOrLess)] = nodePointer;\ + nodePointer->next = UNIQVAR(node);\ + (name).count++;\ + /* set context */\ + (nodeContext).node = nodePointer;\ + (nodeContext).prev = UNIQVAR(prev);\ + (nodeContext).mhash = UNIQVAR(mhash);\ + (nodeContext).moreOrLess = UNIQVAR(moreOrLess);\ + (nodeContext).size = (name).size;\ + (name).res = true;\ + UNIQVAR(mreturn):;\ + } while(0) + +/** + * set node to be used when the key is updated + * unlink the node then + * insert it in another bucket + * + * nodeContext must be a struct of type + * context_hashNode ctx; + * + * The node context is updated and remains valid after this macro (no need to run updateNodeContext) + * + * When key,value pairs are added after getNode and the table is rehashed (the size changed), the node context should be updated with getNode before calling this macro + * + * The node is unlinked, so in case of failure, the node should be added with AddOrFindNode or the node should be freed with freeUnlinkedNode + * + * \return + * (name).res true success, false failed + */ +#define hashtableSetKeyNode(name, nodeContext, hashFunc, cmpFunc) do{\ + /* no need to resize because the node is unlinked and the key is rehashed */\ + const typeof((name).node.hash) UNIQVAR(hash) = hashFunc((nodeContext).node->key);\ + if (UNIQVAR(hash) == (nodeContext).node->hash) /* the hash is identical to the previous one, we are done */{\ + (name).res = true; goto UNIQVAR(mreturn);\ + }\ + hashtableInternalUnlinkNode(name, nodeContext);\ + if (!(name).res) /* the table has been rehashed, the context is invalid */ goto UNIQVAR(mreturn);\ + const u32 UNIQVAR(mhash) = UNIQVAR(hash) & (name).szMask;\ + (nodeContext).node->hash = UNIQVAR(hash);\ + typeof((name).node) *UNIQVAR(node) = (*(name).list)[UNIQVAR(mhash)][0];\ + typeof((name).node) *UNIQVAR(prev) = NULL;\ + u8 UNIQVAR(moreOrLess) = 0;\ + if (UNIQVAR(node)) {\ + if (UNIQVAR(hash) >= UNIQVAR(node)->hash) {\ + UNIQVAR(moreOrLess) = 0;\ + while (UNIQVAR(node) && UNIQVAR(hash) >= UNIQVAR(node)->hash) {\ + if (UNIQVAR(hash) == UNIQVAR(node)->hash && (((nodeContext).node->key == UNIQVAR(node)->key) || (cmpFunc((nodeContext).node->key, UNIQVAR(node)->key) == 0))) {\ + (name).res = false; goto UNIQVAR(mreturn);\ + }\ + UNIQVAR(prev) = UNIQVAR(node);\ + UNIQVAR(node) = UNIQVAR(node)->next;\ + }\ + }\ + else {\ + UNIQVAR(moreOrLess) = 1;\ + UNIQVAR(node) = (*(name).list)[UNIQVAR(mhash)][1];\ + while (UNIQVAR(node) && UNIQVAR(hash) <= UNIQVAR(node)->hash) {\ + if (UNIQVAR(hash) == UNIQVAR(node)->hash && (((nodeContext).node->key == UNIQVAR(node)->key) || (cmpFunc((nodeContext).node->key, UNIQVAR(node)->key) == 0))) {\ + (name).res = false; goto UNIQVAR(mreturn);\ + }\ + UNIQVAR(prev) = UNIQVAR(node);\ + UNIQVAR(node) = UNIQVAR(node)->next;\ + }\ + }\ + }\ + if (UNIQVAR(prev)) UNIQVAR(prev)->next = (nodeContext).node;\ + else (*(name).list)[UNIQVAR(mhash)][UNIQVAR(moreOrLess)] = (nodeContext).node;\ + (nodeContext).node->next = UNIQVAR(node);\ + /* update context */\ + (nodeContext).prev = UNIQVAR(prev);\ + (nodeContext).mhash = UNIQVAR(mhash);\ + (nodeContext).moreOrLess = UNIQVAR(moreOrLess);\ + (nodeContext).size = (name).size;\ + (name).res = true;\ + UNIQVAR(mreturn):;\ + } while(0) + +// Internal macro, like Internal DelNode without free the node +// the node is removed from the bucket +#define hashtableInternalUnlinkNode(name, nodeContext) do{\ + if ((nodeContext).size != (name).size) {\ + /* the table has been rehashed since the nodeContext was filled in */\ + /* update the context, run updateNodeContext with the original key */\ + /* the original is not available in the context, since the key is probably updated */\ + (name).res = false;\ + break;\ + }\ + if ((nodeContext).prev) (nodeContext).prev->next = (nodeContext).node->next;\ + else (*(name).list)[(nodeContext).mhash][(nodeContext).moreOrLess] = (nodeContext).node->next;\ + if (((nodeContext).moreOrLess == 0) && !(*(name).list)[(nodeContext).mhash][0] && (*(name).list)[(nodeContext).mhash][1]) {\ + /* the first linked list is empty, take the first element from the second linked list */\ + (*(name).list)[(nodeContext).mhash][0] = (*(name).list)[(nodeContext).mhash][1];\ + (*(name).list)[(nodeContext).mhash][1] = (*(name).list)[(nodeContext).mhash][1]->next;\ + (*(name).list)[(nodeContext).mhash][0]->next = NULL;\ + }\ + (name).res = true;\ + } while(0) + + +/** + * get node + * + * result must be a pointer to hash node type + * hashNode *result; + * + * nodeContext must be a struct of type + * context_hashNode ctx; + * + * \return + * (name).res true success, false failed + */ +#define hashtableGetNode(name, k, result, nodeContext, hashFunc, cmpFunc) do{\ + const typeof((name).node.hash) UNIQVAR(hash) = hashFunc(k);\ + const u32 UNIQVAR(mhash) = UNIQVAR(hash) & (name).szMask;\ + typeof((name).node) *UNIQVAR(node) = (*(name).list)[UNIQVAR(hash) & (name).szMask][0];\ + typeof((name).node) *UNIQVAR(prev) = NULL;\ + if (UNIQVAR(node)) {\ + if (UNIQVAR(hash) >= UNIQVAR(node)->hash) {\ + while (UNIQVAR(node) && UNIQVAR(hash) >= UNIQVAR(node)->hash) {\ + if (UNIQVAR(hash) == UNIQVAR(node)->hash && ((k == UNIQVAR(node)->key) || (cmpFunc(k, UNIQVAR(node)->key) == 0))) {\ + result = UNIQVAR(node);\ + (nodeContext).node = UNIQVAR(node);\ + (nodeContext).prev = UNIQVAR(prev);\ + (nodeContext).mhash = UNIQVAR(mhash);\ + (nodeContext).moreOrLess = 0;\ + (nodeContext).size = (name).size;\ + (name).res = true;\ + goto UNIQVAR(mreturn);\ + }\ + UNIQVAR(prev) = UNIQVAR(node);\ + UNIQVAR(node) = UNIQVAR(node)->next;\ + }\ + }\ + else {\ + UNIQVAR(node) = (*(name).list)[UNIQVAR(mhash)][1];\ + while (UNIQVAR(node) && UNIQVAR(hash) <= UNIQVAR(node)->hash) {\ + if (UNIQVAR(hash) == UNIQVAR(node)->hash && ((k == UNIQVAR(node)->key) || (cmpFunc(k, UNIQVAR(node)->key) == 0))) {\ + result = UNIQVAR(node);\ + (nodeContext).node = UNIQVAR(node);\ + (nodeContext).prev = UNIQVAR(prev);\ + (nodeContext).mhash = UNIQVAR(mhash);\ + (nodeContext).moreOrLess = 1;\ + (nodeContext).size = (name).size;\ + (name).res = true;\ + goto UNIQVAR(mreturn);\ + }\ + UNIQVAR(prev) = UNIQVAR(node);\ + UNIQVAR(node) = UNIQVAR(node)->next;\ + }\ + }\ + }\ + (name).res = false;\ + UNIQVAR(mreturn):;\ + } while(0) + +/** + * update node context + * + * the node context must have been initialized with getNode or setKeyNode + * + * call this macro when the table has been rehashed after getNode or setKeyNode + * + * nodeContext must be a struct of type + * context_hashNode ctx; + * + * \return + * (name).res true success, false failed + */ +#define hashtableUpdateNodeContext(name, nodeContext, hashFunc, cmpFunc) do{\ + const typeof((name).node.hash) UNIQVAR(hash) = (nodeContext).node->hash;\ + const u32 UNIQVAR(mhash) = UNIQVAR(hash) & (name).szMask;\ + typeof((name).node) *UNIQVAR(node) = (*(name).list)[UNIQVAR(hash) & (name).szMask][0];\ + typeof((name).node) *UNIQVAR(prev) = NULL;\ + if (UNIQVAR(node)) {\ + if (UNIQVAR(hash) >= UNIQVAR(node)->hash) {\ + while (UNIQVAR(node) && UNIQVAR(hash) >= UNIQVAR(node)->hash) {\ + if (UNIQVAR(hash) == UNIQVAR(node)->hash && (((nodeContext).node->key == UNIQVAR(node)->key) || (cmpFunc((nodeContext).node->key, UNIQVAR(node)->key) == 0))) {\ + (nodeContext).prev = UNIQVAR(prev);\ + (nodeContext).mhash = UNIQVAR(mhash);\ + (nodeContext).moreOrLess = 0;\ + (nodeContext).size = (name).size;\ + (name).res = true;\ + goto UNIQVAR(mreturn);\ + }\ + UNIQVAR(prev) = UNIQVAR(node);\ + UNIQVAR(node) = UNIQVAR(node)->next;\ + }\ + }\ + else {\ + UNIQVAR(node) = (*(name).list)[UNIQVAR(mhash)][1];\ + while (UNIQVAR(node) && UNIQVAR(hash) <= UNIQVAR(node)->hash) {\ + if (UNIQVAR(hash) == UNIQVAR(node)->hash && (((nodeContext).node->key == UNIQVAR(node)->key) || (cmpFunc((nodeContext).node->key, UNIQVAR(node)->key) == 0))) {\ + (nodeContext).prev = UNIQVAR(prev);\ + (nodeContext).mhash = UNIQVAR(mhash);\ + (nodeContext).moreOrLess = 1;\ + (nodeContext).size = (name).size;\ + (name).res = true;\ + goto UNIQVAR(mreturn);\ + }\ + UNIQVAR(prev) = UNIQVAR(node);\ + UNIQVAR(node) = UNIQVAR(node)->next;\ + }\ + }\ + }\ + (name).res = false;\ + UNIQVAR(mreturn):;\ + } while(0) + +/** + * unlink node + * + * result must be a pointer to hash node type + * hashNode *result; + * + * result must be freed with freeUnlinkNode + * + * This macro allow using data in a node before deleting it. + * The same operation can be done with Get and Del, using unlink runs the hash function only once. + * + * Example: + * unlinkNode + * use node data + * freeUnlinkedNode + * + * same as + * get + * use value + * del + * + * \return + * result unlinked node + * (name).res true success, false failed + */ +#define hashtableUnlinkNode(name, k, result, hashFunc, cmpFunc) do{\ + const typeof((name).node.hash) UNIQVAR(hash) = hashFunc(k);\ + const u32 UNIQVAR(mhash) = UNIQVAR(hash) & (name).szMask;\ + typeof((name).node) *UNIQVAR(node) = (*(name).list)[UNIQVAR(hash) & (name).szMask][0];\ + typeof((name).node) *UNIQVAR(prev) = NULL;\ + if (UNIQVAR(node)) {\ + if (UNIQVAR(hash) >= UNIQVAR(node)->hash) {\ + while (UNIQVAR(node) && UNIQVAR(hash) >= UNIQVAR(node)->hash) {\ + if (UNIQVAR(hash) == UNIQVAR(node)->hash && ((k == UNIQVAR(node)->key) || (cmpFunc(k, UNIQVAR(node)->key) == 0))) {\ + hashtableInternalUnlink(name, UNIQVAR(node), UNIQVAR(mhash), UNIQVAR(prev), 0);\ + result = UNIQVAR(node);\ + (name).res = true;\ + goto UNIQVAR(mreturn);\ + }\ + UNIQVAR(prev) = UNIQVAR(node);\ + UNIQVAR(node) = UNIQVAR(node)->next;\ + }\ + }\ + else {\ + UNIQVAR(node) = (*(name).list)[UNIQVAR(mhash)][1];\ + while (UNIQVAR(node) && UNIQVAR(hash) <= UNIQVAR(node)->hash) {\ + if (UNIQVAR(hash) == UNIQVAR(node)->hash && ((k == UNIQVAR(node)->key) || (cmpFunc(k, UNIQVAR(node)->key) == 0))) {\ + hashtableInternalUnlink(name, UNIQVAR(node), UNIQVAR(mhash), UNIQVAR(prev), 1);\ + result = UNIQVAR(node);\ + (name).res = true;\ + goto UNIQVAR(mreturn);\ + }\ + UNIQVAR(prev) = UNIQVAR(node);\ + UNIQVAR(node) = UNIQVAR(node)->next;\ + }\ + }\ + }\ + (name).res = false;\ + UNIQVAR(mreturn):;\ + } while(0) + +// Internal macro +#define hashtableInternalUnlink(name, node, mhash, prev, moreOrLess) do{\ + if (prev) prev->next = node->next;\ + else (*(name).list)[mhash][moreOrLess] = node->next;\ + if ((moreOrLess == 0) && !(*(name).list)[mhash][0] && (*(name).list)[mhash][1]) {\ + /* the first linked list is empty, take the first element from the second linked list */\ + (*(name).list)[mhash][0] = (*(name).list)[mhash][1];\ + (*(name).list)[mhash][1] = (*(name).list)[mhash][1]->next;\ + (*(name).list)[mhash][0]->next = NULL;\ + }\ + } while(0) + +/** + * free an unlinked node + * + * node must be a pointer to hash node type + * hashNode *node; + * + * the node has to be first unlinked with the unlinkNode macro + */ +#define hashtableFreeUnlinkedNode(name, node, freeNodeFunc) do{\ + freeNodeFunc(&(node)->key, &(node)->value);\ + dArrayAppend(&(name).HNFree, (node)->index);\ + (name).count--;\ + } while(0) + +/** + * delete/remove node + * + * the node context must be already initialized with the other *Node macros + * + * if the node context is changed due to rehash, updateNodeContext has to be called before this macro + */ +// no status +#define hashtableDelNode(name, nodeContext, freeNodeFunc) do{\ + freeNodeFunc(&(nodeContext).node->key, &(nodeContext).node->value);\ + hashtableInternalDelNode(name, (nodeContext).node, (nodeContext).mhash, (nodeContext).prev, (nodeContext).moreOrLess);\ + } while(0) + diff --git a/hashtableMain.c b/hashtableMain.c @@ -0,0 +1,223 @@ +#! /usr/bin/env sheepy +/* or direct path to sheepy: #! /usr/local/bin/sheepy */ + +#include "libsheepyObject.h" + +// hash table with 2 single linked list per bucket +#include "hashtable.h" + +// hash functions for the hash table +#include "shpPackages/hashfunctions/hashfunctions.h" + +int argc; char **argv; + +hashTbDef(, m3232, M32, u32, u32, u32); + +u32 h(u32 k) { + return k; +} + +int cmp(u32 k1, u32 k2) { + return k1 == k2 ? 0 : 1; +} + +void freeM(u32 *k, u32 *v) { +} + +#define HASHFUNC h +#define CMPFUNC cmp +#define FREEFUNC freeM + +hashTbFunctions(, m3232, M32, u32, u32); + +#undef HASHFUNC +#undef CMPFUNC +#undef FREEFUNC + +// hash table type definition +hashTbT(hashchart /* new hash table type */, char* /* key type*/, u32 /* value type */, u32 /* hash type*/); + + +// hash table type definition with 64 bit hashes +hashNodeT(hashSipNodet, char*, u32, u64); +hashtableT(hashsipt, hashSipNodet); + +void freeNode(char **key, u32* UNUSED value) { + //logI("free key %x\n", key); + free(*key); +} + +// siphash setup for hashsipt table +const u8 *dict_hash_function_seed; + +u64 sipHash(char *k) { + return siphash(k,strlen(k),dict_hash_function_seed); +} + +// hash function for hashTb* macros +#define HASHFUNC strHashFNV1A + +// comparison function for hashTb* macros +#define CMPFUNC strcmp + +// free key and value function for hashTb* macros +#define FREEFUNC freeNode + +// load factor: 2 +#define LOADFACTOR_NUMERATOR 2 +#define LOADFACTOR_DENOMINATOR 1 + +#define TEST_COUNT 257 + +int main(int ARGC, char** ARGV) { + + argc = ARGC; argv = ARGV; + + initLibsheepy(argv[0]); + setLogMode(LOG_FUNC); + + // STEPS + // + // HASH TABLE USING FNV HASH FUNCTION + // + // hash table declaration + // hash table setup + // set/insert some key, value pairs + // get/search values + // remove key, value + // free hash table + // + // HASH TABLE USING SIPHASH + // + // hash table declaration + // hash table setup + // set/insert some key, value pairs + // get/search values + // remove key, value + // free hash table + + // hash table declaration + hashchart h; + + // hash table setup + hashtableInit(h, 128, LOADFACTOR_NUMERATOR, LOADFACTOR_DENOMINATOR); + + char *s; + + // set/insert some key, value pairs + range(i, TEST_COUNT) { + s = intToS(i); + /* hashtableAdd(h, s, i, HASHFUNC, CMPFUNC); */ + hashTbAdd(h, s, i); + if (!h.res) logE("ERROR"); + } + + // get/search values + u32 result = 0; + char S[30]; + range(i, TEST_COUNT) { + bIntToS(S, i); + /* hashtableGet(h, S, result, HASHFUNC, CMPFUNC); */ + hashTbGet(h, S, result); + if (!h.res) {logE("ERROR %s", S);} + } + + // remove key, value + /* hashtableDel(h, "15", HASHFUNC, CMPFUNC, FREEFUNC); */ + hashTbDel(h, "15"); + logI("del 15"); + logVarG(h.res); + + // free hash table + /* hashtableFree(h, FREEFUNC); */ + hashTbFree(h); + + + // hash table using siphash + + #undef HASHFUNC + #define HASHFUNC sipHash + + // hash table declaration + hashsipt H; + hashSipNodet *nd = NULL; + context_hashSipNodet ctx; + // hash table setup + hashtableInit(H, 128, LOADFACTOR_NUMERATOR, LOADFACTOR_DENOMINATOR); + + dict_hash_function_seed = hashtableSeed(H); + + s = strdup("sheepy"); + hashTbAdd(H, s, 234); + hashTbGetNode(H, "sheepy", nd, ctx); + + // set/insert some key, value pairs + range(i, TEST_COUNT) { + s = intToS(i); + hashTbAdd(H, s, i); + if (!H.res) logE("ERROR"); + } + + /* hashtableTest(H); */ + /* put put */ + hashTbUpdateNodeContext(H, ctx); + logVarG(h.res); + hashTbDelNode(H, ctx); + //hashTbDel(H, "sheepy"); + + /* hashtableTest(H); */ + + // get/search values + range(i, TEST_COUNT) { + bIntToS(S, i); + hashTbGet(H, S, result); + if (!H.res) {logE("ERROR %s", S);} + } + + u32 *val = NULL; + bool isNew; + hashTbAddOrFind(H, "1", val, isNew); + logVarG(*val); + logVarG(isNew); + s = strdup("qwe"); + hashTbAddOrFind(H, s, val, isNew); + logVarG(*val); + logVarG(isNew); + + // get node + //hashtableGetNode(H, "3", nd, ctx, HASHFUNC, CMPFUNC); + hashTbGetNode(H, "3", nd, ctx); + logVarG(ctx.node->key); + logVarG(ctx.node->value); + logI("%p", nd); + free(nd->key); + nd->key = strdup("sheep"); + //hashtableSetKeyNode(H, ctx, HASHFUNC, CMPFUNC); + hashTbSetKeyNode(H, ctx); + //hashTbGetNode(H, "sheep", nd, ctx); + //hashtableDelNode(H, ctx, FREEFUNC); + hashTbDelNode(H, ctx); + + hashTbUnlinkNode(H, "1", nd); + logVarG(nd->key); + hashTbFreeUnlinkedNode(H, nd); + + hashTbAddOrFindNode(H, "asd", nd, ctx, isNew); + nd->key = strdup("asd"); + nd->value = 12; + logVarG(isNew); + + // remove key, value + hashTbDel(H, "10"); + logI("del 10"); + logVarG(H.res); + + // free hash table + hashTbFree(H); + + m3232t h32; + initM32(&h32); + addM32(&h32, 1, 1); + m3232Nodet *node; + context_m3232Nodet ctx32; +} diff --git a/package.yml b/package.yml @@ -0,0 +1,19 @@ +--- + name: hashtable + version: 0.0.11 + description: "hash table macros for creating hash table functions, it supports any type key, value, hash" + bin: ./hashtable.c + # with gcc 6.3 (skylake cpu, debian stretch), the performance with O1 is better than with O3 + cflags: -O1 -std=gnu11 -fPIC -pipe + #lflags: -lpcre + repository: + type: git + url: git+https://github.com/RemyNoulin/hashtable.git + keywords: + - data structure + author: Remy + license: MIT + bugs: + url: https://github.com/RemyNoulin/hashtable/issues + homepage: https://github.com/RemyNoulin/hashtable + compileHelp: Run 'spm install hashfunctions' to be able to compile hashtableMain.c