set

set with hash function
git clone https://noulin.net/git/set.git
Log | Files | Refs | LICENSE

commit 7283851a0752bbf613ece025c497a2e5c8f4f8b8
parent b76849d9a70867b310f57c9c13da8c96327f4341
Author: Remy Noulin <loader2x@gmail.com>
Date:   Mon, 19 Nov 2018 19:22:51 -0500

add sstaticSet (simple static set) and astaticSet (another static set)

main.c      | 114 ++++++++++++++++++
package.yml |  17 +++
set.h       | 374 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 505 insertions(+)

Diffstat:
Amain.c | 114+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apackage.yml | 17+++++++++++++++++
Aset.h | 374+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 505 insertions(+), 0 deletions(-)

diff --git a/main.c b/main.c @@ -0,0 +1,114 @@ +#! /usr/bin/env sheepy +/* or direct path to sheepy: #! /usr/local/bin/sheepy */ + +/* Libsheepy documentation: http://spartatek.se/libsheepy/ */ +#include "libsheepyObject.h" +#include "set.h" + +// hash functions for the hash table +#include "shpPackages/hashfunctions/hashfunctions.h" + +int argc; char **argv; + +/* enable/disable logging */ +/* #undef pLog */ +/* #define pLog(...) */ + +int main(int ARGC, char** ARGV) { + + argc = ARGC; argv = ARGV; + + initLibsheepy(ARGV[0]); + setLogMode(LOG_VERBOSE); + + sstaticSetT(sst, u8, 5); + + sst st; + + sstaticSetClear(&st); + + #undef HASHFUNC + #undef ISEMPTY + #undef CMPFUNC + #define HASHFUNC(k) k + #define ISEMPTY(e) (e == 0) + #define CMPFUNC(e1, e2) (e1 == e2) + + var idx = sstaticSetGetNAdd(&st, 1); + logVarG(idx); + idx = sstaticSetGetNAdd(&st, 3); + logVarG(idx); + logI("get: %zd", sstaticSetGet(&st, 3)); + logI("get: %zd", sstaticSetGet(&st, 2)); + logI("has: %b", sstaticSetHas(&st, 3)); + logI("has: %b", sstaticSetHas(&st, 2)); + idx = sstaticSetGetNAdd(&st, 12); + logVarG(idx); + idx = sstaticSetGetNAdd(&st, 2); + logVarG(idx); + idx = sstaticSetAdd(&st, 22); + logVarG(idx); + idx = sstaticSetAdd(&st, 2); + logVarG(idx); + idx = sstaticSetGetNAdd(&st, 4); + logVarG(idx); + logI("get: %zd", sstaticSetGet(&st, 3)); + logI("get: %zd", sstaticSetGet(&st, 2)); + logI("get: %zd", sstaticSetGet(&st, 32)); + logI("has: %b", sstaticSetHas(&st, 3)); + logI("has: %b", sstaticSetHas(&st, 2)); + logI("has: %b", sstaticSetHas(&st, 32)); + + + #undef HASHFUNC + #undef ISEMPTY + #undef CMPFUNC + // hash function for cs + #define HASHFUNC strHashFNV1A + // comparison function for cs + //#define CMPFUNC !strcmp + #define CMPFUNC eqS + + astaticSetT(cst, char*, 5); + cst cs; + + astaticSetClear(&cs); + + range(i, 5) { + logVarG(cs.set[i]); + } + + var ix = astaticSetGetNAdd(&cs, "1"); + logVarG(ix); + ix = astaticSetGetNAdd(&cs, "3"); + logVarG(ix); + logI("get: 3 - %zd", astaticSetGet(&cs, "3")); + logI("get: 2 - %zd", astaticSetGet(&cs, "2")); + logI("has: %b", astaticSetHas(&cs, "3")); + logI("has: %b", astaticSetHas(&cs, "2")); + ix = astaticSetGetNAdd(&cs, "12"); + logI("12: %zd", ix); + ix = astaticSetGetNAdd(&cs, "2"); + logI("2: %zd", ix); + ix = astaticSetAdd(&cs, "22"); + logI("add 22: %zd (1=ok)", ix); + ix = astaticSetAdd(&cs, "2"); + logVarG(ix); + ix = astaticSetGetNAdd(&cs, "4"); + logVarG(ix); + logI("get: 3 %zd", astaticSetGet(&cs, "3")); + logI("get: 2 %zd", astaticSetGet(&cs, "2")); + logI("get: 22 %zd", astaticSetGet(&cs, "22")); + logI("get: 32 %zd", astaticSetGet(&cs, "32")); + logI("has: 3 %b", astaticSetHas(&cs, "3")); + logI("has: 2 %b", astaticSetHas(&cs, "2")); + logI("has: 32 %b", astaticSetHas(&cs, "32")); + + range(i, 5) { + logVarG(astaticSetAt(&cs, i)); + } + + logVarG(sizeof st); + logVarG(sizeof cs); +} +// vim: set expandtab ts=2 sw=2: diff --git a/package.yml b/package.yml @@ -0,0 +1,17 @@ +--- + name: set + version: 0.0.1 + description: "set data structures using hash functions" + bin: ./set.h + repository: + type: git + url: git+https://github.com/RemyNoulin/set.git + keywords: + #- utility + - command + author: Remy + license: MIT + bugs: + url: https://github.com/RemyNoulin/set/issues + homepage: https://github.com/RemyNoulin/set#readme + private: false # true for private package diff --git a/set.h b/set.h @@ -0,0 +1,374 @@ + +#include "libsheepyObject.h" + +// TODO +// add and remove +// buckets with multiple keys + +/** + * simple static set + * + * keys are assigned a unique index + * this data structure maps keys to indexes in an array of size count + * + * when there is a collision, a linear search is done until an empty bucket is found + * + * the simple static set can be used together with a bitset to filter keys efficiently + * + * HASHFUNC, ISEMPTY and CMPFUNC are functions defined before using sstaticSet* macros + * + * hashType HASHFUNC(key); + * returns the hash value for key + * + * bool ISEMPTY(keyInSet); + * returns true when the bucket don't have a key + * + * bool CMPFUNC(key1, key2); + * returns true when key1 is equal key2 + */ + +/** type definition */ +#define sstaticSetT(typeName, keyType, count)\ + typedef struct { keyType set[count];} typeName; + +/** clear set */ +#define sstaticSetClear(name) memset((name)->set, 0, sizeof((name)->set)); + +/** get bucket at index */ +#define sstaticSetAt(name, index) ((name)->set[index]) + +/** + * add key to set + * + * \return + * 1 key is added + * 0 key was already in set + * -1 set is full, can't add the key + */ +#define sstaticSetAdd(name, key) ({\ + var UNIQVAR(k) = key;\ + var UNIQVAR(hash) = HASHFUNC(UNIQVAR(k));\ + size_t UNIQVAR(mhash) = UNIQVAR(hash) % ARRAY_SIZE((name)->set);\ + i8 UNIQVAR(r) = -1;\ + if (ISEMPTY((name)->set[UNIQVAR(mhash)])) {\ + /* save key in this bucket */\ + /*logI("empty");*/\ + (name)->set[UNIQVAR(mhash)] = UNIQVAR(k);\ + UNIQVAR(r) = 1;\ + }\ + else {\ + /* key already in set or collision */\ + UNIQVAR(r) = 0;\ + if (not CMPFUNC((name)->set[UNIQVAR(mhash)], UNIQVAR(k))) {\ + /* collision */\ + /*logI("collision");*/\ + circular(UNIQVAR(i), UNIQVAR(mhash), ARRAY_SIZE((name)->set)) {\ + /*logI("circ %zu %d", UNIQVAR(i), (name)->set[UNIQVAR(i)]);*/\ + if (CMPFUNC((name)->set[UNIQVAR(i)], UNIQVAR(k))) {\ + goto UNIQVAR(end);\ + }\ + if (ISEMPTY((name)->set[UNIQVAR(i)])) {\ + /* use empty bucket for key */\ + (name)->set[UNIQVAR(i)] = UNIQVAR(k);\ + UNIQVAR(r) = 1;\ + /*logI("found a bucket");*/\ + goto UNIQVAR(end);\ + }\ + } /* circular */\ + /* set is full, the key cannot be added */\ + UNIQVAR(r) = -1;\ + } /* if (not CPMFUNC((name)->set[UNIQVAR(mhash)], UNIQVAR(k))) */\ + }\ + UNIQVAR(end):\ + UNIQVAR(r);\ + }) + +#define sstaticSetHas(name, key) ({\ + var UNIQVAR(k) = key;\ + var UNIQVAR(hash) = HASHFUNC(UNIQVAR(k));\ + size_t UNIQVAR(mhash) = UNIQVAR(hash) % ARRAY_SIZE((name)->set);\ + bool UNIQVAR(r) = true;\ + if (not CMPFUNC((name)->set[UNIQVAR(mhash)], UNIQVAR(k))) {\ + /* collision */\ + /*logI("collision");*/\ + circular(UNIQVAR(i), UNIQVAR(mhash), ARRAY_SIZE((name)->set)) {\ + /*logI("circ %zu %d", UNIQVAR(i), (name)->set[UNIQVAR(i)]);*/\ + if (CMPFUNC((name)->set[UNIQVAR(i)], UNIQVAR(k))) {\ + /* found key in set */\ + /*logI("already key");*/\ + goto UNIQVAR(end);\ + }\ + if (ISEMPTY((name)->set[UNIQVAR(i)])) {\ + /* key not found */\ + /*logI("found a bucket");*/\ + break;\ + }\ + } /* circular */\ + UNIQVAR(r) = false;\ + } /* if (not CPMFUNC((name)->set[UNIQVAR(mhash)], UNIQVAR(k))) */\ + UNIQVAR(end):\ + UNIQVAR(r);\ + }) + +/** + * get index for key + * + * \return + * index for key + * -1 not found + */ +#define sstaticSetGet(name, key) ({\ + var UNIQVAR(k) = key;\ + var UNIQVAR(hash) = HASHFUNC(UNIQVAR(k));\ + ssize_t UNIQVAR(r) = UNIQVAR(hash) % ARRAY_SIZE((name)->set);\ + if (not CMPFUNC((name)->set[UNIQVAR(r)], UNIQVAR(k))) {\ + /* collision */\ + /*logI("collision");*/\ + circular(UNIQVAR(i), UNIQVAR(r), ARRAY_SIZE((name)->set)) {\ + /*logI("circ %zu %d", UNIQVAR(i), (name)->set[UNIQVAR(i)]);*/\ + if (CMPFUNC((name)->set[UNIQVAR(i)], UNIQVAR(k))) {\ + /* found key in set */\ + /*logI("already key");*/\ + UNIQVAR(r) = UNIQVAR(i);\ + goto UNIQVAR(end);\ + }\ + if (ISEMPTY((name)->set[UNIQVAR(i)])) {\ + /* key not found */\ + /*logI("found a bucket");*/\ + break;\ + }\ + } /* circular */\ + UNIQVAR(r) = -1;\ + } /* if (not CPMFUNC((name)->set[UNIQVAR(r)], UNIQVAR(k))) */\ + UNIQVAR(end):\ + UNIQVAR(r);\ + }) + +/** + * get index and add key + * + * \return + * index for key + * -1 set is full, can't add the key + */ +#define sstaticSetGetNAdd(name, key) ({\ + var UNIQVAR(k) = key;\ + var UNIQVAR(hash) = HASHFUNC(UNIQVAR(k));\ + ssize_t UNIQVAR(r) = UNIQVAR(hash) % ARRAY_SIZE((name)->set);\ + if (ISEMPTY((name)->set[UNIQVAR(r)])) {\ + /* save key in this bucket */\ + /*logI("empty");*/\ + (name)->set[UNIQVAR(r)] = UNIQVAR(k);\ + }\ + else {\ + /* key already in set or collision */\ + if (not CMPFUNC((name)->set[UNIQVAR(r)], UNIQVAR(k))) {\ + /* collision */\ + /*logI("collision");*/\ + circular(UNIQVAR(i), UNIQVAR(r), ARRAY_SIZE((name)->set)) {\ + /*logI("circ %zu %d", UNIQVAR(i), (name)->set[UNIQVAR(i)]);*/\ + if (CMPFUNC((name)->set[UNIQVAR(i)], UNIQVAR(k))) {\ + /* found key in set */\ + UNIQVAR(r) = UNIQVAR(i);\ + /*logI("already key");*/\ + goto UNIQVAR(end);\ + }\ + if (ISEMPTY((name)->set[UNIQVAR(i)])) {\ + /* use empty bucket for key */\ + (name)->set[UNIQVAR(i)] = UNIQVAR(k);\ + UNIQVAR(r) = UNIQVAR(i);\ + /*logI("found a bucket");*/\ + goto UNIQVAR(end);\ + }\ + } /* circular */\ + UNIQVAR(r) = -1;\ + } /* if (not CPMFUNC((name)->set[UNIQVAR(r)], UNIQVAR(k))) */\ + }\ + UNIQVAR(end):\ + UNIQVAR(r);\ + }) + + + +/** + * another static set + * + * this set doesn't need ISFUNC, it keeps track of empty buckets + */ + +/** type definition */ +#define astaticSetT(typeName, keyType, count)\ + staticBitsetT(UNIQVAR(bits), u64, count);\ + typedef struct { \ + keyType set[count];\ + UNIQVAR(bits) hasKey;\ + } typeName; + +/** clear set */ +#define astaticSetClear(name) do{\ + memset((name)->set, 0, sizeof((name)->set));\ + staticBitsetClear(&(name)->hasKey);\ + } while(0) + +/** get bucket at index */ +#define astaticSetAt(name, index) ((name)->set[index]) + +/** is bucket empty */ +#define astaticSetIsBucketEmpty(name, index) !staticBitsetGet(&(name)->hasKey, index) + +/** + * add key to set + * + * \return + * 1 key is added + * 0 key was already in set + * -1 set is full, can't add the key + */ +#define astaticSetAdd(name, key) ({\ + var UNIQVAR(k) = key;\ + var UNIQVAR(hash) = HASHFUNC(UNIQVAR(k));\ + size_t UNIQVAR(mhash) = UNIQVAR(hash) % ARRAY_SIZE((name)->set);\ + i8 UNIQVAR(r) = -1;\ + if (astaticSetIsBucketEmpty(name,UNIQVAR(mhash))) {\ + /* save key in this bucket */\ + /*logI("empty");*/\ + (name)->set[UNIQVAR(mhash)] = UNIQVAR(k);\ + UNIQVAR(r) = 1;\ + staticBitset1(&(name)->hasKey, UNIQVAR(mhash));\ + }\ + else {\ + /* key already in set or collision */\ + UNIQVAR(r) = 0;\ + if (not CMPFUNC((name)->set[UNIQVAR(mhash)], UNIQVAR(k))) {\ + /* collision */\ + /*logI("collision");*/\ + circular(UNIQVAR(i), UNIQVAR(mhash), ARRAY_SIZE((name)->set)) {\ + /*logI("circ %zu %d", UNIQVAR(i), (name)->set[UNIQVAR(i)]);*/\ + if (CMPFUNC((name)->set[UNIQVAR(i)], UNIQVAR(k))) {\ + goto UNIQVAR(end);\ + }\ + if (astaticSetIsBucketEmpty(name,UNIQVAR(i))) {\ + /* use empty bucket for key */\ + (name)->set[UNIQVAR(i)] = UNIQVAR(k);\ + UNIQVAR(r) = 1;\ + staticBitset1(&(name)->hasKey, UNIQVAR(i));\ + /*logI("found a bucket");*/\ + goto UNIQVAR(end);\ + }\ + } /* circular */\ + /* set is full, the key cannot be added */\ + UNIQVAR(r) = -1;\ + } /* if (not CPMFUNC((name)->set[UNIQVAR(mhash)], UNIQVAR(k))) */\ + }\ + UNIQVAR(end):\ + UNIQVAR(r);\ + }) + +#define astaticSetHas(name, key) ({\ + var UNIQVAR(k) = key;\ + var UNIQVAR(hash) = HASHFUNC(UNIQVAR(k));\ + size_t UNIQVAR(mhash) = UNIQVAR(hash) % ARRAY_SIZE((name)->set);\ + bool UNIQVAR(r) = true;\ + if (not CMPFUNC((name)->set[UNIQVAR(mhash)], UNIQVAR(k))) {\ + /* collision */\ + /*logI("collision");*/\ + circular(UNIQVAR(i), UNIQVAR(mhash), ARRAY_SIZE((name)->set)) {\ + /*logI("circ %zu %d", UNIQVAR(i), (name)->set[UNIQVAR(i)]);*/\ + if (CMPFUNC((name)->set[UNIQVAR(i)], UNIQVAR(k))) {\ + /* found key in set */\ + /*logI("already key");*/\ + goto UNIQVAR(end);\ + }\ + if (astaticSetIsBucketEmpty(name,UNIQVAR(i))) {\ + /* key not found */\ + /*logI("found a bucket");*/\ + break;\ + }\ + } /* circular */\ + UNIQVAR(r) = false;\ + } /* if (not CPMFUNC((name)->set[UNIQVAR(mhash)], UNIQVAR(k))) */\ + UNIQVAR(end):\ + UNIQVAR(r);\ + }) + +/** + * get index for key + * + * \return + * index for key + * -1 not found + */ +#define astaticSetGet(name, key) ({\ + var UNIQVAR(k) = key;\ + var UNIQVAR(hash) = HASHFUNC(UNIQVAR(k));\ + ssize_t UNIQVAR(r) = UNIQVAR(hash) % ARRAY_SIZE((name)->set);\ + if (not CMPFUNC((name)->set[UNIQVAR(r)], UNIQVAR(k))) {\ + /* collision */\ + /*logI("collision");*/\ + circular(UNIQVAR(i), UNIQVAR(r), ARRAY_SIZE((name)->set)) {\ + /*logI("circ %zu %d", UNIQVAR(i), (name)->set[UNIQVAR(i)]);*/\ + if (CMPFUNC((name)->set[UNIQVAR(i)], UNIQVAR(k))) {\ + /* found key in set */\ + /*logI("already key");*/\ + UNIQVAR(r) = UNIQVAR(i);\ + goto UNIQVAR(end);\ + }\ + if (astaticSetIsBucketEmpty(name,UNIQVAR(i))) {\ + /* key not found */\ + /*logI("found a bucket");*/\ + break;\ + }\ + } /* circular */\ + UNIQVAR(r) = -1;\ + } /* if (not CPMFUNC((name)->set[UNIQVAR(r)], UNIQVAR(k))) */\ + UNIQVAR(end):\ + UNIQVAR(r);\ + }) + +/** + * get index and add key + * + * \return + * index for key + * -1 set is full, can't add the key + */ +#define astaticSetGetNAdd(name, key) ({\ + var UNIQVAR(k) = key;\ + var UNIQVAR(hash) = HASHFUNC(UNIQVAR(k));\ + ssize_t UNIQVAR(r) = UNIQVAR(hash) % ARRAY_SIZE((name)->set);\ + if (astaticSetIsBucketEmpty(name,UNIQVAR(r))) {\ + /* save key in this bucket */\ + /*logI("empty");*/\ + (name)->set[UNIQVAR(r)] = UNIQVAR(k);\ + staticBitset1(&(name)->hasKey, UNIQVAR(r));\ + }\ + else {\ + /* key already in set or collision */\ + if (not CMPFUNC((name)->set[UNIQVAR(r)], UNIQVAR(k))) {\ + /* collision */\ + /*logI("collision");*/\ + circular(UNIQVAR(i), UNIQVAR(r), ARRAY_SIZE((name)->set)) {\ + /*logI("circ %zu %d", UNIQVAR(i), (name)->set[UNIQVAR(i)]);*/\ + if (CMPFUNC((name)->set[UNIQVAR(i)], UNIQVAR(k))) {\ + /* found key in set */\ + UNIQVAR(r) = UNIQVAR(i);\ + /*logI("already key");*/\ + goto UNIQVAR(end);\ + }\ + if (astaticSetIsBucketEmpty(name,UNIQVAR(i))) {\ + /* use empty bucket for key */\ + (name)->set[UNIQVAR(i)] = UNIQVAR(k);\ + UNIQVAR(r) = UNIQVAR(i);\ + staticBitset1(&(name)->hasKey, UNIQVAR(i));\ + /*logI("found a bucket");*/\ + goto UNIQVAR(end);\ + }\ + } /* circular */\ + UNIQVAR(r) = -1;\ + } /* if (not CPMFUNC((name)->set[UNIQVAR(r)], UNIQVAR(k))) */\ + }\ + UNIQVAR(end):\ + UNIQVAR(r);\ + }) + +// vim: set expandtab ts=2 sw=2: