commit 7283851a0752bbf613ece025c497a2e5c8f4f8b8
parent b76849d9a70867b310f57c9c13da8c96327f4341
Author: Remy Noulin <loader2x@gmail.com>
Date: Mon, 19 Nov 2018 19:22:51 -0500
add sstaticSet (simple static set) and astaticSet (another static set)
main.c | 114 ++++++++++++++++++
package.yml | 17 +++
set.h | 374 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 505 insertions(+)
Diffstat:
| A | main.c | | | 114 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | package.yml | | | 17 | +++++++++++++++++ |
| A | set.h | | | 374 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
3 files changed, 505 insertions(+), 0 deletions(-)
diff --git a/main.c b/main.c
@@ -0,0 +1,114 @@
+#! /usr/bin/env sheepy
+/* or direct path to sheepy: #! /usr/local/bin/sheepy */
+
+/* Libsheepy documentation: http://spartatek.se/libsheepy/ */
+#include "libsheepyObject.h"
+#include "set.h"
+
+// hash functions for the hash table
+#include "shpPackages/hashfunctions/hashfunctions.h"
+
+int argc; char **argv;
+
+/* enable/disable logging */
+/* #undef pLog */
+/* #define pLog(...) */
+
+int main(int ARGC, char** ARGV) {
+
+ argc = ARGC; argv = ARGV;
+
+ initLibsheepy(ARGV[0]);
+ setLogMode(LOG_VERBOSE);
+
+ sstaticSetT(sst, u8, 5);
+
+ sst st;
+
+ sstaticSetClear(&st);
+
+ #undef HASHFUNC
+ #undef ISEMPTY
+ #undef CMPFUNC
+ #define HASHFUNC(k) k
+ #define ISEMPTY(e) (e == 0)
+ #define CMPFUNC(e1, e2) (e1 == e2)
+
+ var idx = sstaticSetGetNAdd(&st, 1);
+ logVarG(idx);
+ idx = sstaticSetGetNAdd(&st, 3);
+ logVarG(idx);
+ logI("get: %zd", sstaticSetGet(&st, 3));
+ logI("get: %zd", sstaticSetGet(&st, 2));
+ logI("has: %b", sstaticSetHas(&st, 3));
+ logI("has: %b", sstaticSetHas(&st, 2));
+ idx = sstaticSetGetNAdd(&st, 12);
+ logVarG(idx);
+ idx = sstaticSetGetNAdd(&st, 2);
+ logVarG(idx);
+ idx = sstaticSetAdd(&st, 22);
+ logVarG(idx);
+ idx = sstaticSetAdd(&st, 2);
+ logVarG(idx);
+ idx = sstaticSetGetNAdd(&st, 4);
+ logVarG(idx);
+ logI("get: %zd", sstaticSetGet(&st, 3));
+ logI("get: %zd", sstaticSetGet(&st, 2));
+ logI("get: %zd", sstaticSetGet(&st, 32));
+ logI("has: %b", sstaticSetHas(&st, 3));
+ logI("has: %b", sstaticSetHas(&st, 2));
+ logI("has: %b", sstaticSetHas(&st, 32));
+
+
+ #undef HASHFUNC
+ #undef ISEMPTY
+ #undef CMPFUNC
+ // hash function for cs
+ #define HASHFUNC strHashFNV1A
+ // comparison function for cs
+ //#define CMPFUNC !strcmp
+ #define CMPFUNC eqS
+
+ astaticSetT(cst, char*, 5);
+ cst cs;
+
+ astaticSetClear(&cs);
+
+ range(i, 5) {
+ logVarG(cs.set[i]);
+ }
+
+ var ix = astaticSetGetNAdd(&cs, "1");
+ logVarG(ix);
+ ix = astaticSetGetNAdd(&cs, "3");
+ logVarG(ix);
+ logI("get: 3 - %zd", astaticSetGet(&cs, "3"));
+ logI("get: 2 - %zd", astaticSetGet(&cs, "2"));
+ logI("has: %b", astaticSetHas(&cs, "3"));
+ logI("has: %b", astaticSetHas(&cs, "2"));
+ ix = astaticSetGetNAdd(&cs, "12");
+ logI("12: %zd", ix);
+ ix = astaticSetGetNAdd(&cs, "2");
+ logI("2: %zd", ix);
+ ix = astaticSetAdd(&cs, "22");
+ logI("add 22: %zd (1=ok)", ix);
+ ix = astaticSetAdd(&cs, "2");
+ logVarG(ix);
+ ix = astaticSetGetNAdd(&cs, "4");
+ logVarG(ix);
+ logI("get: 3 %zd", astaticSetGet(&cs, "3"));
+ logI("get: 2 %zd", astaticSetGet(&cs, "2"));
+ logI("get: 22 %zd", astaticSetGet(&cs, "22"));
+ logI("get: 32 %zd", astaticSetGet(&cs, "32"));
+ logI("has: 3 %b", astaticSetHas(&cs, "3"));
+ logI("has: 2 %b", astaticSetHas(&cs, "2"));
+ logI("has: 32 %b", astaticSetHas(&cs, "32"));
+
+ range(i, 5) {
+ logVarG(astaticSetAt(&cs, i));
+ }
+
+ logVarG(sizeof st);
+ logVarG(sizeof cs);
+}
+// vim: set expandtab ts=2 sw=2:
diff --git a/package.yml b/package.yml
@@ -0,0 +1,17 @@
+---
+ name: set
+ version: 0.0.1
+ description: "set data structures using hash functions"
+ bin: ./set.h
+ repository:
+ type: git
+ url: git+https://github.com/RemyNoulin/set.git
+ keywords:
+ #- utility
+ - command
+ author: Remy
+ license: MIT
+ bugs:
+ url: https://github.com/RemyNoulin/set/issues
+ homepage: https://github.com/RemyNoulin/set#readme
+ private: false # true for private package
diff --git a/set.h b/set.h
@@ -0,0 +1,374 @@
+
+#include "libsheepyObject.h"
+
+// TODO
+// add and remove
+// buckets with multiple keys
+
+/**
+ * simple static set
+ *
+ * keys are assigned a unique index
+ * this data structure maps keys to indexes in an array of size count
+ *
+ * when there is a collision, a linear search is done until an empty bucket is found
+ *
+ * the simple static set can be used together with a bitset to filter keys efficiently
+ *
+ * HASHFUNC, ISEMPTY and CMPFUNC are functions defined before using sstaticSet* macros
+ *
+ * hashType HASHFUNC(key);
+ * returns the hash value for key
+ *
+ * bool ISEMPTY(keyInSet);
+ * returns true when the bucket don't have a key
+ *
+ * bool CMPFUNC(key1, key2);
+ * returns true when key1 is equal key2
+ */
+
+/** type definition */
+#define sstaticSetT(typeName, keyType, count)\
+ typedef struct { keyType set[count];} typeName;
+
+/** clear set */
+#define sstaticSetClear(name) memset((name)->set, 0, sizeof((name)->set));
+
+/** get bucket at index */
+#define sstaticSetAt(name, index) ((name)->set[index])
+
+/**
+ * add key to set
+ *
+ * \return
+ * 1 key is added
+ * 0 key was already in set
+ * -1 set is full, can't add the key
+ */
+#define sstaticSetAdd(name, key) ({\
+ var UNIQVAR(k) = key;\
+ var UNIQVAR(hash) = HASHFUNC(UNIQVAR(k));\
+ size_t UNIQVAR(mhash) = UNIQVAR(hash) % ARRAY_SIZE((name)->set);\
+ i8 UNIQVAR(r) = -1;\
+ if (ISEMPTY((name)->set[UNIQVAR(mhash)])) {\
+ /* save key in this bucket */\
+ /*logI("empty");*/\
+ (name)->set[UNIQVAR(mhash)] = UNIQVAR(k);\
+ UNIQVAR(r) = 1;\
+ }\
+ else {\
+ /* key already in set or collision */\
+ UNIQVAR(r) = 0;\
+ if (not CMPFUNC((name)->set[UNIQVAR(mhash)], UNIQVAR(k))) {\
+ /* collision */\
+ /*logI("collision");*/\
+ circular(UNIQVAR(i), UNIQVAR(mhash), ARRAY_SIZE((name)->set)) {\
+ /*logI("circ %zu %d", UNIQVAR(i), (name)->set[UNIQVAR(i)]);*/\
+ if (CMPFUNC((name)->set[UNIQVAR(i)], UNIQVAR(k))) {\
+ goto UNIQVAR(end);\
+ }\
+ if (ISEMPTY((name)->set[UNIQVAR(i)])) {\
+ /* use empty bucket for key */\
+ (name)->set[UNIQVAR(i)] = UNIQVAR(k);\
+ UNIQVAR(r) = 1;\
+ /*logI("found a bucket");*/\
+ goto UNIQVAR(end);\
+ }\
+ } /* circular */\
+ /* set is full, the key cannot be added */\
+ UNIQVAR(r) = -1;\
+ } /* if (not CPMFUNC((name)->set[UNIQVAR(mhash)], UNIQVAR(k))) */\
+ }\
+ UNIQVAR(end):\
+ UNIQVAR(r);\
+ })
+
+#define sstaticSetHas(name, key) ({\
+ var UNIQVAR(k) = key;\
+ var UNIQVAR(hash) = HASHFUNC(UNIQVAR(k));\
+ size_t UNIQVAR(mhash) = UNIQVAR(hash) % ARRAY_SIZE((name)->set);\
+ bool UNIQVAR(r) = true;\
+ if (not CMPFUNC((name)->set[UNIQVAR(mhash)], UNIQVAR(k))) {\
+ /* collision */\
+ /*logI("collision");*/\
+ circular(UNIQVAR(i), UNIQVAR(mhash), ARRAY_SIZE((name)->set)) {\
+ /*logI("circ %zu %d", UNIQVAR(i), (name)->set[UNIQVAR(i)]);*/\
+ if (CMPFUNC((name)->set[UNIQVAR(i)], UNIQVAR(k))) {\
+ /* found key in set */\
+ /*logI("already key");*/\
+ goto UNIQVAR(end);\
+ }\
+ if (ISEMPTY((name)->set[UNIQVAR(i)])) {\
+ /* key not found */\
+ /*logI("found a bucket");*/\
+ break;\
+ }\
+ } /* circular */\
+ UNIQVAR(r) = false;\
+ } /* if (not CPMFUNC((name)->set[UNIQVAR(mhash)], UNIQVAR(k))) */\
+ UNIQVAR(end):\
+ UNIQVAR(r);\
+ })
+
+/**
+ * get index for key
+ *
+ * \return
+ * index for key
+ * -1 not found
+ */
+#define sstaticSetGet(name, key) ({\
+ var UNIQVAR(k) = key;\
+ var UNIQVAR(hash) = HASHFUNC(UNIQVAR(k));\
+ ssize_t UNIQVAR(r) = UNIQVAR(hash) % ARRAY_SIZE((name)->set);\
+ if (not CMPFUNC((name)->set[UNIQVAR(r)], UNIQVAR(k))) {\
+ /* collision */\
+ /*logI("collision");*/\
+ circular(UNIQVAR(i), UNIQVAR(r), ARRAY_SIZE((name)->set)) {\
+ /*logI("circ %zu %d", UNIQVAR(i), (name)->set[UNIQVAR(i)]);*/\
+ if (CMPFUNC((name)->set[UNIQVAR(i)], UNIQVAR(k))) {\
+ /* found key in set */\
+ /*logI("already key");*/\
+ UNIQVAR(r) = UNIQVAR(i);\
+ goto UNIQVAR(end);\
+ }\
+ if (ISEMPTY((name)->set[UNIQVAR(i)])) {\
+ /* key not found */\
+ /*logI("found a bucket");*/\
+ break;\
+ }\
+ } /* circular */\
+ UNIQVAR(r) = -1;\
+ } /* if (not CPMFUNC((name)->set[UNIQVAR(r)], UNIQVAR(k))) */\
+ UNIQVAR(end):\
+ UNIQVAR(r);\
+ })
+
+/**
+ * get index and add key
+ *
+ * \return
+ * index for key
+ * -1 set is full, can't add the key
+ */
+#define sstaticSetGetNAdd(name, key) ({\
+ var UNIQVAR(k) = key;\
+ var UNIQVAR(hash) = HASHFUNC(UNIQVAR(k));\
+ ssize_t UNIQVAR(r) = UNIQVAR(hash) % ARRAY_SIZE((name)->set);\
+ if (ISEMPTY((name)->set[UNIQVAR(r)])) {\
+ /* save key in this bucket */\
+ /*logI("empty");*/\
+ (name)->set[UNIQVAR(r)] = UNIQVAR(k);\
+ }\
+ else {\
+ /* key already in set or collision */\
+ if (not CMPFUNC((name)->set[UNIQVAR(r)], UNIQVAR(k))) {\
+ /* collision */\
+ /*logI("collision");*/\
+ circular(UNIQVAR(i), UNIQVAR(r), ARRAY_SIZE((name)->set)) {\
+ /*logI("circ %zu %d", UNIQVAR(i), (name)->set[UNIQVAR(i)]);*/\
+ if (CMPFUNC((name)->set[UNIQVAR(i)], UNIQVAR(k))) {\
+ /* found key in set */\
+ UNIQVAR(r) = UNIQVAR(i);\
+ /*logI("already key");*/\
+ goto UNIQVAR(end);\
+ }\
+ if (ISEMPTY((name)->set[UNIQVAR(i)])) {\
+ /* use empty bucket for key */\
+ (name)->set[UNIQVAR(i)] = UNIQVAR(k);\
+ UNIQVAR(r) = UNIQVAR(i);\
+ /*logI("found a bucket");*/\
+ goto UNIQVAR(end);\
+ }\
+ } /* circular */\
+ UNIQVAR(r) = -1;\
+ } /* if (not CPMFUNC((name)->set[UNIQVAR(r)], UNIQVAR(k))) */\
+ }\
+ UNIQVAR(end):\
+ UNIQVAR(r);\
+ })
+
+
+
+/**
+ * another static set
+ *
+ * this set doesn't need ISFUNC, it keeps track of empty buckets
+ */
+
+/** type definition */
+#define astaticSetT(typeName, keyType, count)\
+ staticBitsetT(UNIQVAR(bits), u64, count);\
+ typedef struct { \
+ keyType set[count];\
+ UNIQVAR(bits) hasKey;\
+ } typeName;
+
+/** clear set */
+#define astaticSetClear(name) do{\
+ memset((name)->set, 0, sizeof((name)->set));\
+ staticBitsetClear(&(name)->hasKey);\
+ } while(0)
+
+/** get bucket at index */
+#define astaticSetAt(name, index) ((name)->set[index])
+
+/** is bucket empty */
+#define astaticSetIsBucketEmpty(name, index) !staticBitsetGet(&(name)->hasKey, index)
+
+/**
+ * add key to set
+ *
+ * \return
+ * 1 key is added
+ * 0 key was already in set
+ * -1 set is full, can't add the key
+ */
+#define astaticSetAdd(name, key) ({\
+ var UNIQVAR(k) = key;\
+ var UNIQVAR(hash) = HASHFUNC(UNIQVAR(k));\
+ size_t UNIQVAR(mhash) = UNIQVAR(hash) % ARRAY_SIZE((name)->set);\
+ i8 UNIQVAR(r) = -1;\
+ if (astaticSetIsBucketEmpty(name,UNIQVAR(mhash))) {\
+ /* save key in this bucket */\
+ /*logI("empty");*/\
+ (name)->set[UNIQVAR(mhash)] = UNIQVAR(k);\
+ UNIQVAR(r) = 1;\
+ staticBitset1(&(name)->hasKey, UNIQVAR(mhash));\
+ }\
+ else {\
+ /* key already in set or collision */\
+ UNIQVAR(r) = 0;\
+ if (not CMPFUNC((name)->set[UNIQVAR(mhash)], UNIQVAR(k))) {\
+ /* collision */\
+ /*logI("collision");*/\
+ circular(UNIQVAR(i), UNIQVAR(mhash), ARRAY_SIZE((name)->set)) {\
+ /*logI("circ %zu %d", UNIQVAR(i), (name)->set[UNIQVAR(i)]);*/\
+ if (CMPFUNC((name)->set[UNIQVAR(i)], UNIQVAR(k))) {\
+ goto UNIQVAR(end);\
+ }\
+ if (astaticSetIsBucketEmpty(name,UNIQVAR(i))) {\
+ /* use empty bucket for key */\
+ (name)->set[UNIQVAR(i)] = UNIQVAR(k);\
+ UNIQVAR(r) = 1;\
+ staticBitset1(&(name)->hasKey, UNIQVAR(i));\
+ /*logI("found a bucket");*/\
+ goto UNIQVAR(end);\
+ }\
+ } /* circular */\
+ /* set is full, the key cannot be added */\
+ UNIQVAR(r) = -1;\
+ } /* if (not CPMFUNC((name)->set[UNIQVAR(mhash)], UNIQVAR(k))) */\
+ }\
+ UNIQVAR(end):\
+ UNIQVAR(r);\
+ })
+
+#define astaticSetHas(name, key) ({\
+ var UNIQVAR(k) = key;\
+ var UNIQVAR(hash) = HASHFUNC(UNIQVAR(k));\
+ size_t UNIQVAR(mhash) = UNIQVAR(hash) % ARRAY_SIZE((name)->set);\
+ bool UNIQVAR(r) = true;\
+ if (not CMPFUNC((name)->set[UNIQVAR(mhash)], UNIQVAR(k))) {\
+ /* collision */\
+ /*logI("collision");*/\
+ circular(UNIQVAR(i), UNIQVAR(mhash), ARRAY_SIZE((name)->set)) {\
+ /*logI("circ %zu %d", UNIQVAR(i), (name)->set[UNIQVAR(i)]);*/\
+ if (CMPFUNC((name)->set[UNIQVAR(i)], UNIQVAR(k))) {\
+ /* found key in set */\
+ /*logI("already key");*/\
+ goto UNIQVAR(end);\
+ }\
+ if (astaticSetIsBucketEmpty(name,UNIQVAR(i))) {\
+ /* key not found */\
+ /*logI("found a bucket");*/\
+ break;\
+ }\
+ } /* circular */\
+ UNIQVAR(r) = false;\
+ } /* if (not CPMFUNC((name)->set[UNIQVAR(mhash)], UNIQVAR(k))) */\
+ UNIQVAR(end):\
+ UNIQVAR(r);\
+ })
+
+/**
+ * get index for key
+ *
+ * \return
+ * index for key
+ * -1 not found
+ */
+#define astaticSetGet(name, key) ({\
+ var UNIQVAR(k) = key;\
+ var UNIQVAR(hash) = HASHFUNC(UNIQVAR(k));\
+ ssize_t UNIQVAR(r) = UNIQVAR(hash) % ARRAY_SIZE((name)->set);\
+ if (not CMPFUNC((name)->set[UNIQVAR(r)], UNIQVAR(k))) {\
+ /* collision */\
+ /*logI("collision");*/\
+ circular(UNIQVAR(i), UNIQVAR(r), ARRAY_SIZE((name)->set)) {\
+ /*logI("circ %zu %d", UNIQVAR(i), (name)->set[UNIQVAR(i)]);*/\
+ if (CMPFUNC((name)->set[UNIQVAR(i)], UNIQVAR(k))) {\
+ /* found key in set */\
+ /*logI("already key");*/\
+ UNIQVAR(r) = UNIQVAR(i);\
+ goto UNIQVAR(end);\
+ }\
+ if (astaticSetIsBucketEmpty(name,UNIQVAR(i))) {\
+ /* key not found */\
+ /*logI("found a bucket");*/\
+ break;\
+ }\
+ } /* circular */\
+ UNIQVAR(r) = -1;\
+ } /* if (not CPMFUNC((name)->set[UNIQVAR(r)], UNIQVAR(k))) */\
+ UNIQVAR(end):\
+ UNIQVAR(r);\
+ })
+
+/**
+ * get index and add key
+ *
+ * \return
+ * index for key
+ * -1 set is full, can't add the key
+ */
+#define astaticSetGetNAdd(name, key) ({\
+ var UNIQVAR(k) = key;\
+ var UNIQVAR(hash) = HASHFUNC(UNIQVAR(k));\
+ ssize_t UNIQVAR(r) = UNIQVAR(hash) % ARRAY_SIZE((name)->set);\
+ if (astaticSetIsBucketEmpty(name,UNIQVAR(r))) {\
+ /* save key in this bucket */\
+ /*logI("empty");*/\
+ (name)->set[UNIQVAR(r)] = UNIQVAR(k);\
+ staticBitset1(&(name)->hasKey, UNIQVAR(r));\
+ }\
+ else {\
+ /* key already in set or collision */\
+ if (not CMPFUNC((name)->set[UNIQVAR(r)], UNIQVAR(k))) {\
+ /* collision */\
+ /*logI("collision");*/\
+ circular(UNIQVAR(i), UNIQVAR(r), ARRAY_SIZE((name)->set)) {\
+ /*logI("circ %zu %d", UNIQVAR(i), (name)->set[UNIQVAR(i)]);*/\
+ if (CMPFUNC((name)->set[UNIQVAR(i)], UNIQVAR(k))) {\
+ /* found key in set */\
+ UNIQVAR(r) = UNIQVAR(i);\
+ /*logI("already key");*/\
+ goto UNIQVAR(end);\
+ }\
+ if (astaticSetIsBucketEmpty(name,UNIQVAR(i))) {\
+ /* use empty bucket for key */\
+ (name)->set[UNIQVAR(i)] = UNIQVAR(k);\
+ UNIQVAR(r) = UNIQVAR(i);\
+ staticBitset1(&(name)->hasKey, UNIQVAR(i));\
+ /*logI("found a bucket");*/\
+ goto UNIQVAR(end);\
+ }\
+ } /* circular */\
+ UNIQVAR(r) = -1;\
+ } /* if (not CPMFUNC((name)->set[UNIQVAR(r)], UNIQVAR(k))) */\
+ }\
+ UNIQVAR(end):\
+ UNIQVAR(r);\
+ })
+
+// vim: set expandtab ts=2 sw=2: