sodiumTest

Libsodium examples, client/server system
git clone https://noulin.net/git/sodiumTest.git
Log | Files | Refs | README

bloom.c (7564B)


      1 /*
      2  *  Copyright (c) 2012-2022, Jyri J. Virkki
      3  *  All rights reserved.
      4  *
      5  *  This file is under BSD license. See LICENSE file.
      6  */
      7 
      8 /*
      9  * Refer to bloom.h for documentation on the public interfaces.
     10  */
     11 
     12 #include <assert.h>
     13 #include <fcntl.h>
     14 #include <math.h>
     15 #include <stdint.h>
     16 #include <stdio.h>
     17 #include <stdlib.h>
     18 #include <string.h>
     19 #include <sys/stat.h>
     20 #include <sys/types.h>
     21 #include <unistd.h>
     22 
     23 #include "bloom.h"
     24 #include "murmurhash2.h"
     25 
     26 #define MAKESTRING(n) STRING(n)
     27 #define STRING(n) #n
     28 #define BLOOM_MAGIC "libbloom2"
     29 
     30 inline static int test_bit_set_bit(unsigned char * buf,
     31                                    unsigned long int bit, int set_bit)
     32 {
     33   unsigned long int byte = bit >> 3;
     34   unsigned char c = buf[byte];        // expensive memory access
     35   unsigned char mask = 1 << (bit % 8ul);
     36 
     37   if (c & mask) {
     38     return 1;
     39   } else {
     40     if (set_bit) {
     41       buf[byte] = c | mask;
     42     }
     43     return 0;
     44   }
     45 }
     46 
     47 
     48 static int bloom_check_add(struct bloom * bloom,
     49                            const void * buffer, int len, int add)
     50 {
     51   if (bloom->ready == 0) {
     52     printf("bloom at %p not initialized!\n", (void *)bloom);
     53     return -1;
     54   }
     55 
     56   unsigned char hits = 0;
     57   unsigned int a = murmurhash2(buffer, len, 0x9747b28c);
     58   unsigned int b = murmurhash2(buffer, len, a);
     59   unsigned long int x;
     60   unsigned long int i;
     61 
     62   for (i = 0; i < bloom->hashes; i++) {
     63     x = (a + b*i) % bloom->bits;
     64     if (test_bit_set_bit(bloom->bf, x, add)) {
     65       hits++;
     66     } else if (!add) {
     67       // Don't care about the presence of all the bits. Just our own.
     68       return 0;
     69     }
     70   }
     71 
     72   if (hits == bloom->hashes) {
     73     return 1;                // 1 == element already in (or collision)
     74   }
     75 
     76   return 0;
     77 }
     78 
     79 
     80 // DEPRECATED - Please migrate to bloom_init2.
     81 int bloom_init(struct bloom * bloom, int entries, double error)
     82 {
     83   return bloom_init2(bloom, (unsigned int)entries, error);
     84 }
     85 
     86 
     87 int bloom_init2(struct bloom * bloom, unsigned int entries, double error)
     88 {
     89   if (sizeof(unsigned long int) < 8) {
     90     printf("error: libbloom will not function correctly because\n");
     91     printf("sizeof(unsigned long int) == %ld\n", sizeof(unsigned long int));
     92     exit(1);
     93   }
     94 
     95   memset(bloom, 0, sizeof(struct bloom));
     96 
     97   if (entries < 1000 || error <= 0 || error >= 1) {
     98     return 1;
     99   }
    100 
    101   bloom->entries = entries;
    102   bloom->error = error;
    103 
    104   double num = -log(bloom->error);
    105   double denom = 0.480453013918201; // ln(2)^2
    106   bloom->bpe = (num / denom);
    107 
    108   long double dentries = (long double)entries;
    109   long double allbits = dentries * bloom->bpe;
    110   bloom->bits = (unsigned long int)allbits;
    111 
    112   if (bloom->bits % 8) {
    113     bloom->bytes = (bloom->bits / 8) + 1;
    114   } else {
    115     bloom->bytes = bloom->bits / 8;
    116   }
    117 
    118   bloom->hashes = (unsigned char)ceil(0.693147180559945 * bloom->bpe); // ln(2)
    119 
    120   bloom->bf = (unsigned char *)calloc(bloom->bytes, sizeof(unsigned char));
    121   if (bloom->bf == NULL) {                                   // LCOV_EXCL_START
    122     return 1;
    123   }                                                          // LCOV_EXCL_STOP
    124 
    125   bloom->ready = 1;
    126 
    127   bloom->major = BLOOM_VERSION_MAJOR;
    128   bloom->minor = BLOOM_VERSION_MINOR;
    129 
    130   return 0;
    131 }
    132 
    133 
    134 int bloom_check(struct bloom * bloom, const void * buffer, int len)
    135 {
    136   return bloom_check_add(bloom, buffer, len, 0);
    137 }
    138 
    139 
    140 int bloom_add(struct bloom * bloom, const void * buffer, int len)
    141 {
    142   return bloom_check_add(bloom, buffer, len, 1);
    143 }
    144 
    145 
    146 void bloom_print(struct bloom * bloom)
    147 {
    148   printf("bloom at %p\n", (void *)bloom);
    149   if (!bloom->ready) { printf(" *** NOT READY ***\n"); }
    150   printf(" ->version = %d.%d\n", bloom->major, bloom->minor);
    151   printf(" ->entries = %u\n", bloom->entries);
    152   printf(" ->error = %f\n", bloom->error);
    153   printf(" ->bits = %lu\n", bloom->bits);
    154   printf(" ->bits per elem = %f\n", bloom->bpe);
    155   printf(" ->bytes = %lu", bloom->bytes);
    156   unsigned int KB = bloom->bytes / 1024;
    157   unsigned int MB = KB / 1024;
    158   printf(" (%u KB, %u MB)\n", KB, MB);
    159   printf(" ->hash functions = %d\n", bloom->hashes);
    160 }
    161 
    162 
    163 void bloom_free(struct bloom * bloom)
    164 {
    165   if (bloom->ready) {
    166     free(bloom->bf);
    167   }
    168   bloom->ready = 0;
    169 }
    170 
    171 
    172 int bloom_reset(struct bloom * bloom)
    173 {
    174   if (!bloom->ready) return 1;
    175   memset(bloom->bf, 0, bloom->bytes);
    176   return 0;
    177 }
    178 
    179 
    180 int bloom_save(struct bloom * bloom, char * filename)
    181 {
    182   if (filename == NULL || filename[0] == 0) {
    183     return 1;
    184   }
    185 
    186   int fd = open(filename, O_WRONLY | O_CREAT, 0644);
    187   if (fd < 0) {
    188     return 1;
    189   }
    190 
    191   ssize_t out = write(fd, BLOOM_MAGIC, strlen(BLOOM_MAGIC));
    192   if (out != strlen(BLOOM_MAGIC)) { goto save_error; }       // LCOV_EXCL_LINE
    193 
    194   uint16_t size = sizeof(struct bloom);
    195   out = write(fd, &size, sizeof(uint16_t));
    196   if (out != sizeof(uint16_t)) { goto save_error; }          // LCOV_EXCL_LINE
    197 
    198   out = write(fd, bloom, sizeof(struct bloom));
    199   if (out != sizeof(struct bloom)) { goto save_error; }      // LCOV_EXCL_LINE
    200 
    201   out = write(fd, bloom->bf, bloom->bytes);
    202   if (out != bloom->bytes) { goto save_error; }              // LCOV_EXCL_LINE
    203 
    204   close(fd);
    205   return 0;
    206                                                              // LCOV_EXCL_START
    207  save_error:
    208   close(fd);
    209   return 1;
    210                                                              // LCOV_EXCL_STOP
    211 }
    212 
    213 
    214 int bloom_load(struct bloom * bloom, char * filename)
    215 {
    216   int rv = 0;
    217 
    218   if (filename == NULL || filename[0] == 0) { return 1; }
    219   if (bloom == NULL) { return 2; }
    220 
    221   memset(bloom, 0, sizeof(struct bloom));
    222 
    223   int fd = open(filename, O_RDONLY);
    224   if (fd < 0) { return 3; }
    225 
    226   char line[30];
    227   memset(line, 0, 30);
    228   ssize_t in = read(fd, line, strlen(BLOOM_MAGIC));
    229 
    230   if (in != strlen(BLOOM_MAGIC)) {
    231     rv = 4;
    232     goto load_error;
    233   }
    234 
    235   if (strncmp(line, BLOOM_MAGIC, strlen(BLOOM_MAGIC))) {
    236     rv = 5;
    237     goto load_error;
    238   }
    239 
    240   uint16_t size;
    241   in = read(fd, &size, sizeof(uint16_t));
    242   if (in != sizeof(uint16_t)) {
    243     rv = 6;
    244     goto load_error;
    245   }
    246 
    247   if (size != sizeof(struct bloom)) {
    248     rv = 7;
    249     goto load_error;
    250   }
    251 
    252   in = read(fd, bloom, sizeof(struct bloom));
    253   if (in != sizeof(struct bloom)) {
    254     rv = 8;
    255     goto load_error;
    256   }
    257 
    258   bloom->bf = NULL;
    259   if (bloom->major != BLOOM_VERSION_MAJOR) {
    260     rv = 9;
    261     goto load_error;
    262   }
    263 
    264   bloom->bf = (unsigned char *)malloc(bloom->bytes);
    265   if (bloom->bf == NULL) { rv = 10; goto load_error; }       // LCOV_EXCL_LINE
    266 
    267   in = read(fd, bloom->bf, bloom->bytes);
    268   if (in != bloom->bytes) {
    269     rv = 11;
    270     free(bloom->bf);
    271     bloom->bf = NULL;
    272     goto load_error;
    273   }
    274 
    275   close(fd);
    276   return rv;
    277 
    278  load_error:
    279   close(fd);
    280   bloom->ready = 0;
    281   return rv;
    282 }
    283 
    284 
    285 int bloom_merge(struct bloom * bloom_dest, struct bloom * bloom_src)
    286 {
    287   if (bloom_dest->ready == 0) {
    288     printf("bloom at %p not initialized!\n", (void *)bloom_dest);
    289     return -1;
    290   }
    291 
    292   if (bloom_src->ready == 0) {
    293     printf("bloom at %p not initialized!\n", (void *)bloom_src);
    294     return -1;
    295   }
    296 
    297   if (bloom_dest->entries != bloom_src->entries) {
    298     return 1;
    299   }
    300 
    301   if (bloom_dest->error != bloom_src->error) {
    302     return 1;
    303   }
    304 
    305   if (bloom_dest->major != bloom_src->major) {
    306     return 1;
    307   }
    308 
    309   if (bloom_dest->minor != bloom_src->minor) {
    310     return 1;
    311   }
    312 
    313   // Not really possible if properly used but check anyway to avoid the
    314   // possibility of buffer overruns.
    315   if (bloom_dest->bytes != bloom_src->bytes) {
    316     return 1;                                                // LCOV_EXCL_LINE
    317   }
    318 
    319   unsigned long int p;
    320   for (p = 0; p < bloom_dest->bytes; p++) {
    321     bloom_dest->bf[p] |= bloom_src->bf[p];
    322   }
    323 
    324   return 0;
    325 }
    326 
    327 
    328 const char * bloom_version()
    329 {
    330   return MAKESTRING(BLOOM_VERSION);
    331 }