hashfunctions

generic hash functions and PRFs: Jenkins, FNV, siphash
git clone https://noulin.net/git/hashfunctions.git
Log | Files | Refs | LICENSE

siphash.c (13845B)


      1 /*
      2    SipHash reference C implementation
      3 
      4    Copyright (c) 2012-2016 Jean-Philippe Aumasson
      5    <jeanphilippe.aumasson@gmail.com>
      6    Copyright (c) 2012-2014 Daniel J. Bernstein <djb@cr.yp.to>
      7    Copyright (c) 2017 Salvatore Sanfilippo <antirez@gmail.com>
      8 
      9    To the extent possible under law, the author(s) have dedicated all copyright
     10    and related and neighboring rights to this software to the public domain
     11    worldwide. This software is distributed without any warranty.
     12 
     13    You should have received a copy of the CC0 Public Domain Dedication along
     14    with this software. If not, see
     15    <http://creativecommons.org/publicdomain/zero/1.0/>.
     16 
     17    ----------------------------------------------------------------------------
     18 
     19    This version was modified by Salvatore Sanfilippo <antirez@gmail.com>
     20    in the following ways:
     21 
     22    1. We use SipHash 1-2. This is not believed to be as strong as the
     23       suggested 2-4 variant, but AFAIK there are not trivial attacks
     24       against this reduced-rounds version, and it runs at the same speed
     25       as Murmurhash2 that we used previously, why the 2-4 variant slowed
     26       down Redis by a 4% figure more or less.
     27    2. Hard-code rounds in the hope the compiler can optimize it more
     28       in this raw from. Anyway we always want the standard 2-4 variant.
     29    3. Modify the prototype and implementation so that the function directly
     30       returns an uint64_t value, the hash itself, instead of receiving an
     31       output buffer. This also means that the output size is set to 8 bytes
     32       and the 16 bytes output code handling was removed.
     33    4. Provide a case insensitive variant to be used when hashing strings that
     34       must be considered identical by the hash table regardless of the case.
     35       If we don't have directly a case insensitive hash function, we need to
     36       perform a text transformation in some temporary buffer, which is costly.
     37    5. Remove debugging code.
     38    6. Modified the original test.c file to be a stand-alone function testing
     39       the function in the new form (returing an uint64_t) using just the
     40       relevant test vector.
     41  */
     42 #include <assert.h>
     43 #include <stdint.h>
     44 #include <stdio.h>
     45 #include <string.h>
     46 #include <ctype.h>
     47 
     48 /* Fast tolower() alike function that does not care about locale
     49  * but just returns a-z insetad of A-Z. */
     50 int siptlw(int c) {
     51     if (c >= 'A' && c <= 'Z') {
     52         return c+('a'-'A');
     53     } else {
     54         return c;
     55     }
     56 }
     57 
     58 /* Test of the CPU is Little Endian and supports not aligned accesses.
     59  * Two interesting conditions to speedup the function that happen to be
     60  * in most of x86 servers. */
     61 #if defined(__X86_64__) || defined(__x86_64__) || defined (__i386__)
     62 #define UNALIGNED_LE_CPU
     63 #endif
     64 
     65 #define ROTL(x, b) (uint64_t)(((x) << (b)) | ((x) >> (64 - (b))))
     66 
     67 #define U32TO8_LE(p, v)                                                        \
     68     (p)[0] = (uint8_t)((v));                                                   \
     69     (p)[1] = (uint8_t)((v) >> 8);                                              \
     70     (p)[2] = (uint8_t)((v) >> 16);                                             \
     71     (p)[3] = (uint8_t)((v) >> 24);
     72 
     73 #define U64TO8_LE(p, v)                                                        \
     74     U32TO8_LE((p), (uint32_t)((v)));                                           \
     75     U32TO8_LE((p) + 4, (uint32_t)((v) >> 32));
     76 
     77 #ifdef UNALIGNED_LE_CPU
     78 #define U8TO64_LE(p) (*((uint64_t*)(p)))
     79 #else
     80 #define U8TO64_LE(p)                                                           \
     81     (((uint64_t)((p)[0])) | ((uint64_t)((p)[1]) << 8) |                        \
     82      ((uint64_t)((p)[2]) << 16) | ((uint64_t)((p)[3]) << 24) |                 \
     83      ((uint64_t)((p)[4]) << 32) | ((uint64_t)((p)[5]) << 40) |                 \
     84      ((uint64_t)((p)[6]) << 48) | ((uint64_t)((p)[7]) << 56))
     85 #endif
     86 
     87 #define U8TO64_LE_NOCASE(p)                                                    \
     88     (((uint64_t)(siptlw((p)[0]))) |                                           \
     89      ((uint64_t)(siptlw((p)[1])) << 8) |                                      \
     90      ((uint64_t)(siptlw((p)[2])) << 16) |                                     \
     91      ((uint64_t)(siptlw((p)[3])) << 24) |                                     \
     92      ((uint64_t)(siptlw((p)[4])) << 32) |                                              \
     93      ((uint64_t)(siptlw((p)[5])) << 40) |                                              \
     94      ((uint64_t)(siptlw((p)[6])) << 48) |                                              \
     95      ((uint64_t)(siptlw((p)[7])) << 56))
     96 
     97 #define SIPROUND                                                               \
     98     do {                                                                       \
     99         v0 += v1;                                                              \
    100         v1 = ROTL(v1, 13);                                                     \
    101         v1 ^= v0;                                                              \
    102         v0 = ROTL(v0, 32);                                                     \
    103         v2 += v3;                                                              \
    104         v3 = ROTL(v3, 16);                                                     \
    105         v3 ^= v2;                                                              \
    106         v0 += v3;                                                              \
    107         v3 = ROTL(v3, 21);                                                     \
    108         v3 ^= v0;                                                              \
    109         v2 += v1;                                                              \
    110         v1 = ROTL(v1, 17);                                                     \
    111         v1 ^= v2;                                                              \
    112         v2 = ROTL(v2, 32);                                                     \
    113     } while (0)
    114 
    115 uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k) {
    116 #ifndef UNALIGNED_LE_CPU
    117     uint64_t hash;
    118     uint8_t *out = (uint8_t*) &hash;
    119 #endif
    120     uint64_t v0 = 0x736f6d6570736575ULL;
    121     uint64_t v1 = 0x646f72616e646f6dULL;
    122     uint64_t v2 = 0x6c7967656e657261ULL;
    123     uint64_t v3 = 0x7465646279746573ULL;
    124     uint64_t k0 = U8TO64_LE(k);
    125     uint64_t k1 = U8TO64_LE(k + 8);
    126     uint64_t m;
    127     const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t));
    128     const int left = inlen & 7;
    129     uint64_t b = ((uint64_t)inlen) << 56;
    130     v3 ^= k1;
    131     v2 ^= k0;
    132     v1 ^= k1;
    133     v0 ^= k0;
    134 
    135     for (; in != end; in += 8) {
    136         m = U8TO64_LE(in);
    137         v3 ^= m;
    138 
    139         SIPROUND;
    140 
    141         v0 ^= m;
    142     }
    143 
    144     switch (left) {
    145     case 7: b |= ((uint64_t)in[6]) << 48;
    146     case 6: b |= ((uint64_t)in[5]) << 40;
    147     case 5: b |= ((uint64_t)in[4]) << 32;
    148     case 4: b |= ((uint64_t)in[3]) << 24;
    149     case 3: b |= ((uint64_t)in[2]) << 16;
    150     case 2: b |= ((uint64_t)in[1]) << 8;
    151     case 1: b |= ((uint64_t)in[0]); break;
    152     case 0: break;
    153     }
    154 
    155     v3 ^= b;
    156 
    157     SIPROUND;
    158 
    159     v0 ^= b;
    160     v2 ^= 0xff;
    161 
    162     SIPROUND;
    163     SIPROUND;
    164 
    165     b = v0 ^ v1 ^ v2 ^ v3;
    166 #ifndef UNALIGNED_LE_CPU
    167     U64TO8_LE(out, b);
    168     return hash;
    169 #else
    170     return b;
    171 #endif
    172 }
    173 
    174 uint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k)
    175 {
    176 #ifndef UNALIGNED_LE_CPU
    177     uint64_t hash;
    178     uint8_t *out = (uint8_t*) &hash;
    179 #endif
    180     uint64_t v0 = 0x736f6d6570736575ULL;
    181     uint64_t v1 = 0x646f72616e646f6dULL;
    182     uint64_t v2 = 0x6c7967656e657261ULL;
    183     uint64_t v3 = 0x7465646279746573ULL;
    184     uint64_t k0 = U8TO64_LE(k);
    185     uint64_t k1 = U8TO64_LE(k + 8);
    186     uint64_t m;
    187     const uint8_t *end = in + inlen - (inlen % sizeof(uint64_t));
    188     const int left = inlen & 7;
    189     uint64_t b = ((uint64_t)inlen) << 56;
    190     v3 ^= k1;
    191     v2 ^= k0;
    192     v1 ^= k1;
    193     v0 ^= k0;
    194 
    195     for (; in != end; in += 8) {
    196         m = U8TO64_LE_NOCASE(in);
    197         v3 ^= m;
    198 
    199         SIPROUND;
    200 
    201         v0 ^= m;
    202     }
    203 
    204     switch (left) {
    205     case 7: b |= ((uint64_t)siptlw(in[6])) << 48;
    206     case 6: b |= ((uint64_t)siptlw(in[5])) << 40;
    207     case 5: b |= ((uint64_t)siptlw(in[4])) << 32;
    208     case 4: b |= ((uint64_t)siptlw(in[3])) << 24;
    209     case 3: b |= ((uint64_t)siptlw(in[2])) << 16;
    210     case 2: b |= ((uint64_t)siptlw(in[1])) << 8;
    211     case 1: b |= ((uint64_t)siptlw(in[0])); break;
    212     case 0: break;
    213     }
    214 
    215     v3 ^= b;
    216 
    217     SIPROUND;
    218 
    219     v0 ^= b;
    220     v2 ^= 0xff;
    221 
    222     SIPROUND;
    223     SIPROUND;
    224 
    225     b = v0 ^ v1 ^ v2 ^ v3;
    226 #ifndef UNALIGNED_LE_CPU
    227     U64TO8_LE(out, b);
    228     return hash;
    229 #else
    230     return b;
    231 #endif
    232 }
    233 
    234 
    235 /* --------------------------------- TEST ------------------------------------ */
    236 
    237 #ifdef SIPHASH_TEST
    238 
    239 const uint8_t vectors_sip64[64][8] = {
    240     { 0x31, 0x0e, 0x0e, 0xdd, 0x47, 0xdb, 0x6f, 0x72, },
    241     { 0xfd, 0x67, 0xdc, 0x93, 0xc5, 0x39, 0xf8, 0x74, },
    242     { 0x5a, 0x4f, 0xa9, 0xd9, 0x09, 0x80, 0x6c, 0x0d, },
    243     { 0x2d, 0x7e, 0xfb, 0xd7, 0x96, 0x66, 0x67, 0x85, },
    244     { 0xb7, 0x87, 0x71, 0x27, 0xe0, 0x94, 0x27, 0xcf, },
    245     { 0x8d, 0xa6, 0x99, 0xcd, 0x64, 0x55, 0x76, 0x18, },
    246     { 0xce, 0xe3, 0xfe, 0x58, 0x6e, 0x46, 0xc9, 0xcb, },
    247     { 0x37, 0xd1, 0x01, 0x8b, 0xf5, 0x00, 0x02, 0xab, },
    248     { 0x62, 0x24, 0x93, 0x9a, 0x79, 0xf5, 0xf5, 0x93, },
    249     { 0xb0, 0xe4, 0xa9, 0x0b, 0xdf, 0x82, 0x00, 0x9e, },
    250     { 0xf3, 0xb9, 0xdd, 0x94, 0xc5, 0xbb, 0x5d, 0x7a, },
    251     { 0xa7, 0xad, 0x6b, 0x22, 0x46, 0x2f, 0xb3, 0xf4, },
    252     { 0xfb, 0xe5, 0x0e, 0x86, 0xbc, 0x8f, 0x1e, 0x75, },
    253     { 0x90, 0x3d, 0x84, 0xc0, 0x27, 0x56, 0xea, 0x14, },
    254     { 0xee, 0xf2, 0x7a, 0x8e, 0x90, 0xca, 0x23, 0xf7, },
    255     { 0xe5, 0x45, 0xbe, 0x49, 0x61, 0xca, 0x29, 0xa1, },
    256     { 0xdb, 0x9b, 0xc2, 0x57, 0x7f, 0xcc, 0x2a, 0x3f, },
    257     { 0x94, 0x47, 0xbe, 0x2c, 0xf5, 0xe9, 0x9a, 0x69, },
    258     { 0x9c, 0xd3, 0x8d, 0x96, 0xf0, 0xb3, 0xc1, 0x4b, },
    259     { 0xbd, 0x61, 0x79, 0xa7, 0x1d, 0xc9, 0x6d, 0xbb, },
    260     { 0x98, 0xee, 0xa2, 0x1a, 0xf2, 0x5c, 0xd6, 0xbe, },
    261     { 0xc7, 0x67, 0x3b, 0x2e, 0xb0, 0xcb, 0xf2, 0xd0, },
    262     { 0x88, 0x3e, 0xa3, 0xe3, 0x95, 0x67, 0x53, 0x93, },
    263     { 0xc8, 0xce, 0x5c, 0xcd, 0x8c, 0x03, 0x0c, 0xa8, },
    264     { 0x94, 0xaf, 0x49, 0xf6, 0xc6, 0x50, 0xad, 0xb8, },
    265     { 0xea, 0xb8, 0x85, 0x8a, 0xde, 0x92, 0xe1, 0xbc, },
    266     { 0xf3, 0x15, 0xbb, 0x5b, 0xb8, 0x35, 0xd8, 0x17, },
    267     { 0xad, 0xcf, 0x6b, 0x07, 0x63, 0x61, 0x2e, 0x2f, },
    268     { 0xa5, 0xc9, 0x1d, 0xa7, 0xac, 0xaa, 0x4d, 0xde, },
    269     { 0x71, 0x65, 0x95, 0x87, 0x66, 0x50, 0xa2, 0xa6, },
    270     { 0x28, 0xef, 0x49, 0x5c, 0x53, 0xa3, 0x87, 0xad, },
    271     { 0x42, 0xc3, 0x41, 0xd8, 0xfa, 0x92, 0xd8, 0x32, },
    272     { 0xce, 0x7c, 0xf2, 0x72, 0x2f, 0x51, 0x27, 0x71, },
    273     { 0xe3, 0x78, 0x59, 0xf9, 0x46, 0x23, 0xf3, 0xa7, },
    274     { 0x38, 0x12, 0x05, 0xbb, 0x1a, 0xb0, 0xe0, 0x12, },
    275     { 0xae, 0x97, 0xa1, 0x0f, 0xd4, 0x34, 0xe0, 0x15, },
    276     { 0xb4, 0xa3, 0x15, 0x08, 0xbe, 0xff, 0x4d, 0x31, },
    277     { 0x81, 0x39, 0x62, 0x29, 0xf0, 0x90, 0x79, 0x02, },
    278     { 0x4d, 0x0c, 0xf4, 0x9e, 0xe5, 0xd4, 0xdc, 0xca, },
    279     { 0x5c, 0x73, 0x33, 0x6a, 0x76, 0xd8, 0xbf, 0x9a, },
    280     { 0xd0, 0xa7, 0x04, 0x53, 0x6b, 0xa9, 0x3e, 0x0e, },
    281     { 0x92, 0x59, 0x58, 0xfc, 0xd6, 0x42, 0x0c, 0xad, },
    282     { 0xa9, 0x15, 0xc2, 0x9b, 0xc8, 0x06, 0x73, 0x18, },
    283     { 0x95, 0x2b, 0x79, 0xf3, 0xbc, 0x0a, 0xa6, 0xd4, },
    284     { 0xf2, 0x1d, 0xf2, 0xe4, 0x1d, 0x45, 0x35, 0xf9, },
    285     { 0x87, 0x57, 0x75, 0x19, 0x04, 0x8f, 0x53, 0xa9, },
    286     { 0x10, 0xa5, 0x6c, 0xf5, 0xdf, 0xcd, 0x9a, 0xdb, },
    287     { 0xeb, 0x75, 0x09, 0x5c, 0xcd, 0x98, 0x6c, 0xd0, },
    288     { 0x51, 0xa9, 0xcb, 0x9e, 0xcb, 0xa3, 0x12, 0xe6, },
    289     { 0x96, 0xaf, 0xad, 0xfc, 0x2c, 0xe6, 0x66, 0xc7, },
    290     { 0x72, 0xfe, 0x52, 0x97, 0x5a, 0x43, 0x64, 0xee, },
    291     { 0x5a, 0x16, 0x45, 0xb2, 0x76, 0xd5, 0x92, 0xa1, },
    292     { 0xb2, 0x74, 0xcb, 0x8e, 0xbf, 0x87, 0x87, 0x0a, },
    293     { 0x6f, 0x9b, 0xb4, 0x20, 0x3d, 0xe7, 0xb3, 0x81, },
    294     { 0xea, 0xec, 0xb2, 0xa3, 0x0b, 0x22, 0xa8, 0x7f, },
    295     { 0x99, 0x24, 0xa4, 0x3c, 0xc1, 0x31, 0x57, 0x24, },
    296     { 0xbd, 0x83, 0x8d, 0x3a, 0xaf, 0xbf, 0x8d, 0xb7, },
    297     { 0x0b, 0x1a, 0x2a, 0x32, 0x65, 0xd5, 0x1a, 0xea, },
    298     { 0x13, 0x50, 0x79, 0xa3, 0x23, 0x1c, 0xe6, 0x60, },
    299     { 0x93, 0x2b, 0x28, 0x46, 0xe4, 0xd7, 0x06, 0x66, },
    300     { 0xe1, 0x91, 0x5f, 0x5c, 0xb1, 0xec, 0xa4, 0x6c, },
    301     { 0xf3, 0x25, 0x96, 0x5c, 0xa1, 0x6d, 0x62, 0x9f, },
    302     { 0x57, 0x5f, 0xf2, 0x8e, 0x60, 0x38, 0x1b, 0xe5, },
    303     { 0x72, 0x45, 0x06, 0xeb, 0x4c, 0x32, 0x8a, 0x95, },
    304 };
    305 
    306 
    307 /* Test siphash using a test vector. Returns 0 if the function passed
    308  * all the tests, otherwise 1 is returned.
    309  *
    310  * IMPORTANT: The test vector is for SipHash 2-4. Before running
    311  * the test revert back the siphash() function to 2-4 rounds since
    312  * now it uses 1-2 rounds. */
    313 int siphash_test(void) {
    314     uint8_t in[64], k[16];
    315     int i;
    316     int fails = 0;
    317 
    318     for (i = 0; i < 16; ++i)
    319         k[i] = i;
    320 
    321     for (i = 0; i < 64; ++i) {
    322         in[i] = i;
    323         uint64_t hash = siphash(in, i, k);
    324         const uint8_t *v = NULL;
    325         v = (uint8_t *)vectors_sip64;
    326         if (memcmp(&hash, v + (i * 8), 8)) {
    327             /* printf("fail for %d bytes\n", i); */
    328             fails++;
    329         }
    330     }
    331 
    332     /* Run a few basic tests with the case insensitive version. */
    333     uint64_t h1, h2;
    334     h1 = siphash((uint8_t*)"hello world",11,(uint8_t*)"1234567812345678");
    335     h2 = siphash_nocase((uint8_t*)"hello world",11,(uint8_t*)"1234567812345678");
    336     if (h1 != h2) fails++;
    337 
    338     h1 = siphash((uint8_t*)"hello world",11,(uint8_t*)"1234567812345678");
    339     h2 = siphash_nocase((uint8_t*)"HELLO world",11,(uint8_t*)"1234567812345678");
    340     if (h1 != h2) fails++;
    341 
    342     h1 = siphash((uint8_t*)"HELLO world",11,(uint8_t*)"1234567812345678");
    343     h2 = siphash_nocase((uint8_t*)"HELLO world",11,(uint8_t*)"1234567812345678");
    344     if (h1 == h2) fails++;
    345 
    346     if (!fails) return 0;
    347     return 1;
    348 }
    349 
    350 int main(void) {
    351     if (siphash_test() == 0) {
    352         printf("SipHash test: OK\n");
    353         return 0;
    354     } else {
    355         printf("SipHash test: FAILED\n");
    356         return 1;
    357     }
    358 }
    359 
    360 #endif