26
24
#include <string.h>
27
25
#include <assert.h>
30
* Since the hash function does bit manipulation, it needs to know
31
* whether it's big or little-endian. ENDIAN_LITTLE and ENDIAN_BIG
32
* are set in the configure script.
35
# define HASH_LITTLE_ENDIAN 0
36
# define HASH_BIG_ENDIAN 1
38
# if ENDIAN_LITTLE == 1
39
# define HASH_LITTLE_ENDIAN 1
40
# define HASH_BIG_ENDIAN 0
42
# define HASH_LITTLE_ENDIAN 0
43
# define HASH_BIG_ENDIAN 0
47
/*
 * rot(x,k) -- combine x shifted left by k with x shifted right by (32-k).
 *
 * For a 32-bit unsigned x and 0 < k < 32 the two shifted halves occupy
 * disjoint bit positions, so the XOR acts like an OR and the result is x
 * rotated left by k bits.
 *
 * Caveats: k == 0 (or k >= 32) makes one of the shift counts equal to or
 * larger than 32, which is undefined for a 32-bit operand. Both x and k are
 * expanded twice, so arguments must not have side effects.
 */
#define rot(x,k) (((x)<<(k)) ^ ((x)>>(32-(k))))
50
-------------------------------------------------------------------------------
51
mix -- mix 3 32-bit values reversibly.
53
This is reversible, so any information in (a,b,c) before mix() is
54
still in (a,b,c) after mix().
56
If four pairs of (a,b,c) inputs are run through mix(), or through
57
mix() in reverse, there are at least 32 bits of the output that
58
are sometimes the same for one pair and different for another pair.
60
* pairs that differed by one bit, by two bits, in any combination
61
of top bits of (a,b,c), or in any combination of bottom bits of
63
* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
64
the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
65
is commonly produced by subtraction) looks like a single 1-bit
67
* the base values were pseudorandom, all zero but one bit set, or
68
all zero plus a counter that starts at zero.
70
Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
75
Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
76
for "differ" defined as + with a one-bit base and a two-bit delta. I
77
used http://burtleburtle.net/bob/hash/avalanche.html to choose
78
the operations, constants, and arrangements of the variables.
80
This does not achieve avalanche. There are input bits of (a,b,c)
81
that fail to affect some output bits of (a,b,c), especially of a. The
82
most thoroughly mixed value is c, but it doesn't really even achieve
85
This allows some parallelism. Read-after-writes are good at doubling
86
the number of bits affected, so the goal of mixing pulls in the opposite
87
direction as the goal of parallelism. I did what I could. Rotates
88
seem to cost as much as shifts on every machine I could lay my hands
89
on, and rotates are much kinder to the top and bottom bits, so I used
91
-------------------------------------------------------------------------------
95
a -= c; a ^= rot(c, 4); c += b; \
96
b -= a; b ^= rot(a, 6); a += c; \
97
c -= b; c ^= rot(b, 8); b += a; \
98
a -= c; a ^= rot(c,16); c += b; \
99
b -= a; b ^= rot(a,19); a += c; \
100
c -= b; c ^= rot(b, 4); b += a; \
104
-------------------------------------------------------------------------------
105
final -- final mixing of 3 32-bit values (a,b,c) into c
107
Pairs of (a,b,c) values differing in only a few bits will usually
108
produce values of c that look totally different. This was tested for
109
* pairs that differed by one bit, by two bits, in any combination
110
of top bits of (a,b,c), or in any combination of bottom bits of
112
* "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
113
the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
114
is commonly produced by subtraction) looks like a single 1-bit
116
* the base values were pseudorandom, all zero but one bit set, or
117
all zero plus a counter that starts at zero.
119
These constants passed:
122
and these came close:
126
-------------------------------------------------------------------------------
128
#define final(a,b,c) \
130
c ^= b; c -= rot(b,14); \
131
a ^= c; a -= rot(c,11); \
132
b ^= a; b -= rot(a,25); \
133
c ^= b; c -= rot(b,16); \
134
a ^= c; a -= rot(c,4); \
135
b ^= a; b -= rot(a,14); \
136
c ^= b; c -= rot(b,24); \
139
#if HASH_LITTLE_ENDIAN == 1
141
const void *key, /* the key to hash */
142
size_t length, /* length of the key */
143
const uint32_t initval) /* initval */
145
uint32_t a,b,c; /* internal state */
146
union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
148
/* Set up the internal state */
149
a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
152
if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
153
const uint32_t *k = key; /* read 32-bit chunks */
156
#endif /* ifdef VALGRIND */
158
/*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
169
/*----------------------------- handle the last (probably partial) block */
171
* "k[2]&0xffffff" actually reads beyond the end of the string, but
172
* then masks off the part it's not allowed to read. Because the
173
* string is aligned, the masked-off tail is in the same word as the
174
* rest of the string. Every machine with memory protection I've seen
175
* does it on word boundaries, so is OK with this. But VALGRIND will
176
* still catch it and complain. The masking trick does make the hash
177
* noticeably faster for short strings (like English words).
183
case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
184
case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
185
case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
186
case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
187
case 8 : b+=k[1]; a+=k[0]; break;
188
case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
189
case 6 : b+=k[1]&0xffff; a+=k[0]; break;
190
case 5 : b+=k[1]&0xff; a+=k[0]; break;
191
case 4 : a+=k[0]; break;
192
case 3 : a+=k[0]&0xffffff; break;
193
case 2 : a+=k[0]&0xffff; break;
194
case 1 : a+=k[0]&0xff; break;
195
case 0 : return c; /* zero length strings require no mixing */
198
#else /* make valgrind happy */
200
k8 = (const uint8_t *)k;
203
case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
204
case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
205
case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
206
case 9 : c+=k8[8]; /* fall through */
207
case 8 : b+=k[1]; a+=k[0]; break;
208
case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
209
case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
210
case 5 : b+=k8[4]; /* fall through */
211
case 4 : a+=k[0]; break;
212
case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
213
case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
214
case 1 : a+=k8[0]; break;
215
case 0 : return c; /* zero length strings require no mixing */
218
#endif /* !valgrind */
220
} else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
221
const uint16_t *k = key; /* read 16-bit chunks */
224
/*--------------- all but last block: aligned reads and different mixing */
227
a += k[0] + (((uint32_t)k[1])<<16);
228
b += k[2] + (((uint32_t)k[3])<<16);
229
c += k[4] + (((uint32_t)k[5])<<16);
235
/*----------------------------- handle the last (probably partial) block */
236
k8 = (const uint8_t *)k;
239
case 12: c+=k[4]+(((uint32_t)k[5])<<16);
240
b+=k[2]+(((uint32_t)k[3])<<16);
241
a+=k[0]+(((uint32_t)k[1])<<16);
243
case 11: c+=((uint32_t)k8[10])<<16; /* @fallthrough */
244
case 10: c+=k[4]; /* @fallthrough@ */
245
b+=k[2]+(((uint32_t)k[3])<<16);
246
a+=k[0]+(((uint32_t)k[1])<<16);
248
case 9 : c+=k8[8]; /* @fallthrough */
249
case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
250
a+=k[0]+(((uint32_t)k[1])<<16);
252
case 7 : b+=((uint32_t)k8[6])<<16; /* @fallthrough */
254
a+=k[0]+(((uint32_t)k[1])<<16);
256
case 5 : b+=k8[4]; /* @fallthrough */
257
case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
259
case 3 : a+=((uint32_t)k8[2])<<16; /* @fallthrough */
264
case 0 : return c; /* zero length strings require no mixing */
267
} else { /* need to read the key one byte at a time */
268
const uint8_t *k = key;
270
/*--------------- all but the last block: affect some 32 bits of (a,b,c) */
274
a += ((uint32_t)k[1])<<8;
275
a += ((uint32_t)k[2])<<16;
276
a += ((uint32_t)k[3])<<24;
278
b += ((uint32_t)k[5])<<8;
279
b += ((uint32_t)k[6])<<16;
280
b += ((uint32_t)k[7])<<24;
282
c += ((uint32_t)k[9])<<8;
283
c += ((uint32_t)k[10])<<16;
284
c += ((uint32_t)k[11])<<24;
290
/*-------------------------------- last block: affect all 32 bits of (c) */
291
switch(length) /* all the case statements fall through */
293
case 12: c+=((uint32_t)k[11])<<24;
294
case 11: c+=((uint32_t)k[10])<<16;
295
case 10: c+=((uint32_t)k[9])<<8;
297
case 8 : b+=((uint32_t)k[7])<<24;
298
case 7 : b+=((uint32_t)k[6])<<16;
299
case 6 : b+=((uint32_t)k[5])<<8;
301
case 4 : a+=((uint32_t)k[3])<<24;
302
case 3 : a+=((uint32_t)k[2])<<16;
303
case 2 : a+=((uint32_t)k[1])<<8;
306
case 0 : return c; /* zero length strings require no mixing */
311
return c; /* zero length strings require no mixing */
314
#elif HASH_BIG_ENDIAN == 1
317
* This is the same as hashword() on big-endian machines. It is different
318
* from hashlittle() on all machines. hashbig() takes advantage of
319
* big-endian byte ordering.
321
uint32_t hash( const void *key, size_t length, const uint32_t initval)
324
union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
326
/* Set up the internal state */
327
a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
330
if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
331
const uint32_t *k = key; /* read 32-bit chunks */
334
#endif /* ifdef VALGRIND */
336
/*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
347
/*----------------------------- handle the last (probably partial) block */
349
* "k[2]&0xffffff00" actually reads beyond the end of the string, but
350
* then masks off the part it's not allowed to read. Because the
351
* string is aligned, the illegal read is in the same word as the
352
* rest of the string. Every machine with memory protection I've seen
353
* does it on word boundaries, so is OK with this. But VALGRIND will
354
* still catch it and complain. The masking trick does make the hash
355
* noticeably faster for short strings (like English words).
361
case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
362
case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
363
case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
364
case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
365
case 8 : b+=k[1]; a+=k[0]; break;
366
case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
367
case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
368
case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
369
case 4 : a+=k[0]; break;
370
case 3 : a+=k[0]&0xffffff00; break;
371
case 2 : a+=k[0]&0xffff0000; break;
372
case 1 : a+=k[0]&0xff000000; break;
373
case 0 : return c; /* zero length strings require no mixing */
376
#else /* make valgrind happy */
378
k8 = (const uint8_t *)k;
379
switch(length) /* all the case statements fall through */
381
case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
382
case 11: c+=((uint32_t)k8[10])<<8; /* fall through */
383
case 10: c+=((uint32_t)k8[9])<<16; /* fall through */
384
case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */
385
case 8 : b+=k[1]; a+=k[0]; break;
386
case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */
387
case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */
388
case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */
389
case 4 : a+=k[0]; break;
390
case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */
391
case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */
392
case 1 : a+=((uint32_t)k8[0])<<24; break;
396
#endif /* !VALGRIND */
398
} else { /* need to read the key one byte at a time */
399
const uint8_t *k = key;
401
/*--------------- all but the last block: affect some 32 bits of (a,b,c) */
404
a += ((uint32_t)k[0])<<24;
405
a += ((uint32_t)k[1])<<16;
406
a += ((uint32_t)k[2])<<8;
407
a += ((uint32_t)k[3]);
408
b += ((uint32_t)k[4])<<24;
409
b += ((uint32_t)k[5])<<16;
410
b += ((uint32_t)k[6])<<8;
411
b += ((uint32_t)k[7]);
412
c += ((uint32_t)k[8])<<24;
413
c += ((uint32_t)k[9])<<16;
414
c += ((uint32_t)k[10])<<8;
415
c += ((uint32_t)k[11]);
421
/*-------------------------------- last block: affect all 32 bits of (c) */
422
switch(length) /* all the case statements fall through */
425
case 11: c+=((uint32_t)k[10])<<8;
426
case 10: c+=((uint32_t)k[9])<<16;
427
case 9 : c+=((uint32_t)k[8])<<24;
429
case 7 : b+=((uint32_t)k[6])<<8;
430
case 6 : b+=((uint32_t)k[5])<<16;
431
case 5 : b+=((uint32_t)k[4])<<24;
433
case 3 : a+=((uint32_t)k[2])<<8;
434
case 2 : a+=((uint32_t)k[1])<<16;
435
case 1 : a+=((uint32_t)k[0])<<24;
444
#else /* HASH_XXX_ENDIAN == 1 */
445
#error Must define HASH_BIG_ENDIAN or HASH_LITTLE_ENDIAN
446
#endif /* HASH_XXX_ENDIAN == 1 */
28
/*
 * File-local condition variable, statically initialized with
 * PTHREAD_COND_INITIALIZER (no pthread_cond_init() call is required).
 * NOTE(review): presumably used to wake a maintenance thread; the code that
 * waits on or signals it is outside this view -- confirm.
 */
static pthread_cond_t maintenance_cond = PTHREAD_COND_INITIALIZER;
448
31
/*
 * NOTE(review): the C standard only guarantees that `unsigned long` is at
 * least 32 bits wide; on LP64 platforms it is 8 bytes, so the "4-byte"
 * description holds only where long is 32 bits. Confirm whether users of ub4
 * depend on the exact width before relying on it.
 */
typedef unsigned long int ub4; /* intended as unsigned 4-byte quantities */
449
32
/* Byte-sized unsigned type; sizeof(unsigned char) is 1 by definition in C. */
typedef unsigned char ub1; /* unsigned 1-byte quantities */