2
* hash.c Non-thread-safe split-ordered hash table.
4
* The weird "reverse" function is based on an idea from
5
* "Split-Ordered Lists - Lock-free Resizable Hash Tables", with
6
* modifications so that they're not lock-free. :(
8
* However, the split-order idea allows a fast & easy splitting of the
9
* hash bucket chain when the hash table is resized.
11
* This implementation can do ~10^6 lookups/s, which should be fast
12
* enough for most people.
14
* Version: $Id: hash.c,v 1.9.2.8 2006/08/22 23:02:36 aland Exp $
16
* This library is free software; you can redistribute it and/or
17
* modify it under the terms of the GNU Lesser General Public
18
* License as published by the Free Software Foundation; either
19
* version 2.1 of the License, or (at your option) any later version.
21
* This library is distributed in the hope that it will be useful,
22
* but WITHOUT ANY WARRANTY; without even the implied warranty of
23
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24
* Lesser General Public License for more details.
26
* You should have received a copy of the GNU Lesser General Public
27
* License along with this library; if not, write to the Free Software
28
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
30
* Copyright 2005 The FreeRADIUS server project
33
static const char rcsid[] = "$Id: hash.c,v 1.9.2.8 2006/08/22 23:02:36 aland Exp $";
41
#include "libradius.h"
44
* A reasonable number of buckets to start off with.
45
* Should be a power of two.
47
#define LRAD_HASH_NUM_BUCKETS (64)
49
typedef struct lrad_hash_entry_t {
50
struct lrad_hash_entry_t *next;
57
struct lrad_hash_table_t {
59
int num_buckets; /* power of 2 */
63
lrad_hash_table_free_t free;
64
lrad_hash_table_hash_t hash;
65
lrad_hash_table_cmp_t cmp;
67
lrad_hash_entry_t null;
69
lrad_hash_entry_t **buckets;
77
* perl -e 'foreach $i (0..255) {$r = 0; foreach $j (0 .. 7 ) { if (($i & ( 1<< $j)) != 0) { $r |= (1 << (7 - $j));}} print $r, ", ";if (($i & 7) == 7) {print "\n";}}'
79
static const uint8_t reversed_byte[256] = {
80
0, 128, 64, 192, 32, 160, 96, 224,
81
16, 144, 80, 208, 48, 176, 112, 240,
82
8, 136, 72, 200, 40, 168, 104, 232,
83
24, 152, 88, 216, 56, 184, 120, 248,
84
4, 132, 68, 196, 36, 164, 100, 228,
85
20, 148, 84, 212, 52, 180, 116, 244,
86
12, 140, 76, 204, 44, 172, 108, 236,
87
28, 156, 92, 220, 60, 188, 124, 252,
88
2, 130, 66, 194, 34, 162, 98, 226,
89
18, 146, 82, 210, 50, 178, 114, 242,
90
10, 138, 74, 202, 42, 170, 106, 234,
91
26, 154, 90, 218, 58, 186, 122, 250,
92
6, 134, 70, 198, 38, 166, 102, 230,
93
22, 150, 86, 214, 54, 182, 118, 246,
94
14, 142, 78, 206, 46, 174, 110, 238,
95
30, 158, 94, 222, 62, 190, 126, 254,
96
1, 129, 65, 193, 33, 161, 97, 225,
97
17, 145, 81, 209, 49, 177, 113, 241,
98
9, 137, 73, 201, 41, 169, 105, 233,
99
25, 153, 89, 217, 57, 185, 121, 249,
100
5, 133, 69, 197, 37, 165, 101, 229,
101
21, 149, 85, 213, 53, 181, 117, 245,
102
13, 141, 77, 205, 45, 173, 109, 237,
103
29, 157, 93, 221, 61, 189, 125, 253,
104
3, 131, 67, 195, 35, 163, 99, 227,
105
19, 147, 83, 211, 51, 179, 115, 243,
106
11, 139, 75, 203, 43, 171, 107, 235,
107
27, 155, 91, 219, 59, 187, 123, 251,
108
7, 135, 71, 199, 39, 167, 103, 231,
109
23, 151, 87, 215, 55, 183, 119, 247,
110
15, 143, 79, 207, 47, 175, 111, 239,
111
31, 159, 95, 223, 63, 191, 127, 255
116
* perl -e 'foreach $i (0..255) {$r = 0;foreach $j (0 .. 7) { $r = $i & (1 << (7 - $j)); last if ($r)} print $i & ~($r), ", ";if (($i & 7) == 7) {print "\n";}}'
118
static uint8_t parent_byte[256] = {
119
0, 0, 0, 1, 0, 1, 2, 3,
120
0, 1, 2, 3, 4, 5, 6, 7,
121
0, 1, 2, 3, 4, 5, 6, 7,
122
8, 9, 10, 11, 12, 13, 14, 15,
123
0, 1, 2, 3, 4, 5, 6, 7,
124
8, 9, 10, 11, 12, 13, 14, 15,
125
16, 17, 18, 19, 20, 21, 22, 23,
126
24, 25, 26, 27, 28, 29, 30, 31,
127
0, 1, 2, 3, 4, 5, 6, 7,
128
8, 9, 10, 11, 12, 13, 14, 15,
129
16, 17, 18, 19, 20, 21, 22, 23,
130
24, 25, 26, 27, 28, 29, 30, 31,
131
32, 33, 34, 35, 36, 37, 38, 39,
132
40, 41, 42, 43, 44, 45, 46, 47,
133
48, 49, 50, 51, 52, 53, 54, 55,
134
56, 57, 58, 59, 60, 61, 62, 63,
135
0, 1, 2, 3, 4, 5, 6, 7,
136
8, 9, 10, 11, 12, 13, 14, 15,
137
16, 17, 18, 19, 20, 21, 22, 23,
138
24, 25, 26, 27, 28, 29, 30, 31,
139
32, 33, 34, 35, 36, 37, 38, 39,
140
40, 41, 42, 43, 44, 45, 46, 47,
141
48, 49, 50, 51, 52, 53, 54, 55,
142
56, 57, 58, 59, 60, 61, 62, 63,
143
64, 65, 66, 67, 68, 69, 70, 71,
144
72, 73, 74, 75, 76, 77, 78, 79,
145
80, 81, 82, 83, 84, 85, 86, 87,
146
88, 89, 90, 91, 92, 93, 94, 95,
147
96, 97, 98, 99, 100, 101, 102, 103,
148
104, 105, 106, 107, 108, 109, 110, 111,
149
112, 113, 114, 115, 116, 117, 118, 119,
150
120, 121, 122, 123, 124, 125, 126, 127
157
static uint32_t reverse(uint32_t key)
159
return ((reversed_byte[key & 0xff] << 24) |
160
(reversed_byte[(key >> 8) & 0xff] << 16) |
161
(reversed_byte[(key >> 16) & 0xff] << 8) |
162
(reversed_byte[(key >> 24) & 0xff]));
166
* Take the parent by discarding the highest bit that is set.
168
static uint32_t parent_of(uint32_t key)
170
if (key > 0x00ffffff)
171
return (key & 0x00ffffff) | (parent_byte[key >> 24] << 24);
173
if (key > 0x0000ffff)
174
return (key & 0x0000ffff) | (parent_byte[key >> 16] << 16);
176
if (key > 0x000000ff)
177
return (key & 0x000000ff) | (parent_byte[key >> 8] << 8);
179
return parent_byte[key];
183
static lrad_hash_entry_t *list_find(lrad_hash_table_t *ht,
184
lrad_hash_entry_t *head,
188
lrad_hash_entry_t *cur;
190
for (cur = head; cur != &ht->null; cur = cur->next) {
191
if (cur->reversed == reversed) {
193
int cmp = ht->cmp(data, cur->data);
195
if (cmp < 0) continue;
199
if (cur->reversed > reversed) break;
207
* Inserts a new entry into the list, in order.
209
static int list_insert(lrad_hash_table_t *ht,
210
lrad_hash_entry_t **head, lrad_hash_entry_t *node)
212
lrad_hash_entry_t **last, *cur;
216
for (cur = *head; cur != &ht->null; cur = cur->next) {
217
if (cur->reversed > node->reversed) break;
220
if (cur->reversed == node->reversed) {
222
int cmp = ht->cmp(node->data, cur->data);
224
if (cmp < 0) continue;
238
* Delete an entry from the list.
240
static int list_delete(lrad_hash_table_t *ht,
241
lrad_hash_entry_t **head, lrad_hash_entry_t *node)
243
lrad_hash_entry_t **last, *cur;
247
for (cur = *head; cur != &ht->null; cur = cur->next) {
250
int cmp = ht->cmp(node->data, cur->data);
252
if (cmp < 0) continue;
267
* Memory usage in bytes is (20/3) * number of entries.
269
lrad_hash_table_t *lrad_hash_table_create(lrad_hash_table_hash_t hashNode,
270
lrad_hash_table_cmp_t cmpNode,
271
lrad_hash_table_free_t freeNode)
273
lrad_hash_table_t *ht;
275
if (!hashNode) return NULL;
277
ht = malloc(sizeof(*ht));
278
if (!ht) return NULL;
280
memset(ht, 0, sizeof(*ht));
284
ht->num_buckets = LRAD_HASH_NUM_BUCKETS;
285
ht->mask = ht->num_buckets - 1;
288
* Have a default load factor of 2.5. In practice this
289
* means that the average load will hit 3 before the
292
ht->next_grow = (ht->num_buckets << 1) + (ht->num_buckets >> 1);
294
ht->buckets = malloc(sizeof(*ht->buckets) * ht->num_buckets);
299
memset(ht->buckets, 0, sizeof(*ht->buckets) * ht->num_buckets);
301
ht->null.reversed = ~0;
303
ht->null.next = &ht->null;
305
ht->buckets[0] = &ht->null;
312
* If the current bucket is uninitialized, initialize it
313
* by recursively copying information from the parent.
315
* We may have a situation where entry E is a parent to 2 other
316
* entries E' and E". If we split E into E and E', then the
317
* nodes meant for E" end up in E or E', either of which is
318
* wrong. To solve that problem, we walk down the whole chain,
319
* inserting the elements into the correct place.
321
static void lrad_hash_table_fixup(lrad_hash_table_t *ht, uint32_t entry)
323
uint32_t parent_entry = parent_of(entry);
324
lrad_hash_entry_t **last, *cur;
327
parent_entry = parent_of(entry);
329
/* parent_entry == entry if and only if entry == 0 */
331
if (!ht->buckets[parent_entry]) {
332
lrad_hash_table_fixup(ht, parent_entry);
336
* Keep walking down cur, trying to find entries that
337
* don't belong here any more. There may be multiple
338
* ones, so we can't have a naive algorithm...
340
last = &ht->buckets[parent_entry];
343
for (cur = *last; cur != &ht->null; cur = cur->next) {
346
real_entry = cur->key & ht->mask;
347
if (real_entry != this) { /* ht->buckets[real_entry] == NULL */
349
ht->buckets[real_entry] = cur;
357
* We may NOT have initialized this bucket, so do it now.
359
if (!ht->buckets[entry]) ht->buckets[entry] = &ht->null;
363
* This should be a power of two. Changing it to 4 doesn't seem
364
* to make any difference.
366
#define GROW_FACTOR (2)
369
* Grow the hash table.
371
static void lrad_hash_table_grow(lrad_hash_table_t *ht)
373
lrad_hash_entry_t **buckets;
375
buckets = malloc(sizeof(*buckets) * GROW_FACTOR * ht->num_buckets);
376
if (!buckets) return;
378
memcpy(buckets, ht->buckets,
379
sizeof(*buckets) * ht->num_buckets);
380
memset(&buckets[ht->num_buckets], 0,
381
sizeof(*buckets) * ht->num_buckets);
384
ht->buckets = buckets;
385
ht->num_buckets *= GROW_FACTOR;
386
ht->next_grow *= GROW_FACTOR;
387
ht->mask = ht->num_buckets - 1;
390
fprintf(stderr, "GROW TO %d\n", ht->num_buckets);
398
int lrad_hash_table_insert(lrad_hash_table_t *ht, void *data)
403
lrad_hash_entry_t *node;
405
if (!ht || !data) return 0;
407
key = ht->hash(data);
408
entry = key & ht->mask;
409
reversed = reverse(key);
411
if (!ht->buckets[entry]) lrad_hash_table_fixup(ht, entry);
414
* If we try to do our own memory allocation here, the
415
* speedup is only ~15% or so, which isn't worth it.
417
node = malloc(sizeof(*node));
419
memset(node, 0, sizeof(*node));
421
node->next = &ht->null;
422
node->reversed = reversed;
426
/* already in the table, can't insert it */
427
if (!list_insert(ht, &ht->buckets[entry], node)) {
433
* Check the load factor, and grow the table if
437
if (ht->num_elements >= ht->next_grow) {
438
lrad_hash_table_grow(ht);
446
* Internal find a node routine.
448
static lrad_hash_entry_t *lrad_hash_table_find(lrad_hash_table_t *ht,
455
if (!ht) return NULL;
457
key = ht->hash(data);
458
entry = key & ht->mask;
459
reversed = reverse(key);
461
if (!ht->buckets[entry]) lrad_hash_table_fixup(ht, entry);
463
return list_find(ht, ht->buckets[entry], reversed, data);
468
* Replace old data with new data, OR insert if there is no old.
470
int lrad_hash_table_replace(lrad_hash_table_t *ht, void *data)
472
lrad_hash_entry_t *node;
474
if (!ht || !data) return 0;
476
node = lrad_hash_table_find(ht, data);
477
if (!node) return lrad_hash_table_insert(ht, data);
479
if (ht->free) ht->free(node->data);
487
* Find data from a template
489
void *lrad_hash_table_finddata(lrad_hash_table_t *ht, const void *data)
491
lrad_hash_entry_t *node;
493
node = lrad_hash_table_find(ht, data);
494
if (!node) return NULL;
502
* Yank an entry from the hash table, without freeing the data.
504
void *lrad_hash_table_yank(lrad_hash_table_t *ht, const void *data)
510
lrad_hash_entry_t *node;
512
if (!ht) return NULL;
514
key = ht->hash(data);
515
entry = key & ht->mask;
516
reversed = reverse(key);
518
if (!ht->buckets[entry]) lrad_hash_table_fixup(ht, entry);
520
node = list_find(ht, ht->buckets[entry], reversed, data);
521
if (!node) return NULL;
523
list_delete(ht, &ht->buckets[entry], node);
534
* Delete a piece of data from the hash table.
536
int lrad_hash_table_delete(lrad_hash_table_t *ht, const void *data)
540
old = lrad_hash_table_yank(ht, data);
543
if (ht->free) ht->free(old);
552
void lrad_hash_table_free(lrad_hash_table_t *ht)
554
lrad_hash_entry_t *node, *next;
559
* The entries MUST be all in one linked list.
561
for (node = ht->buckets[0]; node != &ht->null; node = next) {
564
if (!node->data) continue; /* dummy entry */
566
if (ht->free) ht->free(node->data);
576
* Count number of elements
578
int lrad_hash_table_num_elements(lrad_hash_table_t *ht)
582
return ht->num_elements;
587
* Walk over the nodes, allowing deletes & inserts to happen.
589
int lrad_hash_table_walk(lrad_hash_table_t *ht,
590
lrad_hash_table_walk_t callback,
595
if (!ht || !callback) return 0;
597
for (i = ht->num_buckets - 1; i >= 0; i--) {
598
lrad_hash_entry_t *node, *next;
601
* Ensure that the current bucket is filled.
603
if (!ht->buckets[i]) lrad_hash_table_fixup(ht, i);
605
for (node = ht->buckets[i]; node != &ht->null; node = next) {
608
rcode = callback(context, node->data);
609
if (rcode != 0) return rcode;
619
* Show what the hash table is doing.
621
int lrad_hash_table_info(lrad_hash_table_t *ht)
623
int i, a, collisions, uninitialized;
628
uninitialized = collisions = 0;
629
memset(array, 0, sizeof(array));
631
for (i = 0; i < ht->num_buckets; i++) {
634
lrad_hash_entry_t *node, *next;
637
* If we haven't inserted or looked up an entry
638
* in a bucket, it's uninitialized.
640
if (!ht->buckets[i]) {
647
for (node = ht->buckets[i]; node != &ht->null; node = next) {
648
if (node->reversed == key) {
651
key = node->reversed;
657
if (load > 255) load = 255;
661
printf("HASH TABLE %p\tbuckets: %d\t(%d uninitialized)\n", ht,
662
ht->num_buckets, uninitialized);
663
printf("\tnum entries %d\thash collisions %d\n",
664
ht->num_elements, collisions);
667
for (i = 1; i < 256; i++) {
668
if (!array[i]) continue;
669
printf("%d\t%d\n", i, array[i]);
672
* Since the entries are ordered, the lookup cost
673
* for any one element in a chain is (on average)
674
* the cost of walking half of the chain.
683
printf("\texpected lookup cost = %d/%d or %f\n\n",
685
(float) ht->num_elements / (float) a);
692
#define FNV_MAGIC_INIT (0x811c9dc5)
693
#define FNV_MAGIC_PRIME (0x01000193)
696
* A fast hash function. For details, see:
698
* http://www.isthe.com/chongo/tech/comp/fnv/
700
* Which also includes public domain source. We've re-written
701
* it here for our purposes.
703
uint32_t lrad_hash(const void *data, size_t size)
705
const uint8_t *p = data;
706
const uint8_t *q = p + size;
707
uint32_t hash = FNV_MAGIC_INIT;
710
* FNV-1 hash each octet in the buffer
714
* Multiple by 32-bit magic FNV prime, mod 2^32
716
hash *= FNV_MAGIC_PRIME;
719
* Potential optimization.
721
hash += (hash<<1) + (hash<<4) + (hash<<7) + (hash<<8) + (hash<<24);
724
* XOR the 8-bit quantity into the bottom of
727
hash ^= (uint32_t) (*p++);
734
* Continue hashing data.
736
uint32_t lrad_hash_update(const void *data, size_t size, uint32_t hash)
738
const uint8_t *p = data;
739
const uint8_t *q = p + size;
742
hash *= FNV_MAGIC_PRIME;
743
hash ^= (uint32_t) (*p++);
751
* Return a "folded" hash, where the lower "bits" are the
752
* hash, and the upper bits are zero.
754
* If you need a non-power-of-two hash, cope.
756
uint32_t lrad_hash_fold(uint32_t hash, int bits)
761
if ((bits <= 0) || (bits >= 32)) return hash;
766
* Never use the same bits twice in an xor.
768
for (count = 0; count < 32; count += bits) {
773
return result & (((uint32_t) (1 << bits)) - 1);
778
* Hash a C string, so we loop over it once.
780
uint32_t lrad_hash_string(const char *p)
782
uint32_t hash = FNV_MAGIC_INIT;
785
hash *= FNV_MAGIC_PRIME;
786
hash ^= (uint32_t) (*p++);
795
* cc -DTESTING -I .. hash.c -o hash
803
static uint32_t hash_int(const void *data)
805
return lrad_hash((int *) data, sizeof(int));
808
#define MAX 1024*1024
809
int main(int argc, char **argv)
812
lrad_hash_table_t *ht;
815
ht = lrad_hash_table_create(hash_int, NULL, NULL);
817
fprintf(stderr, "Hash create failed\n");
821
array = malloc(sizeof(int) * MAX);
823
for (i = 0; i < MAX; i++) {
827
if (!lrad_hash_table_insert(ht, p)) {
828
fprintf(stderr, "Failed insert %08x\n", i);
832
q = lrad_hash_table_finddata(ht, p);
834
fprintf(stderr, "Bad data %d\n", i);
840
lrad_hash_table_info(ht);
843
* Build this to see how lookups result in shortening
844
* of the hash chains.
847
for (i = 0; i < MAX ; i++) {
848
q = lrad_hash_table_finddata(ht, &i);
850
fprintf(stderr, "Failed finding %d\n", i);
855
if (!lrad_hash_table_delete(ht, &i)) {
856
fprintf(stderr, "Failed deleting %d\n", i);
859
q = lrad_hash_table_finddata(ht, &i);
861
fprintf(stderr, "Failed to delete %08x\n", i);
867
lrad_hash_table_info(ht);
870
lrad_hash_table_free(ht);