1
/* An expandable hash tables datatype.
2
Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
3
Contributed by Vladimir Makarov (vmakarov@cygnus.com).
5
This file is part of the libiberty library.
6
Libiberty is free software; you can redistribute it and/or
7
modify it under the terms of the GNU Library General Public
8
License as published by the Free Software Foundation; either
9
version 2 of the License, or (at your option) any later version.
11
Libiberty is distributed in the hope that it will be useful,
12
but WITHOUT ANY WARRANTY; without even the implied warranty of
13
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
Library General Public License for more details.
16
You should have received a copy of the GNU Library General Public
17
License along with libiberty; see the file COPYING.LIB. If
18
not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19
Boston, MA 02111-1307, USA. */
21
/* This package implements basic hash table functionality. It is possible
22
to search for an entry, create an entry and destroy an entry.
24
Elements in the table are generic pointers.
26
The size of the table is not fixed; if the occupancy of the table
27
grows too high the hash table will be expanded.
29
The abstract data implementation is based on generalized Algorithm D
30
from Knuth's book "The art of computer programming". Hash table is
31
expanded by creation of new hash table and transferring elements from
32
the old table to the new table. */
38
#include <sys/types.h>
54
#include "libiberty.h"
57
/* This macro defines reserved value for empty table entry. */
59
#define EMPTY_ENTRY ((PTR) 0)
61
/* This macro defines reserved value for table entry which contained
64
#define DELETED_ENTRY ((PTR) 1)
66
static unsigned long higher_prime_number PARAMS ((unsigned long));
67
static hashval_t hash_pointer PARAMS ((const void *));
68
static int eq_pointer PARAMS ((const void *, const void *));
69
static int htab_expand PARAMS ((htab_t));
70
static PTR *find_empty_slot_for_expand PARAMS ((htab_t, hashval_t));
72
/* At some point, we could make these be NULL, and modify the
73
hash-table routines to handle NULL specially; that would avoid
74
function-call overhead for the common case of hashing pointers. */
75
htab_hash htab_hash_pointer = hash_pointer;
76
htab_eq htab_eq_pointer = eq_pointer;
78
/* The following function returns a nearest prime number which is
79
greater than N, and near a power of two. */
82
higher_prime_number (n)
85
/* These are primes that are near, but slightly smaller than, a
87
static const unsigned long primes[] = {
99
(unsigned long) 16381,
100
(unsigned long) 32749,
101
(unsigned long) 65521,
102
(unsigned long) 131071,
103
(unsigned long) 262139,
104
(unsigned long) 524287,
105
(unsigned long) 1048573,
106
(unsigned long) 2097143,
107
(unsigned long) 4194301,
108
(unsigned long) 8388593,
109
(unsigned long) 16777213,
110
(unsigned long) 33554393,
111
(unsigned long) 67108859,
112
(unsigned long) 134217689,
113
(unsigned long) 268435399,
114
(unsigned long) 536870909,
115
(unsigned long) 1073741789,
116
(unsigned long) 2147483647,
118
((unsigned long) 2147483647) + ((unsigned long) 2147483644),
121
const unsigned long *low = &primes[0];
122
const unsigned long *high = &primes[sizeof(primes) / sizeof(primes[0])];
126
const unsigned long *mid = low + (high - low) / 2;
133
/* If we've run out of primes, abort. */
136
fprintf (stderr, "Cannot find prime bigger than %lu\n", n);
143
/* Returns a hash code for P. */
149
return (hashval_t) ((long)p >> 3);
152
/* Returns non-zero if P1 and P2 are equal. */
162
/* This function creates table with length slightly longer than given
163
source length. Created hash table is initiated as empty (all the
164
hash table entries are EMPTY_ENTRY). The function returns the
165
created hash table, or NULL if memory allocation fails. */
168
htab_create_alloc (size, hash_f, eq_f, del_f, alloc_f, free_f)
178
size = higher_prime_number (size);
179
result = (htab_t) (*alloc_f) (1, sizeof (struct htab));
182
result->entries = (PTR *) (*alloc_f) (size, sizeof (PTR));
183
if (result->entries == NULL)
190
result->hash_f = hash_f;
192
result->del_f = del_f;
193
result->alloc_f = alloc_f;
194
result->free_f = free_f;
198
/* As above, but use the variants of alloc_f and free_f which accept
199
an extra argument. */
202
htab_create_alloc_ex (size, hash_f, eq_f, del_f, alloc_arg, alloc_f,
209
htab_alloc_with_arg alloc_f;
210
htab_free_with_arg free_f;
214
size = higher_prime_number (size);
215
result = (htab_t) (*alloc_f) (alloc_arg, 1, sizeof (struct htab));
218
result->entries = (PTR *) (*alloc_f) (alloc_arg, size, sizeof (PTR));
219
if (result->entries == NULL)
222
(*free_f) (alloc_arg, result);
226
result->hash_f = hash_f;
228
result->del_f = del_f;
229
result->alloc_arg = alloc_arg;
230
result->alloc_with_arg_f = alloc_f;
231
result->free_with_arg_f = free_f;
235
/* Update the function pointers and allocation parameter in the htab_t. */
238
htab_set_functions_ex (htab, hash_f, eq_f, del_f, alloc_arg, alloc_f, free_f)
244
htab_alloc_with_arg alloc_f;
245
htab_free_with_arg free_f;
247
htab->hash_f = hash_f;
250
htab->alloc_arg = alloc_arg;
251
htab->alloc_with_arg_f = alloc_f;
252
htab->free_with_arg_f = free_f;
255
/* These functions exist solely for backward compatibility. */
259
htab_create (size, hash_f, eq_f, del_f)
265
return htab_create_alloc (size, hash_f, eq_f, del_f, xcalloc, free);
269
htab_try_create (size, hash_f, eq_f, del_f)
275
return htab_create_alloc (size, hash_f, eq_f, del_f, calloc, free);
278
/* This function frees all memory allocated for given hash table.
279
Naturally the hash table must already exist. */
288
for (i = htab->size - 1; i >= 0; i--)
289
if (htab->entries[i] != EMPTY_ENTRY
290
&& htab->entries[i] != DELETED_ENTRY)
291
(*htab->del_f) (htab->entries[i]);
293
if (htab->free_f != NULL)
295
(*htab->free_f) (htab->entries);
296
(*htab->free_f) (htab);
298
else if (htab->free_with_arg_f != NULL)
300
(*htab->free_with_arg_f) (htab->alloc_arg, htab->entries);
301
(*htab->free_with_arg_f) (htab->alloc_arg, htab);
305
/* This function clears all entries in the given hash table. */
314
for (i = htab->size - 1; i >= 0; i--)
315
if (htab->entries[i] != EMPTY_ENTRY
316
&& htab->entries[i] != DELETED_ENTRY)
317
(*htab->del_f) (htab->entries[i]);
319
memset (htab->entries, 0, htab->size * sizeof (PTR));
322
/* Similar to htab_find_slot, but without several unwanted side effects:
323
- Does not call htab->eq_f when it finds an existing entry.
324
- Does not change the count of elements/searches/collisions in the
326
This function also assumes there are no deleted entries in the table.
327
HASH is the hash value for the element to be inserted. */
330
find_empty_slot_for_expand (htab, hash)
334
size_t size = htab->size;
335
unsigned int index = hash % size;
336
PTR *slot = htab->entries + index;
339
if (*slot == EMPTY_ENTRY)
341
else if (*slot == DELETED_ENTRY)
344
hash2 = 1 + hash % (size - 2);
351
slot = htab->entries + index;
352
if (*slot == EMPTY_ENTRY)
354
else if (*slot == DELETED_ENTRY)
359
/* The following function changes size of memory allocated for the
360
entries and repeatedly inserts the table elements. The occupancy
361
of the table after the call will be about 50%. Naturally the hash
362
table must already exist. Remember also that the place of the
363
table entries is changed. If memory allocation failures are allowed,
364
this function will return zero, indicating that the table could not be
365
expanded. If all goes well, it will return a non-zero value. */
377
oentries = htab->entries;
378
olimit = oentries + htab->size;
380
/* Resize only when table after removal of unused elements is either
381
too full or too empty. */
382
if ((htab->n_elements - htab->n_deleted) * 2 > htab->size
383
|| ((htab->n_elements - htab->n_deleted) * 8 < htab->size
385
nsize = higher_prime_number ((htab->n_elements - htab->n_deleted) * 2);
389
if (htab->alloc_with_arg_f != NULL)
390
nentries = (PTR *) (*htab->alloc_with_arg_f) (htab->alloc_arg, nsize,
393
nentries = (PTR *) (*htab->alloc_f) (nsize, sizeof (PTR *));
394
if (nentries == NULL)
396
htab->entries = nentries;
399
htab->n_elements -= htab->n_deleted;
407
if (x != EMPTY_ENTRY && x != DELETED_ENTRY)
409
PTR *q = find_empty_slot_for_expand (htab, (*htab->hash_f) (x));
418
if (htab->free_f != NULL)
419
(*htab->free_f) (oentries);
420
else if (htab->free_with_arg_f != NULL)
421
(*htab->free_with_arg_f) (htab->alloc_arg, oentries);
425
/* This function searches for a hash table entry equal to the given
426
element. It cannot be used to insert or delete an element. */
429
htab_find_with_hash (htab, element, hash)
443
entry = htab->entries[index];
444
if (entry == EMPTY_ENTRY
445
|| (entry != DELETED_ENTRY && (*htab->eq_f) (entry, element)))
448
hash2 = 1 + hash % (size - 2);
457
entry = htab->entries[index];
458
if (entry == EMPTY_ENTRY
459
|| (entry != DELETED_ENTRY && (*htab->eq_f) (entry, element)))
464
/* Like htab_find_slot_with_hash, but compute the hash value from the
468
htab_find (htab, element)
472
return htab_find_with_hash (htab, element, (*htab->hash_f) (element));
475
/* This function searches for a hash table slot containing an entry
476
equal to the given element. To delete an entry, call this with
477
INSERT = 0, then call htab_clear_slot on the slot returned (possibly
478
after doing some checks). To insert an entry, call this with
479
INSERT = 1, then write the value you want into the returned slot.
480
When inserting an entry, NULL may be returned if memory allocation
484
htab_find_slot_with_hash (htab, element, hash, insert)
488
enum insert_option insert;
490
PTR *first_deleted_slot;
496
if (insert == INSERT && htab->size * 3 <= htab->n_elements * 4
497
&& htab_expand (htab) == 0)
504
first_deleted_slot = NULL;
506
entry = htab->entries[index];
507
if (entry == EMPTY_ENTRY)
509
else if (entry == DELETED_ENTRY)
510
first_deleted_slot = &htab->entries[index];
511
else if ((*htab->eq_f) (entry, element))
512
return &htab->entries[index];
514
hash2 = 1 + hash % (size - 2);
522
entry = htab->entries[index];
523
if (entry == EMPTY_ENTRY)
525
else if (entry == DELETED_ENTRY)
527
if (!first_deleted_slot)
528
first_deleted_slot = &htab->entries[index];
530
else if ((*htab->eq_f) (entry, element))
531
return &htab->entries[index];
535
if (insert == NO_INSERT)
540
if (first_deleted_slot)
542
*first_deleted_slot = EMPTY_ENTRY;
543
return first_deleted_slot;
546
return &htab->entries[index];
549
/* Like htab_find_slot_with_hash, but compute the hash value from the
553
htab_find_slot (htab, element, insert)
556
enum insert_option insert;
558
return htab_find_slot_with_hash (htab, element, (*htab->hash_f) (element),
562
/* This function deletes an element with the given value from hash
563
table. If there is no matching element in the hash table, this
564
function does nothing. */
567
htab_remove_elt (htab, element)
573
slot = htab_find_slot (htab, element, NO_INSERT);
574
if (*slot == EMPTY_ENTRY)
578
(*htab->del_f) (*slot);
580
*slot = DELETED_ENTRY;
584
/* This function clears a specified slot in a hash table. It is
585
useful when you've already done the lookup and don't want to do it
589
htab_clear_slot (htab, slot)
593
if (slot < htab->entries || slot >= htab->entries + htab->size
594
|| *slot == EMPTY_ENTRY || *slot == DELETED_ENTRY)
598
(*htab->del_f) (*slot);
600
*slot = DELETED_ENTRY;
604
/* This function scans over the entire hash table calling
605
CALLBACK for each live entry. If CALLBACK returns false,
606
the iteration stops. INFO is passed as CALLBACK's second
610
htab_traverse_noresize (htab, callback, info)
618
slot = htab->entries;
619
limit = slot + htab->size;
625
if (x != EMPTY_ENTRY && x != DELETED_ENTRY)
626
if (!(*callback) (slot, info))
629
while (++slot < limit);
632
/* Like htab_traverse_noresize, but does resize the table when it is
633
too empty to improve effectivity of subsequent calls. */
636
htab_traverse (htab, callback, info)
641
if ((htab->n_elements - htab->n_deleted) * 8 < htab->size)
644
htab_traverse_noresize (htab, callback, info);
647
/* Return the current size of given hash table. */
656
/* Return the current number of elements in given hash table. */
662
return htab->n_elements - htab->n_deleted;
665
/* Return the fraction of fixed collisions during all work with given
669
htab_collisions (htab)
672
if (htab->searches == 0)
675
return (double) htab->collisions / (double) htab->searches;
678
/* Hash P as a null-terminated string.
680
Copied from gcc/hashtable.c. Zack had the following to say with respect
681
to applicability, though note that unlike hashtable.c, this hash table
682
implementation re-hashes rather than chain buckets.
684
http://gcc.gnu.org/ml/gcc-patches/2001-08/msg01021.html
685
From: Zack Weinberg <zackw@panix.com>
686
Date: Fri, 17 Aug 2001 02:15:56 -0400
688
I got it by extracting all the identifiers from all the source code
689
I had lying around in mid-1999, and testing many recurrences of
690
the form "H_n = H_{n-1} * K + c_n * L + M" where K, L, M were either
691
prime numbers or the appropriate identity. This was the best one.
692
I don't remember exactly what constituted "best", except I was
693
looking at bucket-length distributions mostly.
695
So it should be very good at hashing identifiers, but might not be
696
as good at arbitrary strings.
698
I'll add that it thoroughly trounces the hash functions recommended
699
for this use at http://burtleburtle.net/bob/hash/index.html, both
700
on speed and bucket distribution. I haven't tried it against the
701
function they just started using for Perl's hashes. */
707
const unsigned char *str = (const unsigned char *) p;
711
while ((c = *str++) != 0)
712
r = r * 67 + c - 113;
718
--------------------------------------------------------------------
719
lookup2.c, by Bob Jenkins, December 1996, Public Domain.
720
hash(), hash2(), hash3, and mix() are externally useful functions.
721
Routines to test the hash are included if SELF_TEST is defined.
722
You can use this free for any purpose. It has no warranty.
723
--------------------------------------------------------------------
727
--------------------------------------------------------------------
728
mix -- mix 3 32-bit values reversibly.
729
For every delta with one or two bit set, and the deltas of all three
730
high bits or all three low bits, whether the original value of a,b,c
731
is almost all zero or is uniformly distributed,
732
* If mix() is run forward or backward, at least 32 bits in a,b,c
733
have at least 1/4 probability of changing.
734
* If mix() is run forward, every bit of c will change between 1/3 and
735
2/3 of the time. (Well, 22/100 and 78/100 for some 2-bit deltas.)
736
mix() was built out of 36 single-cycle latency instructions in a
737
structure that could supported 2x parallelism, like so:
745
Unfortunately, superscalar Pentiums and Sparcs can't take advantage
746
of that parallelism. They've also turned some of those single-cycle
747
latency instructions into multi-cycle latency instructions. Still,
748
this is the fastest good hash I could find. There were about 2^^68
749
to choose from. I only looked at a billion or so.
750
--------------------------------------------------------------------
752
/* same, but slower, works on systems that might have 8 byte hashval_t's */
755
a -= b; a -= c; a ^= (c>>13); \
756
b -= c; b -= a; b ^= (a<< 8); \
757
c -= a; c -= b; c ^= ((b&0xffffffff)>>13); \
758
a -= b; a -= c; a ^= ((c&0xffffffff)>>12); \
759
b -= c; b -= a; b = (b ^ (a<<16)) & 0xffffffff; \
760
c -= a; c -= b; c = (c ^ (b>> 5)) & 0xffffffff; \
761
a -= b; a -= c; a = (a ^ (c>> 3)) & 0xffffffff; \
762
b -= c; b -= a; b = (b ^ (a<<10)) & 0xffffffff; \
763
c -= a; c -= b; c = (c ^ (b>>15)) & 0xffffffff; \
767
--------------------------------------------------------------------
768
hash() -- hash a variable-length key into a 32-bit value
769
k : the key (the unaligned variable-length array of bytes)
770
len : the length of the key, counting by bytes
771
level : can be any 4-byte value
772
Returns a 32-bit value. Every bit of the key affects every bit of
773
the return value. Every 1-bit and 2-bit delta achieves avalanche.
774
About 36+6len instructions.
776
The best hash table sizes are powers of 2. There is no need to do
777
mod a prime (mod is sooo slow!). If you need less than 32 bits,
778
use a bitmask. For example, if you need only 10 bits, do
779
h = (h & hashmask(10));
780
In which case, the hash table should have hashsize(10) elements.
782
If you are hashing n strings (ub1 **)k, do it like this:
783
for (i=0, h=0; i<n; ++i) h = hash( k[i], len[i], h);
785
By Bob Jenkins, 1996. bob_jenkins@burtleburtle.net. You may use this
786
code any way you wish, private, educational, or commercial. It's free.
788
See http://burtleburtle.net/bob/hash/evahash.html
789
Use for hash table lookup, or anything where one collision in 2^32 is
790
acceptable. Do NOT use for cryptographic purposes.
791
--------------------------------------------------------------------
794
hashval_t iterative_hash (k_in, length, initval)
795
const PTR k_in; /* the key */
796
register size_t length; /* the length of the key */
797
register hashval_t initval; /* the previous hash, or an arbitrary value */
799
register const unsigned char *k = (const unsigned char *)k_in;
800
register hashval_t a,b,c,len;
802
/* Set up the internal state */
804
a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */
805
c = initval; /* the previous hash value */
807
/*---------------------------------------- handle most of the key */
808
#ifndef WORDS_BIGENDIAN
809
/* On a little-endian machine, if the data is 4-byte aligned we can hash
810
by word for better speed. This gives nondeterministic results on
811
big-endian machines. */
812
if (sizeof (hashval_t) == 4 && (((size_t)k)&3) == 0)
813
while (len >= 12) /* aligned */
815
a += *(hashval_t *)(k+0);
816
b += *(hashval_t *)(k+4);
817
c += *(hashval_t *)(k+8);
825
a += (k[0] +((hashval_t)k[1]<<8) +((hashval_t)k[2]<<16) +((hashval_t)k[3]<<24));
826
b += (k[4] +((hashval_t)k[5]<<8) +((hashval_t)k[6]<<16) +((hashval_t)k[7]<<24));
827
c += (k[8] +((hashval_t)k[9]<<8) +((hashval_t)k[10]<<16)+((hashval_t)k[11]<<24));
832
/*------------------------------------- handle the last 11 bytes */
834
switch(len) /* all the case statements fall through */
836
case 11: c+=((hashval_t)k[10]<<24);
837
case 10: c+=((hashval_t)k[9]<<16);
838
case 9 : c+=((hashval_t)k[8]<<8);
839
/* the first byte of c is reserved for the length */
840
case 8 : b+=((hashval_t)k[7]<<24);
841
case 7 : b+=((hashval_t)k[6]<<16);
842
case 6 : b+=((hashval_t)k[5]<<8);
844
case 4 : a+=((hashval_t)k[3]<<24);
845
case 3 : a+=((hashval_t)k[2]<<16);
846
case 2 : a+=((hashval_t)k[1]<<8);
848
/* case 0: nothing left to add */
851
/*-------------------------------------------- report the result */