/* hash - hashing table processing.

   Copyright (C) 1998-2004, 2006-2007, 2009-2010 Free Software Foundation, Inc.

   Written by Jim Meyering, 1992.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
/* A generic hash table package.  */

/* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead
   of malloc.  If you change USE_OBSTACK, you have to recompile!  */

/* NOTE(review): the extracted preamble was truncated.  The includes below are
   reconstructed so that the module's types (Hash_table, Hash_tuning) and the
   helpers it calls (xalloc_oversized, rotl_sz/rotr_sz) resolve — confirm
   against the original file.  */
#include "hash.h"

#include "bitrotate.h"
#include "xalloc.h"

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#if USE_OBSTACK
# include "obstack.h"
# ifndef obstack_chunk_alloc
#  define obstack_chunk_alloc malloc
# endif
# ifndef obstack_chunk_free
#  define obstack_chunk_free free
# endif
#endif
49
struct hash_entry *next;
54
/* The array of buckets starts at BUCKET and extends to BUCKET_LIMIT-1,
55
for a possibility of N_BUCKETS. Among those, N_BUCKETS_USED buckets
56
are not empty, there are N_ENTRIES active entries in the table. */
57
struct hash_entry *bucket;
58
struct hash_entry const *bucket_limit;
60
size_t n_buckets_used;
63
/* Tuning arguments, kept in a physically separate structure. */
64
const Hash_tuning *tuning;
66
/* Three functions are given to `hash_initialize', see the documentation
67
block for this function. In a word, HASHER randomizes a user entry
68
into a number up from 0 up to some maximum minus 1; COMPARATOR returns
69
true if two user entries compare equally; and DATA_FREER is the cleanup
70
function for a user entry. */
72
Hash_comparator comparator;
73
Hash_data_freer data_freer;
75
/* A linked list of freed struct hash_entry structs. */
76
struct hash_entry *free_entry_list;
79
/* Whenever obstacks are used, it is possible to allocate all overflowed
80
entries into a single stack, so they all can be freed in a single
81
operation. It is not clear if the speedup is worth the trouble. */
82
struct obstack entry_stack;
/* A hash table contains many internal entries, each holding a pointer to
   some user-provided data (also called a user entry).  An entry indistinctly
   refers to both the internal entry and its associated user entry.  A user
   entry contents may be hashed by a randomization function (the hashing
   function, or just `hasher' for short) into a number (or `slot') between 0
   and the current table size.  At each slot position in the hash table,
   starts a linked chain of entries for which the user data all hash to this
   slot.  A bucket is the collection of all entries hashing to the same slot.

   A good `hasher' function will distribute entries rather evenly in buckets.
   In the ideal case, the length of each bucket is roughly the number of
   entries divided by the table size.  Finding the slot for a data is usually
   done in constant time by the `hasher', and the later finding of a precise
   entry is linear in time with the size of the bucket.  Consequently, a
   larger hash table size (that is, a larger number of buckets) is prone to
   yielding shorter chains, *given* the `hasher' function behaves properly.

   Long buckets slow down the lookup algorithm.  One might use big hash table
   sizes in hope to reduce the average length of buckets, but this might
   become inordinate, as unused slots in the hash table take some space.  The
   best bet is to make sure you are using a good `hasher' function (beware
   that those are not that easy to write! :-), and to use a table size
   larger than the actual number of entries.  */
110
/* If an insertion makes the ratio of nonempty buckets to table size larger
111
than the growth threshold (a number between 0.0 and 1.0), then increase
112
the table size by multiplying by the growth factor (a number greater than
113
1.0). The growth threshold defaults to 0.8, and the growth factor
114
defaults to 1.414, meaning that the table will have doubled its size
115
every second time 80% of the buckets get used. */
116
#define DEFAULT_GROWTH_THRESHOLD 0.8
117
#define DEFAULT_GROWTH_FACTOR 1.414
119
/* If a deletion empties a bucket and causes the ratio of used buckets to
120
table size to become smaller than the shrink threshold (a number between
121
0.0 and 1.0), then shrink the table by multiplying by the shrink factor (a
122
number greater than the shrink threshold but smaller than 1.0). The shrink
123
threshold and factor default to 0.0 and 1.0, meaning that the table never
125
#define DEFAULT_SHRINK_THRESHOLD 0.0
126
#define DEFAULT_SHRINK_FACTOR 1.0
128
/* Use this to initialize or reset a TUNING structure to
129
some sensible values. */
130
static const Hash_tuning default_tuning =
132
DEFAULT_SHRINK_THRESHOLD,
133
DEFAULT_SHRINK_FACTOR,
134
DEFAULT_GROWTH_THRESHOLD,
135
DEFAULT_GROWTH_FACTOR,
/* Information and lookup.  */

/* The following few functions provide information about the overall hash
   table organization: the number of entries, number of buckets and maximum
   length of buckets.  */
145
/* Return the number of buckets in the hash table. The table size, the total
146
number of buckets (used plus unused), or the maximum number of slots, are
147
the same quantity. */
150
hash_get_n_buckets (const Hash_table *table)
152
return table->n_buckets;
155
/* Return the number of slots in use (non-empty buckets). */
158
hash_get_n_buckets_used (const Hash_table *table)
160
return table->n_buckets_used;
163
/* Return the number of active entries. */
166
hash_get_n_entries (const Hash_table *table)
168
return table->n_entries;
171
/* Return the length of the longest chain (bucket). */
174
hash_get_max_bucket_length (const Hash_table *table)
176
struct hash_entry const *bucket;
177
size_t max_bucket_length = 0;
179
for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
183
struct hash_entry const *cursor = bucket;
184
size_t bucket_length = 1;
186
while (cursor = cursor->next, cursor)
189
if (bucket_length > max_bucket_length)
190
max_bucket_length = bucket_length;
194
return max_bucket_length;
197
/* Do a mild validation of a hash table, by traversing it and checking two
201
hash_table_ok (const Hash_table *table)
203
struct hash_entry const *bucket;
204
size_t n_buckets_used = 0;
205
size_t n_entries = 0;
207
for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
211
struct hash_entry const *cursor = bucket;
213
/* Count bucket head. */
217
/* Count bucket overflow. */
218
while (cursor = cursor->next, cursor)
223
if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
230
/* Write simple usage statistics for TABLE to STREAM: entry count, bucket
   count, fraction of buckets used, and the longest chain length.  */

void
hash_print_statistics (const Hash_table *table, FILE *stream)
{
  size_t n_entries = hash_get_n_entries (table);
  size_t n_buckets = hash_get_n_buckets (table);
  size_t n_buckets_used = hash_get_n_buckets_used (table);
  size_t max_bucket_length = hash_get_max_bucket_length (table);

  fprintf (stream, "# entries: %lu\n", (unsigned long int) n_entries);
  fprintf (stream, "# buckets: %lu\n", (unsigned long int) n_buckets);
  fprintf (stream, "# buckets used: %lu (%.2f%%)\n",
           (unsigned long int) n_buckets_used,
           (100.0 * n_buckets_used) / n_buckets);
  fprintf (stream, "max bucket length: %lu\n",
           (unsigned long int) max_bucket_length);
}
/* If ENTRY matches an entry already in the hash table, return the
247
entry from the table. Otherwise, return NULL. */
250
hash_lookup (const Hash_table *table, const void *entry)
252
struct hash_entry const *bucket
253
= table->bucket + table->hasher (entry, table->n_buckets);
254
struct hash_entry const *cursor;
256
if (! (bucket < table->bucket_limit))
259
if (bucket->data == NULL)
262
for (cursor = bucket; cursor; cursor = cursor->next)
263
if (entry == cursor->data || table->comparator (entry, cursor->data))
271
/* The functions in this page traverse the hash table and process the
   contained entries.  For the traversal to work properly, the hash table
   should not be resized nor modified while any particular entry is being
   processed.  In particular, entries should not be added, and an entry
   may be removed only if there is no shrink threshold and the entry being
   removed has already been passed to hash_get_next.  */
/* Return the first data in the table, or NULL if the table is empty. */
281
hash_get_first (const Hash_table *table)
283
struct hash_entry const *bucket;
285
if (table->n_entries == 0)
288
for (bucket = table->bucket; ; bucket++)
289
if (! (bucket < table->bucket_limit))
291
else if (bucket->data)
295
/* Return the user data for the entry following ENTRY, where ENTRY has been
296
returned by a previous call to either `hash_get_first' or `hash_get_next'.
297
Return NULL if there are no more entries. */
300
hash_get_next (const Hash_table *table, const void *entry)
302
struct hash_entry const *bucket
303
= table->bucket + table->hasher (entry, table->n_buckets);
304
struct hash_entry const *cursor;
306
if (! (bucket < table->bucket_limit))
309
/* Find next entry in the same bucket. */
310
for (cursor = bucket; cursor; cursor = cursor->next)
311
if (cursor->data == entry && cursor->next)
312
return cursor->next->data;
314
/* Find first entry in any subsequent bucket. */
315
while (++bucket < table->bucket_limit)
323
/* Fill BUFFER with pointers to active user entries in the hash table, then
324
return the number of pointers copied. Do not copy more than BUFFER_SIZE
328
hash_get_entries (const Hash_table *table, void **buffer,
332
struct hash_entry const *bucket;
333
struct hash_entry const *cursor;
335
for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
339
for (cursor = bucket; cursor; cursor = cursor->next)
341
if (counter >= buffer_size)
343
buffer[counter++] = cursor->data;
351
/* Call a PROCESSOR function for each entry of a hash table, and return the
352
number of entries for which the processor function returned success. A
353
pointer to some PROCESSOR_DATA which will be made available to each call to
354
the processor function. The PROCESSOR accepts two arguments: the first is
355
the user entry being walked into, the second is the value of PROCESSOR_DATA
356
as received. The walking continue for as long as the PROCESSOR function
357
returns nonzero. When it returns zero, the walking is interrupted. */
360
hash_do_for_each (const Hash_table *table, Hash_processor processor,
361
void *processor_data)
364
struct hash_entry const *bucket;
365
struct hash_entry const *cursor;
367
for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
371
for (cursor = bucket; cursor; cursor = cursor->next)
373
if (! processor (cursor->data, processor_data))
383
/* Allocation and clean-up.  */

/* Return a hash index for a NUL-terminated STRING between 0 and N_BUCKETS-1.
   This is a convenience routine for constructing other hashing functions.  */

#if USE_DIFF_HASH

/* About hashings, Paul Eggert writes to me (FP), on 1994-01-01: "Please see
   B. J. McKenzie, R. Harries & T. Bell, Selecting a hashing algorithm,
   Software--practice & experience 20, 2 (Feb 1990), 209-224.  Good hash
   algorithms tend to be domain-specific, so what's good for [diffutils'] io.c
   may not be good for your application."  */

size_t
hash_string (const char *string, size_t n_buckets)
{
# define HASH_ONE_CHAR(Value, Byte) \
  ((Byte) + rotl_sz (Value, 7))

  size_t value = 0;
  unsigned char ch;

  for (; (ch = *string); string++)
    value = HASH_ONE_CHAR (value, ch);
  return value % n_buckets;

# undef HASH_ONE_CHAR
}

#else /* not USE_DIFF_HASH */

/* This one comes from `recode', and performs a bit better than the above as
   per a few experiments.  It is inspired from a hashing routine found in the
   very old Cyber `snoop', itself written in typical Greg Mansfield style.
   (By the way, what happened to this excellent man?  Is he still alive?)  */

size_t
hash_string (const char *string, size_t n_buckets)
{
  size_t value = 0;
  unsigned char ch;

  for (; (ch = *string); string++)
    value = (value * 31 + ch) % n_buckets;
  return value;
}

#endif /* not USE_DIFF_HASH */
/* Return true if CANDIDATE is a prime number.  CANDIDATE should be an odd
   number at least equal to 11.  */

static bool
is_prime (size_t candidate)
{
  size_t divisor = 3;
  size_t square = divisor * divisor;

  /* Try odd divisors only; SQUARE is kept equal to DIVISOR * DIVISOR so the
     loop can stop once divisor * divisor exceeds CANDIDATE without an extra
     multiplication per iteration ((d+2)^2 = d^2 + 4 * (d+1)).  */
  while (square < candidate && (candidate % divisor))
    {
      divisor++;
      square += 4 * divisor;
      divisor++;
    }

  return (candidate % divisor ? true : false);
}
/* Round a given CANDIDATE number up to the nearest prime, and return that
   prime.  Primes lower than 10 are merely skipped.  */

static size_t
next_prime (size_t candidate)
{
  /* Skip small primes.  */
  if (candidate < 10)
    candidate = 10;

  /* Make it definitely odd.  */
  candidate |= 1;

  while (SIZE_MAX != candidate && !is_prime (candidate))
    candidate += 2;

  return candidate;
}
/* Reset *TUNING to the sensible compiled-in defaults.  */

void
hash_reset_tuning (Hash_tuning *tuning)
{
  *tuning = default_tuning;
}
/* If the user passes a NULL hasher, we hash the raw pointer.  */
static size_t
raw_hasher (const void *data, size_t n)
{
  /* When hashing unique pointers, it is often the case that they were
     generated by malloc and thus have the property that the low-order
     bits are 0.  As this tends to give poorer performance with small
     tables, we rotate the pointer value before performing division,
     in an attempt to improve hash quality.  */
  size_t val = rotr_sz ((size_t) data, 3);
  return val % n;
}
/* If the user passes a NULL comparator, we use pointer comparison.  */
static bool
raw_comparator (const void *a, const void *b)
{
  return a == b;
}
/* For the given hash TABLE, check the user supplied tuning structure for
498
reasonable values, and return true if there is no gross error with it.
499
Otherwise, definitively reset the TUNING field to some acceptable default
500
in the hash table (that is, the user loses the right of further modifying
501
tuning arguments), and return false. */
504
check_tuning (Hash_table *table)
506
const Hash_tuning *tuning = table->tuning;
508
if (tuning == &default_tuning)
511
/* Be a bit stricter than mathematics would require, so that
512
rounding errors in size calculations do not cause allocations to
513
fail to grow or shrink as they should. The smallest allocation
514
is 11 (due to next_prime's algorithm), so an epsilon of 0.1
515
should be good enough. */
518
if (epsilon < tuning->growth_threshold
519
&& tuning->growth_threshold < 1 - epsilon
520
&& 1 + epsilon < tuning->growth_factor
521
&& 0 <= tuning->shrink_threshold
522
&& tuning->shrink_threshold + epsilon < tuning->shrink_factor
523
&& tuning->shrink_factor <= 1
524
&& tuning->shrink_threshold + epsilon < tuning->growth_threshold)
527
table->tuning = &default_tuning;
531
/* Compute the size of the bucket array for the given CANDIDATE and
532
TUNING, or return 0 if there is no possible way to allocate that
536
compute_bucket_size (size_t candidate, const Hash_tuning *tuning)
538
if (!tuning->is_n_buckets)
540
float new_candidate = candidate / tuning->growth_threshold;
541
if (SIZE_MAX <= new_candidate)
543
candidate = new_candidate;
545
candidate = next_prime (candidate);
546
if (xalloc_oversized (candidate, sizeof (struct hash_entry *)))
551
/* Allocate and return a new hash table, or NULL upon failure. The initial
552
number of buckets is automatically selected so as to _guarantee_ that you
553
may insert at least CANDIDATE different user entries before any growth of
554
the hash table size occurs. So, if have a reasonably tight a-priori upper
555
bound on the number of entries you intend to insert in the hash table, you
556
may save some table memory and insertion time, by specifying it here. If
557
the IS_N_BUCKETS field of the TUNING structure is true, the CANDIDATE
558
argument has its meaning changed to the wanted number of buckets.
560
TUNING points to a structure of user-supplied values, in case some fine
561
tuning is wanted over the default behavior of the hasher. If TUNING is
562
NULL, the default tuning parameters are used instead. If TUNING is
563
provided but the values requested are out of bounds or might cause
564
rounding errors, return NULL.
566
The user-supplied HASHER function, when not NULL, accepts two
567
arguments ENTRY and TABLE_SIZE. It computes, by hashing ENTRY contents, a
568
slot number for that entry which should be in the range 0..TABLE_SIZE-1.
569
This slot number is then returned.
571
The user-supplied COMPARATOR function, when not NULL, accepts two
572
arguments pointing to user data, it then returns true for a pair of entries
573
that compare equal, or false otherwise. This function is internally called
574
on entries which are already known to hash to the same bucket index,
575
but which are distinct pointers.
577
The user-supplied DATA_FREER function, when not NULL, may be later called
578
with the user data as an argument, just before the entry containing the
579
data gets freed. This happens from within `hash_free' or `hash_clear'.
580
You should specify this function only if you want these functions to free
581
all of your `data' data. This is typically the case when your data is
582
simply an auxiliary struct that you have malloc'd to aggregate several
586
hash_initialize (size_t candidate, const Hash_tuning *tuning,
587
Hash_hasher hasher, Hash_comparator comparator,
588
Hash_data_freer data_freer)
594
if (comparator == NULL)
595
comparator = raw_comparator;
597
table = malloc (sizeof *table);
602
tuning = &default_tuning;
603
table->tuning = tuning;
604
if (!check_tuning (table))
606
/* Fail if the tuning options are invalid. This is the only occasion
607
when the user gets some feedback about it. Once the table is created,
608
if the user provides invalid tuning options, we silently revert to
609
using the defaults, and ignore further request to change the tuning
614
table->n_buckets = compute_bucket_size (candidate, tuning);
615
if (!table->n_buckets)
618
table->bucket = calloc (table->n_buckets, sizeof *table->bucket);
619
if (table->bucket == NULL)
621
table->bucket_limit = table->bucket + table->n_buckets;
622
table->n_buckets_used = 0;
623
table->n_entries = 0;
625
table->hasher = hasher;
626
table->comparator = comparator;
627
table->data_freer = data_freer;
629
table->free_entry_list = NULL;
631
obstack_init (&table->entry_stack);
640
/* Make all buckets empty, placing any chained entries on the free list.
641
Apply the user-specified function data_freer (if any) to the datas of any
645
hash_clear (Hash_table *table)
647
struct hash_entry *bucket;
649
for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
653
struct hash_entry *cursor;
654
struct hash_entry *next;
656
/* Free the bucket overflow. */
657
for (cursor = bucket->next; cursor; cursor = next)
659
if (table->data_freer)
660
table->data_freer (cursor->data);
664
/* Relinking is done one entry at a time, as it is to be expected
665
that overflows are either rare or short. */
666
cursor->next = table->free_entry_list;
667
table->free_entry_list = cursor;
670
/* Free the bucket head. */
671
if (table->data_freer)
672
table->data_freer (bucket->data);
678
table->n_buckets_used = 0;
679
table->n_entries = 0;
682
/* Reclaim all storage associated with a hash table. If a data_freer
683
function has been supplied by the user when the hash table was created,
684
this function applies it to the data of each entry before freeing that
688
hash_free (Hash_table *table)
690
struct hash_entry *bucket;
691
struct hash_entry *cursor;
692
struct hash_entry *next;
694
/* Call the user data_freer function. */
695
if (table->data_freer && table->n_entries)
697
for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
701
for (cursor = bucket; cursor; cursor = cursor->next)
702
table->data_freer (cursor->data);
709
obstack_free (&table->entry_stack, NULL);
713
/* Free all bucket overflowed entries. */
714
for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
716
for (cursor = bucket->next; cursor; cursor = next)
723
/* Also reclaim the internal list of previously freed entries. */
724
for (cursor = table->free_entry_list; cursor; cursor = next)
732
/* Free the remainder of the hash table structure. */
733
free (table->bucket);
737
/* Insertion and deletion. */
739
/* Get a new hash entry for a bucket overflow, possibly by recycling a
740
previously freed one. If this is not possible, allocate a new one. */
742
static struct hash_entry *
743
allocate_entry (Hash_table *table)
745
struct hash_entry *new;
747
if (table->free_entry_list)
749
new = table->free_entry_list;
750
table->free_entry_list = new->next;
755
new = obstack_alloc (&table->entry_stack, sizeof *new);
757
new = malloc (sizeof *new);
764
/* Free a hash entry which was part of some bucket overflow,
765
saving it for later recycling. */
768
free_entry (Hash_table *table, struct hash_entry *entry)
771
entry->next = table->free_entry_list;
772
table->free_entry_list = entry;
775
/* This private function is used to help with insertion and deletion. When
776
ENTRY matches an entry in the table, return a pointer to the corresponding
777
user data and set *BUCKET_HEAD to the head of the selected bucket.
778
Otherwise, return NULL. When DELETE is true and ENTRY matches an entry in
779
the table, unlink the matching entry. */
782
hash_find_entry (Hash_table *table, const void *entry,
783
struct hash_entry **bucket_head, bool delete)
785
struct hash_entry *bucket
786
= table->bucket + table->hasher (entry, table->n_buckets);
787
struct hash_entry *cursor;
789
if (! (bucket < table->bucket_limit))
792
*bucket_head = bucket;
794
/* Test for empty bucket. */
795
if (bucket->data == NULL)
798
/* See if the entry is the first in the bucket. */
799
if (entry == bucket->data || table->comparator (entry, bucket->data))
801
void *data = bucket->data;
807
struct hash_entry *next = bucket->next;
809
/* Bump the first overflow entry into the bucket head, then save
810
the previous first overflow entry for later recycling. */
812
free_entry (table, next);
823
/* Scan the bucket overflow. */
824
for (cursor = bucket; cursor->next; cursor = cursor->next)
826
if (entry == cursor->next->data
827
|| table->comparator (entry, cursor->next->data))
829
void *data = cursor->next->data;
833
struct hash_entry *next = cursor->next;
835
/* Unlink the entry to delete, then save the freed entry for later
837
cursor->next = next->next;
838
free_entry (table, next);
845
/* No entry found. */
849
/* Internal helper, to move entries from SRC to DST. Both tables must
850
share the same free entry list. If SAFE, only move overflow
851
entries, saving bucket heads for later, so that no allocations will
852
occur. Return false if the free entry list is exhausted and an
856
transfer_entries (Hash_table *dst, Hash_table *src, bool safe)
858
struct hash_entry *bucket;
859
struct hash_entry *cursor;
860
struct hash_entry *next;
861
for (bucket = src->bucket; bucket < src->bucket_limit; bucket++)
865
struct hash_entry *new_bucket;
867
/* Within each bucket, transfer overflow entries first and
868
then the bucket head, to minimize memory pressure. After
869
all, the only time we might allocate is when moving the
870
bucket head, but moving overflow entries first may create
871
free entries that can be recycled by the time we finally
872
get to the bucket head. */
873
for (cursor = bucket->next; cursor; cursor = next)
876
new_bucket = (dst->bucket + dst->hasher (data, dst->n_buckets));
878
if (! (new_bucket < dst->bucket_limit))
883
if (new_bucket->data)
885
/* Merely relink an existing entry, when moving from a
886
bucket overflow into a bucket overflow. */
887
cursor->next = new_bucket->next;
888
new_bucket->next = cursor;
892
/* Free an existing entry, when moving from a bucket
893
overflow into a bucket header. */
894
new_bucket->data = data;
895
dst->n_buckets_used++;
896
free_entry (dst, cursor);
899
/* Now move the bucket head. Be sure that if we fail due to
900
allocation failure that the src table is in a consistent
906
new_bucket = (dst->bucket + dst->hasher (data, dst->n_buckets));
908
if (! (new_bucket < dst->bucket_limit))
911
if (new_bucket->data)
913
/* Allocate or recycle an entry, when moving from a bucket
914
header into a bucket overflow. */
915
struct hash_entry *new_entry = allocate_entry (dst);
917
if (new_entry == NULL)
920
new_entry->data = data;
921
new_entry->next = new_bucket->next;
922
new_bucket->next = new_entry;
926
/* Move from one bucket header to another. */
927
new_bucket->data = data;
928
dst->n_buckets_used++;
931
src->n_buckets_used--;
936
/* For an already existing hash table, change the number of buckets through
937
specifying CANDIDATE. The contents of the hash table are preserved. The
938
new number of buckets is automatically selected so as to _guarantee_ that
939
the table may receive at least CANDIDATE different user entries, including
940
those already in the table, before any other growth of the hash table size
941
occurs. If TUNING->IS_N_BUCKETS is true, then CANDIDATE specifies the
942
exact number of buckets desired. Return true iff the rehash succeeded. */
945
hash_rehash (Hash_table *table, size_t candidate)
948
Hash_table *new_table;
949
size_t new_size = compute_bucket_size (candidate, table->tuning);
953
if (new_size == table->n_buckets)
955
new_table = &storage;
956
new_table->bucket = calloc (new_size, sizeof *new_table->bucket);
957
if (new_table->bucket == NULL)
959
new_table->n_buckets = new_size;
960
new_table->bucket_limit = new_table->bucket + new_size;
961
new_table->n_buckets_used = 0;
962
new_table->n_entries = 0;
963
new_table->tuning = table->tuning;
964
new_table->hasher = table->hasher;
965
new_table->comparator = table->comparator;
966
new_table->data_freer = table->data_freer;
968
/* In order for the transfer to successfully complete, we need
969
additional overflow entries when distinct buckets in the old
970
table collide into a common bucket in the new table. The worst
971
case possible is a hasher that gives a good spread with the old
972
size, but returns a constant with the new size; if we were to
973
guarantee table->n_buckets_used-1 free entries in advance, then
974
the transfer would be guaranteed to not allocate memory.
975
However, for large tables, a guarantee of no further allocation
976
introduces a lot of extra memory pressure, all for an unlikely
977
corner case (most rehashes reduce, rather than increase, the
978
number of overflow entries needed). So, we instead ensure that
979
the transfer process can be reversed if we hit a memory
980
allocation failure mid-transfer. */
982
/* Merely reuse the extra old space into the new table. */
984
new_table->entry_stack = table->entry_stack;
986
new_table->free_entry_list = table->free_entry_list;
988
if (transfer_entries (new_table, table, false))
990
/* Entries transferred successfully; tie up the loose ends. */
991
free (table->bucket);
992
table->bucket = new_table->bucket;
993
table->bucket_limit = new_table->bucket_limit;
994
table->n_buckets = new_table->n_buckets;
995
table->n_buckets_used = new_table->n_buckets_used;
996
table->free_entry_list = new_table->free_entry_list;
997
/* table->n_entries and table->entry_stack already hold their value. */
1001
/* We've allocated new_table->bucket (and possibly some entries),
1002
exhausted the free list, and moved some but not all entries into
1003
new_table. We must undo the partial move before returning
1004
failure. The only way to get into this situation is if new_table
1005
uses fewer buckets than the old table, so we will reclaim some
1006
free entries as overflows in the new table are put back into
1007
distinct buckets in the old table.
1009
There are some pathological cases where a single pass through the
1010
table requires more intermediate overflow entries than using two
1011
passes. Two passes give worse cache performance and takes
1012
longer, but at this point, we're already out of memory, so slow
1013
and safe is better than failure. */
1014
table->free_entry_list = new_table->free_entry_list;
1015
if (! (transfer_entries (table, new_table, true)
1016
&& transfer_entries (table, new_table, false)))
1018
/* table->n_entries already holds its value. */
1019
free (new_table->bucket);
1023
/* If ENTRY matches an entry already in the hash table, return the pointer
1024
to the entry from the table. Otherwise, insert ENTRY and return ENTRY.
1025
Return NULL if the storage required for insertion cannot be allocated.
1026
This implementation does not support duplicate entries or insertion of
1030
hash_insert (Hash_table *table, const void *entry)
1033
struct hash_entry *bucket;
1035
/* The caller cannot insert a NULL entry. */
1039
/* If there's a matching entry already in the table, return that. */
1040
if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)
1043
/* If the growth threshold of the buckets in use has been reached, increase
1044
the table size and rehash. There's no point in checking the number of
1045
entries: if the hashing function is ill-conditioned, rehashing is not
1046
likely to improve it. */
1048
if (table->n_buckets_used
1049
> table->tuning->growth_threshold * table->n_buckets)
1051
/* Check more fully, before starting real work. If tuning arguments
1052
became invalid, the second check will rely on proper defaults. */
1053
check_tuning (table);
1054
if (table->n_buckets_used
1055
> table->tuning->growth_threshold * table->n_buckets)
1057
const Hash_tuning *tuning = table->tuning;
1059
(tuning->is_n_buckets
1060
? (table->n_buckets * tuning->growth_factor)
1061
: (table->n_buckets * tuning->growth_factor
1062
* tuning->growth_threshold));
1064
if (SIZE_MAX <= candidate)
1067
/* If the rehash fails, arrange to return NULL. */
1068
if (!hash_rehash (table, candidate))
1071
/* Update the bucket we are interested in. */
1072
if (hash_find_entry (table, entry, &bucket, false) != NULL)
1077
/* ENTRY is not matched, it should be inserted. */
1081
struct hash_entry *new_entry = allocate_entry (table);
1083
if (new_entry == NULL)
1086
/* Add ENTRY in the overflow of the bucket. */
1088
new_entry->data = (void *) entry;
1089
new_entry->next = bucket->next;
1090
bucket->next = new_entry;
1092
return (void *) entry;
1095
/* Add ENTRY right in the bucket head. */
1097
bucket->data = (void *) entry;
1099
table->n_buckets_used++;
1101
return (void *) entry;
1104
/* If ENTRY is already in the table, remove it and return the just-deleted
1105
data (the user may want to deallocate its storage). If ENTRY is not in the
1106
table, don't modify the table and return NULL. */
1109
hash_delete (Hash_table *table, const void *entry)
1112
struct hash_entry *bucket;
1114
data = hash_find_entry (table, entry, &bucket, true);
1121
table->n_buckets_used--;
1123
/* If the shrink threshold of the buckets in use has been reached,
1124
rehash into a smaller table. */
1126
if (table->n_buckets_used
1127
< table->tuning->shrink_threshold * table->n_buckets)
1129
/* Check more fully, before starting real work. If tuning arguments
1130
became invalid, the second check will rely on proper defaults. */
1131
check_tuning (table);
1132
if (table->n_buckets_used
1133
< table->tuning->shrink_threshold * table->n_buckets)
1135
const Hash_tuning *tuning = table->tuning;
1137
(tuning->is_n_buckets
1138
? table->n_buckets * tuning->shrink_factor
1139
: (table->n_buckets * tuning->shrink_factor
1140
* tuning->growth_threshold));
1142
if (!hash_rehash (table, candidate))
1144
/* Failure to allocate memory in an attempt to
1145
shrink the table is not fatal. But since memory
1146
is low, we can at least be kind and free any
1147
spare entries, rather than keeping them tied up
1148
in the free entry list. */
1150
struct hash_entry *cursor = table->free_entry_list;
1151
struct hash_entry *next;
1154
next = cursor->next;
1158
table->free_entry_list = NULL;
/* Testing.  */

#if TESTING

/* Dump the table to stdout, one line per used bucket, for debugging.  */
void
hash_print (const Hash_table *table)
{
  struct hash_entry *bucket = (struct hash_entry *) table->bucket;

  for ( ; bucket < table->bucket_limit; bucket++)
    {
      struct hash_entry *cursor;

      if (bucket)
        printf ("%lu:\n", (unsigned long int) (bucket - table->bucket));

      for (cursor = bucket; cursor; cursor = cursor->next)
        {
          char const *s = cursor->data;
          /* FIXME */
          if (s)
            printf (" %s\n", s);
        }
    }
}

#endif /* TESTING */