1
1
/* hash - hashing table processing.
3
Copyright (C) 1998-2004, 2006-2007, 2009-2011 Free Software Foundation, Inc.
3
Copyright (C) 1998-2004, 2006-2007, 2009-2012 Free Software Foundation, Inc.
5
5
Written by Jim Meyering, 1992.
63
63
/* Tuning arguments, kept in a physically separate structure. */
64
64
const Hash_tuning *tuning;
66
/* Three functions are given to `hash_initialize', see the documentation
66
/* Three functions are given to 'hash_initialize', see the documentation
67
67
block for this function. In a word, HASHER randomizes a user entry
68
68
into a number up from 0 up to some maximum minus 1; COMPARATOR returns
69
69
true if two user entries compare equally; and DATA_FREER is the cleanup
87
87
some user-provided data (also called a user entry). An entry indistinctly
88
88
refers to both the internal entry and its associated user entry. A user
89
89
entry contents may be hashed by a randomization function (the hashing
90
function, or just `hasher' for short) into a number (or `slot') between 0
90
function, or just "hasher" for short) into a number (or "slot") between 0
91
91
and the current table size. At each slot position in the hash table,
92
92
starts a linked chain of entries for which the user data all hash to this
93
93
slot. A bucket is the collection of all entries hashing to the same slot.
95
A good `hasher' function will distribute entries rather evenly in buckets.
95
A good "hasher" function will distribute entries rather evenly in buckets.
96
96
In the ideal case, the length of each bucket is roughly the number of
97
97
entries divided by the table size. Finding the slot for a data is usually
98
done in constant time by the `hasher', and the later finding of a precise
98
done in constant time by the "hasher", and the later finding of a precise
99
99
entry is linear in time with the size of the bucket. Consequently, a
100
100
larger hash table size (that is, a larger number of buckets) is prone to
101
yielding shorter chains, *given* the `hasher' function behaves properly.
101
yielding shorter chains, *given* the "hasher" function behaves properly.
103
103
Long buckets slow down the lookup algorithm. One might use big hash table
104
104
sizes in hope to reduce the average length of buckets, but this might
105
105
become inordinate, as unused slots in the hash table take some space. The
106
best bet is to make sure you are using a good `hasher' function (beware
106
best bet is to make sure you are using a good "hasher" function (beware
107
107
that those are not that easy to write! :-), and to use a table size
108
108
larger than the actual number of entries. */
113
113
1.0). The growth threshold defaults to 0.8, and the growth factor
114
114
defaults to 1.414, meaning that the table will have doubled its size
115
115
every second time 80% of the buckets get used. */
116
#define DEFAULT_GROWTH_THRESHOLD 0.8
117
#define DEFAULT_GROWTH_FACTOR 1.414
116
#define DEFAULT_GROWTH_THRESHOLD 0.8f
117
#define DEFAULT_GROWTH_FACTOR 1.414f
119
119
/* If a deletion empties a bucket and causes the ratio of used buckets to
120
120
table size to become smaller than the shrink threshold (a number between
122
122
number greater than the shrink threshold but smaller than 1.0). The shrink
123
123
threshold and factor default to 0.0 and 1.0, meaning that the table never
125
#define DEFAULT_SHRINK_THRESHOLD 0.0
126
#define DEFAULT_SHRINK_FACTOR 1.0
125
#define DEFAULT_SHRINK_THRESHOLD 0.0f
126
#define DEFAULT_SHRINK_FACTOR 1.0f
128
128
/* Use this to initialize or reset a TUNING structure to
129
129
some sensible values. */
302
302
/* Return the user data for the entry following ENTRY, where ENTRY has been
303
returned by a previous call to either `hash_get_first' or `hash_get_next'.
303
returned by a previous call to either 'hash_get_first' or 'hash_get_next'.
304
304
Return NULL if there are no more entries. */
420
420
#else /* not USE_DIFF_HASH */
422
/* This one comes from `recode', and performs a bit better than the above as
422
/* This one comes from 'recode', and performs a bit better than the above as
423
423
per a few experiments. It is inspired from a hashing routine found in the
424
very old Cyber `snoop', itself written in typical Greg Mansfield style.
424
very old Cyber 'snoop', itself written in typical Greg Mansfield style.
425
425
(By the way, what happened to this excellent man? Is he still alive?) */
459
459
/* Round a given CANDIDATE number up to the nearest prime, and return that
460
460
prime. Primes lower than 10 are merely skipped. */
462
static size_t _GL_ATTRIBUTE_CONST
463
463
next_prime (size_t candidate)
465
465
/* Skip small primes. */
540
540
TUNING, or return 0 if there is no possible way to allocate that
543
static size_t _GL_ATTRIBUTE_PURE
544
544
compute_bucket_size (size_t candidate, const Hash_tuning *tuning)
546
546
if (!tuning->is_n_buckets)
585
585
The user-supplied DATA_FREER function, when not NULL, may be later called
586
586
with the user data as an argument, just before the entry containing the
587
data gets freed. This happens from within `hash_free' or `hash_clear'.
587
data gets freed. This happens from within 'hash_free' or 'hash_clear'.
588
588
You should specify this function only if you want these functions to free
589
all of your `data' data. This is typically the case when your data is
589
all of your 'data' data. This is typically the case when your data is
590
590
simply an auxiliary struct that you have malloc'd to aggregate several
1021
/* Return -1 upon memory allocation failure.
1021
/* Insert ENTRY into hash TABLE if there is not already a matching entry.
1023
Return -1 upon memory allocation failure.
1022
1024
Return 1 if insertion succeeded.
1023
1025
Return 0 if there is already a matching entry in the table,
1024
1026
and in that case, if MATCHED_ENT is non-NULL, set *MATCHED_ENT
1030
1032
hash_insert, the only way to distinguish those cases is to compare
1031
1033
the return value and ENTRY. That works only when you can have two
1032
1034
different ENTRY values that point to data that compares "equal". Thus,
1033
when the ENTRY value is a simple scalar, you must use hash_insert0.
1034
ENTRY must not be NULL. */
1035
when the ENTRY value is a simple scalar, you must use
1036
hash_insert_if_absent. ENTRY must not be NULL. */
1036
hash_insert0 (Hash_table *table, void const *entry, void const **matched_ent)
1038
hash_insert_if_absent (Hash_table *table, void const *entry,
1039
void const **matched_ent)
1039
1042
struct hash_entry *bucket;
1119
/* hash_insert0 is the deprecated name for hash_insert_if_absent.
1122
hash_insert0 (Hash_table *table, void const *entry, void const **matched_ent)
1124
return hash_insert_if_absent (table, entry, matched_ent);
1116
1127
/* If ENTRY matches an entry already in the hash table, return the pointer
1117
1128
to the entry from the table. Otherwise, insert ENTRY and return ENTRY.
1118
1129
Return NULL if the storage required for insertion cannot be allocated.
1123
1134
hash_insert (Hash_table *table, void const *entry)
1125
1136
void const *matched_ent;
1126
int err = hash_insert0 (table, entry, &matched_ent);
1137
int err = hash_insert_if_absent (table, entry, &matched_ent);
1127
1138
return (err == -1
1129
1140
: (void *) (err == 0 ? matched_ent : entry));