~ubuntu-branches/ubuntu/natty/diffutils/natty

« back to all changes in this revision

Viewing changes to lib/hash.c

Committer: Bazaar Package Importer
Author(s): Santiago Vila
Date: 2010-05-04 20:38:00 UTC
mfrom: (2.1.7 sid)
Revision ID: james.westby@ubuntu.com-20100504203800-f67xd9rsa9xl9qqj

Tags: 1:3.0-1

New upstream release.

files added:
.tarball-version

ChangeLog-2008

GNUmakefile

TODO

bootstrap

build-aux

build-aux/announce-gen

build-aux/arg-nonnull.h

build-aux/c++defs.h

build-aux/compile

build-aux/config.guess

build-aux/config.rpath

build-aux/config.sub

build-aux/depcomp

build-aux/do-release-commit-and-tag

build-aux/gendocs.sh

build-aux/git-version-gen

build-aux/gitlog-to-changelog

build-aux/gnu-web-doc-update

build-aux/gnupload

build-aux/install-sh

build-aux/mdate-sh

build-aux/missing

build-aux/mkinstalldirs

build-aux/texinfo.tex

build-aux/update-copyright

build-aux/useless-if-before-free

build-aux/vc-list-files

build-aux/warn-on-use.h

cfg.mk

debian/source

debian/source/format

dist-check.mk

gnulib-tests

gnulib-tests/Makefile.am

gnulib-tests/Makefile.in

gnulib-tests/argmatch.c

gnulib-tests/argmatch.h

gnulib-tests/binary-io.h

gnulib-tests/gnulib.mk

gnulib-tests/hash-pjw.c

gnulib-tests/hash-pjw.h

gnulib-tests/init.sh

gnulib-tests/locale

gnulib-tests/locale/fr

gnulib-tests/locale/fr/LC_MESSAGES

gnulib-tests/locale/fr/LC_MESSAGES/test-quotearg.mo

gnulib-tests/locale/fr/LC_MESSAGES/test-quotearg.po

gnulib-tests/macros.h

gnulib-tests/pathmax.h

gnulib-tests/putenv.c

gnulib-tests/same-inode.h

gnulib-tests/setenv.c

gnulib-tests/signature.h

gnulib-tests/sleep.c

gnulib-tests/symlink.c

gnulib-tests/test-alloca-opt.c

gnulib-tests/test-argmatch.c

gnulib-tests/test-binary-io.c

gnulib-tests/test-binary-io.sh

gnulib-tests/test-bitrotate.c

gnulib-tests/test-btowc.c

gnulib-tests/test-btowc1.sh

gnulib-tests/test-btowc2.sh

gnulib-tests/test-c-ctype.c

gnulib-tests/test-c-stack.c

gnulib-tests/test-c-stack.sh

gnulib-tests/test-c-stack2.sh

gnulib-tests/test-c-strcase.sh

gnulib-tests/test-c-strcasecmp.c

gnulib-tests/test-c-strncasecmp.c

gnulib-tests/test-dirname.c

gnulib-tests/test-dup2.c

gnulib-tests/test-environ.c

gnulib-tests/test-errno.c

gnulib-tests/test-exclude.c

gnulib-tests/test-exclude1.sh

gnulib-tests/test-exclude2.sh

gnulib-tests/test-exclude3.sh

gnulib-tests/test-exclude4.sh

gnulib-tests/test-exclude5.sh

gnulib-tests/test-exclude6.sh

gnulib-tests/test-exclude7.sh

gnulib-tests/test-fcntl-h.c

gnulib-tests/test-fcntl.c

gnulib-tests/test-fnmatch.c

gnulib-tests/test-freopen-safer.c

gnulib-tests/test-freopen.c

gnulib-tests/test-getdtablesize.c

gnulib-tests/test-getopt.c

gnulib-tests/test-getopt.h

gnulib-tests/test-getopt_long.h

gnulib-tests/test-gettimeofday.c

gnulib-tests/test-hash.c

gnulib-tests/test-iconv-h.c

gnulib-tests/test-iconv.c

gnulib-tests/test-inttypes.c

gnulib-tests/test-langinfo.c

gnulib-tests/test-lstat.c

gnulib-tests/test-lstat.h

gnulib-tests/test-malloca.c

gnulib-tests/test-mbrtowc.c

gnulib-tests/test-mbrtowc1.sh

gnulib-tests/test-mbrtowc2.sh

gnulib-tests/test-mbrtowc3.sh

gnulib-tests/test-mbrtowc4.sh

gnulib-tests/test-mbscasecmp.c

gnulib-tests/test-mbscasecmp.sh

gnulib-tests/test-mbsinit.c

gnulib-tests/test-mbsinit.sh

gnulib-tests/test-mbsrtowcs.c

gnulib-tests/test-mbsrtowcs1.sh

gnulib-tests/test-mbsrtowcs2.sh

gnulib-tests/test-mbsrtowcs3.sh

gnulib-tests/test-mbsrtowcs4.sh

gnulib-tests/test-mbsstr1.c

gnulib-tests/test-mbsstr2.c

gnulib-tests/test-mbsstr2.sh

gnulib-tests/test-mbsstr3.c

gnulib-tests/test-mbsstr3.sh

gnulib-tests/test-memchr.c

gnulib-tests/test-nl_langinfo.c

gnulib-tests/test-nl_langinfo.sh

gnulib-tests/test-open.c

gnulib-tests/test-open.h

gnulib-tests/test-quotearg.c

gnulib-tests/test-quotearg.sh

gnulib-tests/test-setenv.c

gnulib-tests/test-sigaction.c

gnulib-tests/test-signal.c

gnulib-tests/test-sleep.c

gnulib-tests/test-stat-time.c

gnulib-tests/test-stat.c

gnulib-tests/test-stat.h

gnulib-tests/test-stdbool.c

gnulib-tests/test-stddef.c

gnulib-tests/test-stdint.c

gnulib-tests/test-stdio.c

gnulib-tests/test-stdlib.c

gnulib-tests/test-strerror.c

gnulib-tests/test-striconv.c

gnulib-tests/test-string.c

gnulib-tests/test-strings.c

gnulib-tests/test-symlink.c

gnulib-tests/test-symlink.h

gnulib-tests/test-sys_stat.c

gnulib-tests/test-sys_time.c

gnulib-tests/test-sys_wait.c

gnulib-tests/test-time.c

gnulib-tests/test-unistd.c

gnulib-tests/test-unsetenv.c

gnulib-tests/test-update-copyright.sh

gnulib-tests/test-usleep.c

gnulib-tests/test-vc-list-files-cvs.sh

gnulib-tests/test-vc-list-files-git.sh

gnulib-tests/test-version-etc.c

gnulib-tests/test-version-etc.sh

gnulib-tests/test-wchar.c

gnulib-tests/test-wcrtomb.c

gnulib-tests/test-wcrtomb.sh

gnulib-tests/test-wctype.c

gnulib-tests/test-wcwidth.c

gnulib-tests/test-xalloc-die.c

gnulib-tests/test-xalloc-die.sh

gnulib-tests/test-xstrtol.c

gnulib-tests/test-xstrtol.sh

gnulib-tests/test-xstrtoul.c

gnulib-tests/test-xstrtoumax.c

gnulib-tests/test-xstrtoumax.sh

gnulib-tests/uniwidth

gnulib-tests/uniwidth/test-uc_width.c

gnulib-tests/uniwidth/test-uc_width2.c

gnulib-tests/uniwidth/test-uc_width2.sh

gnulib-tests/unsetenv.c

gnulib-tests/usleep.c

gnulib-tests/wctob.c

gnulib-tests/zerosize-ptr.h

lib/alloca.in.h

lib/basename-lgpl.c

lib/bitrotate.h

lib/btowc.c

lib/c-ctype.c

lib/c-ctype.h

lib/c-strcase.h

lib/c-strcasecmp.c

lib/c-strncasecmp.c

lib/config.charset

lib/config.hin

lib/diffseq.h

lib/dirname-lgpl.c

lib/dirname.c

lib/dup2.c

lib/errno.in.h

lib/fcntl.c

lib/fcntl.in.h

lib/file-type.c

lib/file-type.h

lib/fnmatch.in.h

lib/fnmatch_loop.c

lib/freopen-safer.c

lib/freopen.c

lib/getdtablesize.c

lib/getopt.in.h

lib/getopt_int.h

lib/gettime.c

lib/gettimeofday.c

lib/gnulib.mk

lib/hash.c

lib/hash.h

lib/iconv.in.h

lib/iconv_open-aix.gperf

lib/iconv_open-aix.h

lib/iconv_open-hpux.gperf

lib/iconv_open-hpux.h

lib/iconv_open-irix.gperf

lib/iconv_open-irix.h

lib/iconv_open-osf.gperf

lib/iconv_open-osf.h

lib/iconv_open-solaris.gperf

lib/iconv_open-solaris.h

lib/iconv_open.c

lib/ignore-value.h

lib/intprops.h

lib/inttypes.in.h

lib/langinfo.in.h

lib/localcharset.c

lib/localcharset.h

lib/lstat.c

lib/malloca.c

lib/malloca.h

lib/malloca.valgrind

lib/mbchar.c

lib/mbchar.h

lib/mbiter.h

lib/mbrlen.c

lib/mbrtowc.c

lib/mbscasecmp.c

lib/mbsinit.c

lib/mbslen.c

lib/mbsrtowcs-state.c

lib/mbsrtowcs.c

lib/mbsstr.c

lib/mbuiter.h

lib/memchr.valgrind

lib/mktime-internal.h

lib/mktime.c

lib/nl_langinfo.c

lib/open.c

lib/progname.c

lib/progname.h

lib/propername.c

lib/propername.h

lib/quote.c

lib/quote.h

lib/quotearg.c

lib/quotearg.h

lib/ref-add.sin

lib/ref-del.sin

lib/regcomp.c

lib/regex.h

lib/regex_internal.c

lib/regex_internal.h

lib/regexec.c

lib/sh-quote.c

lib/sh-quote.h

lib/sig-handler.h

lib/sigaction.c

lib/signal.in.h

lib/sigprocmask.c

lib/stat-macros.h

lib/stat-time.h

lib/stat.c

lib/stdarg.in.h

lib/stdbool.in.h

lib/stddef.in.h

lib/stdint.in.h

lib/stdio--.h

lib/stdio-safer.h

lib/stdio-write.c

lib/stdio.in.h

lib/stdlib.in.h

lib/str-kmp.h

lib/streq.h

lib/strerror.c

lib/strftime.h

lib/striconv.c

lib/striconv.h

lib/string.in.h

lib/strings.in.h

lib/stripslash.c

lib/strncasecmp.c

lib/strndup.c

lib/strnlen.c

lib/strnlen1.c

lib/strnlen1.h

lib/strptime.c

lib/strtoll.c

lib/sys_stat.in.h

lib/sys_time.in.h

lib/sys_wait.in.h

lib/tempname.h

lib/time.in.h

lib/time_r.c

lib/timegm.c

lib/timespec.h

lib/trim.c

lib/trim.h

lib/uinttostr.c

lib/unistd.in.h

lib/unitypes.h

lib/uniwidth

lib/uniwidth.h

lib/uniwidth/cjk.h

lib/uniwidth/width.c

lib/verify.h

lib/version-etc-fsf.c

lib/version-etc.c

lib/version-etc.h

lib/wchar.in.h

lib/wcrtomb.c

lib/wctype.in.h

lib/wcwidth.c

lib/xalloc-die.c

lib/xfreopen.c

lib/xfreopen.h

lib/xstriconv.c

lib/xstriconv.h

lib/xstrndup.c

lib/xstrndup.h

lib/xstrtol-error.c

lib/xstrtoul.c

m4/00gnulib.m4

m4/alloca.m4

m4/argmatch.m4

m4/btowc.m4

m4/clock_time.m4

m4/config-h.m4

m4/dirname.m4

m4/double-slash-root.m4

m4/dup2.m4

m4/eealloc.m4

m4/environ.m4

m4/errno_h.m4

m4/exclude.m4

m4/extensions.m4

m4/fcntl-o.m4

m4/fcntl.m4

m4/fcntl_h.m4

m4/file-type.m4

m4/freopen.m4

m4/getdtablesize.m4

m4/getopt.m4

m4/gettext_gl.m4

m4/gettime.m4

m4/gettimeofday.m4

m4/glibc21_gl.m4

m4/gnu-make.m4

m4/gnulib-common.m4

m4/gnulib-comp.m4

m4/hard-locale.m4

m4/hash.m4

m4/iconv_h.m4

m4/iconv_open.m4

m4/include_next.m4

m4/inline.m4

m4/intlmacosx.m4

m4/inttostr.m4

m4/inttypes-pri.m4

m4/langinfo_h.m4

m4/lib-ld_gl.m4

m4/lib-prefix_gl.m4

m4/libsigsegv.m4

m4/localcharset.m4

m4/locale-fr.m4

m4/locale-ja.m4

m4/locale-tr.m4

m4/locale-zh.m4

m4/longlong_gl.m4

m4/lstat.m4

m4/malloca.m4

m4/manywarnings.m4

m4/mbchar.m4

m4/mbiter.m4

m4/mbrlen.m4

m4/mbsinit.m4

m4/mbslen.m4

m4/mbsrtowcs.m4

m4/memchr.m4

m4/mkstemp.m4

m4/mktime.m4

m4/mmap-anon.m4

m4/mode_t.m4

m4/multiarch.m4

m4/nl_langinfo.m4

m4/nls.m4

m4/onceonly.m4

m4/open.m4

m4/pathmax.m4

m4/po_gl.m4

m4/putenv.m4

m4/quote.m4

m4/quotearg.m4

m4/setenv.m4

m4/sigaction.m4

m4/signal_h.m4

m4/signalblocking.m4

m4/sleep.m4

m4/ssize_t.m4

m4/stat-time.m4

m4/stat.m4

m4/stdarg.m4

m4/stddef_h.m4

m4/stdint.m4

m4/stdio-safer.m4

m4/stdio_h.m4

m4/stdlib_h.m4

m4/strcase.m4

m4/strerror.m4

m4/string_h.m4

m4/strings_h.m4

m4/strndup.m4

m4/strnlen.m4

m4/strptime.m4

m4/strtoimax.m4

m4/strtol.m4

m4/strtoll.m4

m4/strtoul.m4

m4/strtoull.m4

m4/strtoumax.m4

m4/symlink.m4

m4/sys_stat_h.m4

m4/sys_time_h.m4

m4/sys_wait_h.m4

m4/tempname.m4

m4/time_h.m4

m4/time_r.m4

m4/timegm.m4

m4/timespec.m4

m4/tm_gmtoff.m4

m4/unistd_h.m4

m4/unlocked-io.m4

m4/usleep.m4

m4/version-etc.m4

m4/warn-on-use.m4

m4/warnings.m4

m4/wchar_h.m4

m4/wchar_t.m4

m4/wcrtomb.m4

m4/wctob.m4

m4/wctype_h.m4

m4/wcwidth.m4

m4/wint_t.m4

m4/xalloc.m4

m4/xstrndup.m4

m4/xstrtol.m4

maint.mk

po/ca.gmo

po/ca.po

po/el.gmo

po/el.po

po/fi.gmo

po/fi.po

po/ga.gmo

po/ga.po

po/hu.gmo

po/hu.po

po/it.gmo

po/it.po

po/lv.gmo

po/lv.po

po/ms.gmo

po/ms.po

po/nl.gmo

po/nl.po

po/pt_BR.gmo

po/pt_BR.po

po/ro.gmo

po/ro.po

po/sr.gmo

po/sr.po

po/stamp-po

po/uk.gmo

po/uk.po

po/vi.gmo

po/vi.po

po/zh_CN.gmo

po/zh_CN.po

tests

tests/Makefile.am

tests/Makefile.in

tests/basic

tests/binary

tests/function-line-vs-leading-space

tests/help-version

tests/init.sh

tests/label-vs-func

tests/no-newline-at-eof

tests/stdin

tests/t-local.sh

files removed:
INSTALLME

config

config.hin

config/config.guess

config/config.rpath

config/config.sub

config/depcomp

config/install-sh

config/mdate-sh

config/missing

config/mkinstalldirs

config/texinfo.tex

lib/fnmatch.hin

lib/freesoft.c

lib/freesoft.h

lib/getopt.h

lib/posix

lib/posix/Makefile.am

lib/posix/Makefile.in

lib/posix/regex.h

lib/posixver.c

lib/posixver.h

lib/quotesys.c

lib/quotesys.h

lib/realloc.c

lib/regex.hin

lib/setmode.c

lib/setmode.h

lib/waitpid.c

m4/Makefile.am

m4/Makefile.am.in

m4/Makefile.in

m4/README

m4/c-bs-a.m4

m4/check-decl.m4

m4/gettext.m4

m4/glibc21.m4

m4/gnu-source.m4

m4/jm-glibc-io.m4

m4/lcmessage.m4

m4/lib-ld.m4

m4/lib-prefix.m4

m4/longlong.m4

m4/prereq.m4

m4/realloc.m4

m4/setmode.m4

m4/st_mtim.m4

m4/xstrtoumax.m4

po/en_GB.gmo

po/en_GB.po

src/version.c

files modified:
ABOUT-NLS

AUTHORS

COPYING

ChangeLog

INSTALL

Makefile.am

Makefile.in

NEWS

README

THANKS

aclocal.m4

configure

configure.ac

debian/changelog

debian/control

debian/copyright

debian/rules

debian/watch

exgettext

lib/Makefile.am

lib/Makefile.in

lib/alloca.c

lib/basename.c

lib/c-stack.c

lib/c-stack.h

lib/cmpbuf.c

lib/cmpbuf.h

lib/dirname.h

lib/error.c

lib/error.h

lib/exclude.c

lib/exclude.h

lib/exitfail.c

lib/exitfail.h

lib/fnmatch.c

lib/getopt.c

lib/getopt1.c

lib/gettext.h

lib/hard-locale.c

lib/hard-locale.h

lib/imaxtostr.c

lib/inttostr.c

lib/inttostr.h

lib/malloc.c

lib/memchr.c

lib/mkstemp.c

lib/offtostr.c

lib/prepargs.c

lib/regex.c

lib/strcasecmp.c

lib/strftime.c

lib/strtoimax.c

lib/strtol.c

lib/strtoul.c

lib/strtoull.c

lib/tempname.c

lib/umaxtostr.c

lib/unlocked-io.h

lib/xalloc.h

lib/xmalloc.c

lib/xstrtol.c

lib/xstrtol.h

lib/xstrtoumax.c

m4/c-stack.m4

m4/codeset.m4

m4/dos.m4

m4/error.m4

m4/fnmatch.m4

m4/iconv.m4

m4/inttypes.m4

m4/lib-link.m4

m4/malloc.m4

m4/mbrtowc.m4

m4/mbstate_t.m4

m4/progtest.m4

m4/regex.m4

m4/stdbool.m4

m4/strftime.m4

m4/vararrays.m4

man/Makefile.am

man/Makefile.in

man/cmp.1

man/diff.1

man/diff3.1

man/sdiff.1

ms/Makefile.am

ms/Makefile.in

ms/README

ms/config.bat

ms/config.sed

ms/config.site

po/LINGUAS

po/Makefile.in.in

po/Makevars

po/POTFILES.in

po/Rules-quot

po/cs.gmo

po/cs.po

po/da.gmo

po/da.po

po/de.gmo

po/de.po

po/diffutils.pot

po/eo.gmo

po/eo.po

po/es.gmo

po/es.po

po/fr.gmo

po/fr.po

po/gl.gmo

po/gl.po

po/he.gmo

po/he.po

po/id.gmo

po/id.po

po/ja.gmo

po/ja.po

po/pl.gmo

po/pl.po

po/ru.gmo

po/ru.po

po/sv.gmo

po/sv.po

po/tr.gmo

po/tr.po

po/zh_TW.gmo

po/zh_TW.po

src/Makefile.am

src/Makefile.in

src/analyze.c

src/cmp.c

src/context.c

src/diff.c

src/diff.h

src/diff3.c

src/dir.c

src/ed.c

src/ifdef.c

src/io.c

src/normal.c

src/sdiff.c

src/side.c

src/system.h

src/util.c

Show diffs side-by-side

added added

removed removed

lib/hash.c

/* hash - hashing table processing.

Written by Jim Meyering, 1992.

This program is free software: you can redistribute it and/or modify

it under the terms of the GNU General Public License as published by

the Free Software Foundation; either version 3 of the License, or

(at your option) any later version.

This program is distributed in the hope that it will be useful,

but WITHOUT ANY WARRANTY; without even the implied warranty of

MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

GNU General Public License for more details.

You should have received a copy of the GNU General Public License

along with this program. If not, see <http://www.gnu.org/licenses/>. */

/* A generic hash table package. */

/* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead

of malloc. If you change USE_OBSTACK, you have to recompile! */

#include <config.h>

#include "hash.h"

#include "bitrotate.h"

#include "xalloc.h"

#include <stdint.h>

#include <stdio.h>

#include <stdlib.h>

#if USE_OBSTACK

# include "obstack.h"

# ifndef obstack_chunk_alloc

# define obstack_chunk_alloc malloc

# endif

# ifndef obstack_chunk_free

# define obstack_chunk_free free

# endif

#endif

struct hash_entry

{

void *data;

struct hash_entry *next;

};

struct hash_table

{

/* The array of buckets starts at BUCKET and extends to BUCKET_LIMIT-1,

for a possibility of N_BUCKETS. Among those, N_BUCKETS_USED buckets

are not empty, there are N_ENTRIES active entries in the table. */

struct hash_entry *bucket;

struct hash_entry const *bucket_limit;

size_t n_buckets;

size_t n_buckets_used;

size_t n_entries;

/* Tuning arguments, kept in a physically separate structure. */

const Hash_tuning *tuning;

/* Three functions are given to `hash_initialize', see the documentation

block for this function. In a word, HASHER randomizes a user entry

into a number up from 0 up to some maximum minus 1; COMPARATOR returns

true if two user entries compare equally; and DATA_FREER is the cleanup

function for a user entry. */

Hash_hasher hasher;

Hash_comparator comparator;

Hash_data_freer data_freer;

/* A linked list of freed struct hash_entry structs. */

struct hash_entry *free_entry_list;

#if USE_OBSTACK

/* Whenever obstacks are used, it is possible to allocate all overflowed

entries into a single stack, so they all can be freed in a single

operation. It is not clear if the speedup is worth the trouble. */

struct obstack entry_stack;

#endif

};

/* A hash table contains many internal entries, each holding a pointer to

some user-provided data (also called a user entry). An entry indistinctly

refers to both the internal entry and its associated user entry. A user

entry contents may be hashed by a randomization function (the hashing

function, or just `hasher' for short) into a number (or `slot') between 0

and the current table size. At each slot position in the hash table,

starts a linked chain of entries for which the user data all hash to this

slot. A bucket is the collection of all entries hashing to the same slot.

A good `hasher' function will distribute entries rather evenly in buckets.

In the ideal case, the length of each bucket is roughly the number of

entries divided by the table size. Finding the slot for a data is usually

done in constant time by the `hasher', and the later finding of a precise

entry is linear in time with the size of the bucket. Consequently, a

100

larger hash table size (that is, a larger number of buckets) is prone to

101

yielding shorter chains, *given* the `hasher' function behaves properly.

102

103

Long buckets slow down the lookup algorithm. One might use big hash table

104

sizes in hope to reduce the average length of buckets, but this might

105

become inordinate, as unused slots in the hash table take some space. The

106

best bet is to make sure you are using a good `hasher' function (beware

107

that those are not that easy to write! :-), and to use a table size

108

larger than the actual number of entries. */

109

110

/* If an insertion makes the ratio of nonempty buckets to table size larger

111

than the growth threshold (a number between 0.0 and 1.0), then increase

112

the table size by multiplying by the growth factor (a number greater than

113

1.0). The growth threshold defaults to 0.8, and the growth factor

114

defaults to 1.414, meaning that the table will have doubled its size

115

every second time 80% of the buckets get used. */

116

#define DEFAULT_GROWTH_THRESHOLD 0.8

117

#define DEFAULT_GROWTH_FACTOR 1.414

118

119

/* If a deletion empties a bucket and causes the ratio of used buckets to

120

table size to become smaller than the shrink threshold (a number between

121

0.0 and 1.0), then shrink the table by multiplying by the shrink factor (a

122

number greater than the shrink threshold but smaller than 1.0). The shrink

123

threshold and factor default to 0.0 and 1.0, meaning that the table never

124

shrinks. */

125

#define DEFAULT_SHRINK_THRESHOLD 0.0

126

#define DEFAULT_SHRINK_FACTOR 1.0

127

128

/* Use this to initialize or reset a TUNING structure to

129

some sensible values. */

130

static const Hash_tuning default_tuning =

131

{

132

DEFAULT_SHRINK_THRESHOLD,

133

DEFAULT_SHRINK_FACTOR,

134

DEFAULT_GROWTH_THRESHOLD,

135

DEFAULT_GROWTH_FACTOR,

136

false

137

};

138

139

/* Information and lookup. */

140

141

/* The following few functions provide information about the overall hash

142

table organization: the number of entries, number of buckets and maximum

143

length of buckets. */

144

145

/* Return the number of buckets in the hash table. The table size, the total

146

number of buckets (used plus unused), or the maximum number of slots, are

147

the same quantity. */

148

149

size_t

150

hash_get_n_buckets (const Hash_table *table)

151

{

152

return table->n_buckets;

153

}

154

155

/* Return the number of slots in use (non-empty buckets). */

156

157

size_t

158

hash_get_n_buckets_used (const Hash_table *table)

159

{

160

return table->n_buckets_used;

161

}

162

163

/* Return the number of active entries. */

164

165

size_t

166

hash_get_n_entries (const Hash_table *table)

167

{

168

return table->n_entries;

169

}

170

171

/* Return the length of the longest chain (bucket). */

172

173

size_t

174

hash_get_max_bucket_length (const Hash_table *table)

175

{

176

struct hash_entry const *bucket;

177

size_t max_bucket_length = 0;

178

179

for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)

180

{

181

if (bucket->data)

182

{

183

struct hash_entry const *cursor = bucket;

184

size_t bucket_length = 1;

185

186

while (cursor = cursor->next, cursor)

187

bucket_length++;

188

189

if (bucket_length > max_bucket_length)

190

max_bucket_length = bucket_length;

191

}

192

}

193

194

return max_bucket_length;

195

}

196

197

/* Do a mild validation of a hash table, by traversing it and checking two

198

statistics. */

199

200

bool

201

hash_table_ok (const Hash_table *table)

202

{

203

struct hash_entry const *bucket;

204

size_t n_buckets_used = 0;

205

size_t n_entries = 0;

206

207

for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)

208

{

209

if (bucket->data)

210

{

211

struct hash_entry const *cursor = bucket;

212

213

/* Count bucket head. */

214

n_buckets_used++;

215

n_entries++;

216

217

/* Count bucket overflow. */

218

while (cursor = cursor->next, cursor)

219

n_entries++;

220

}

221

}

222

223

if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)

224

return true;

225

226

return false;

227

}

228

229

void

230

hash_print_statistics (const Hash_table *table, FILE *stream)

231

{

232

size_t n_entries = hash_get_n_entries (table);

233

size_t n_buckets = hash_get_n_buckets (table);

234

size_t n_buckets_used = hash_get_n_buckets_used (table);

235

size_t max_bucket_length = hash_get_max_bucket_length (table);

236

237

fprintf (stream, "# entries: %lu\n", (unsigned long int) n_entries);

238

fprintf (stream, "# buckets: %lu\n", (unsigned long int) n_buckets);

239

fprintf (stream, "# buckets used: %lu (%.2f%%)\n",

240

(unsigned long int) n_buckets_used,

241

(100.0 * n_buckets_used) / n_buckets);

242

fprintf (stream, "max bucket length: %lu\n",

243

(unsigned long int) max_bucket_length);

244

}

245

246

/* If ENTRY matches an entry already in the hash table, return the

247

entry from the table. Otherwise, return NULL. */

248

249

void *

250

hash_lookup (const Hash_table *table, const void *entry)

251

{

252

struct hash_entry const *bucket

253

= table->bucket + table->hasher (entry, table->n_buckets);

254

struct hash_entry const *cursor;

255

256

if (! (bucket < table->bucket_limit))

257

abort ();

258

259

if (bucket->data == NULL)

260

return NULL;

261

262

for (cursor = bucket; cursor; cursor = cursor->next)

263

if (entry == cursor->data || table->comparator (entry, cursor->data))

264

return cursor->data;

265

266

return NULL;

267

}

268

269

/* Walking. */

270

271

/* The functions in this page traverse the hash table and process the

272

contained entries. For the traversal to work properly, the hash table

273

should not be resized nor modified while any particular entry is being

274

processed. In particular, entries should not be added, and an entry

275

may be removed only if there is no shrink threshold and the entry being

276

removed has already been passed to hash_get_next. */

277

278

/* Return the first data in the table, or NULL if the table is empty. */

279

280

void *

281

hash_get_first (const Hash_table *table)

282

{

283

struct hash_entry const *bucket;

284

285

if (table->n_entries == 0)

286

return NULL;

287

288

for (bucket = table->bucket; ; bucket++)

289

if (! (bucket < table->bucket_limit))

290

abort ();

291

else if (bucket->data)

292

return bucket->data;

293

}

294

295

/* Return the user data for the entry following ENTRY, where ENTRY has been

296

returned by a previous call to either `hash_get_first' or `hash_get_next'.

297

Return NULL if there are no more entries. */

298

299

void *

300

hash_get_next (const Hash_table *table, const void *entry)

301

{

302

struct hash_entry const *bucket

303

= table->bucket + table->hasher (entry, table->n_buckets);

304

struct hash_entry const *cursor;

305

306

if (! (bucket < table->bucket_limit))

307

abort ();

308

309

/* Find next entry in the same bucket. */

310

for (cursor = bucket; cursor; cursor = cursor->next)

311

if (cursor->data == entry && cursor->next)

312

return cursor->next->data;

313

314

/* Find first entry in any subsequent bucket. */

315

while (++bucket < table->bucket_limit)

316

if (bucket->data)

317

return bucket->data;

318

319

/* None found. */

320

return NULL;

321

}

322

323

/* Fill BUFFER with pointers to active user entries in the hash table, then

324

return the number of pointers copied. Do not copy more than BUFFER_SIZE

325

pointers. */

326

327

size_t

328

hash_get_entries (const Hash_table *table, void **buffer,

329

size_t buffer_size)

330

{

331

size_t counter = 0;

332

struct hash_entry const *bucket;

333

struct hash_entry const *cursor;

334

335

for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)

336

{

337

if (bucket->data)

338

{

339

for (cursor = bucket; cursor; cursor = cursor->next)

340

{

341

if (counter >= buffer_size)

342

return counter;

343

buffer[counter++] = cursor->data;

344

}

345

}

346

}

347

348

return counter;

349

}

350

351

/* Call a PROCESSOR function for each entry of a hash table, and return the

352

number of entries for which the processor function returned success. A

353

pointer to some PROCESSOR_DATA which will be made available to each call to

354

the processor function. The PROCESSOR accepts two arguments: the first is

355

the user entry being walked into, the second is the value of PROCESSOR_DATA

356

as received. The walking continue for as long as the PROCESSOR function

357

returns nonzero. When it returns zero, the walking is interrupted. */

358

359

size_t

360

hash_do_for_each (const Hash_table *table, Hash_processor processor,

361

void *processor_data)

362

{

363

size_t counter = 0;

364

struct hash_entry const *bucket;

365

struct hash_entry const *cursor;

366

367

for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)

368

{

369

if (bucket->data)

370

{

371

for (cursor = bucket; cursor; cursor = cursor->next)

372

{

373

if (! processor (cursor->data, processor_data))

374

return counter;

375

counter++;

376

}

377

}

378

}

379

380

return counter;

381

}

382

383

/* Allocation and clean-up. */

384

385

/* Return a hash index for a NUL-terminated STRING between 0 and N_BUCKETS-1.

386

This is a convenience routine for constructing other hashing functions. */

387

388

#if USE_DIFF_HASH

389

390

/* About hashings, Paul Eggert writes to me (FP), on 1994-01-01: "Please see

391

B. J. McKenzie, R. Harries & T. Bell, Selecting a hashing algorithm,

392

Software--practice & experience 20, 2 (Feb 1990), 209-224. Good hash

393

algorithms tend to be domain-specific, so what's good for [diffutils'] io.c

394

may not be good for your application." */

395

396

size_t

397

hash_string (const char *string, size_t n_buckets)

398

{

399

# define HASH_ONE_CHAR(Value, Byte) \

400

((Byte) + rotl_sz (Value, 7))

401

402

size_t value = 0;

403

unsigned char ch;

404

405

for (; (ch = *string); string++)

406

value = HASH_ONE_CHAR (value, ch);

407

return value % n_buckets;

408

409

# undef HASH_ONE_CHAR

410

}

411

412

#else /* not USE_DIFF_HASH */

413

414

/* This one comes from `recode', and performs a bit better than the above as

415

per a few experiments. It is inspired from a hashing routine found in the

416

very old Cyber `snoop', itself written in typical Greg Mansfield style.

417

(By the way, what happened to this excellent man? Is he still alive?) */

418

419

size_t

420

hash_string (const char *string, size_t n_buckets)

421

{

422

size_t value = 0;

423

unsigned char ch;

424

425

for (; (ch = *string); string++)

426

value = (value * 31 + ch) % n_buckets;

427

return value;

428

}

429

430

#endif /* not USE_DIFF_HASH */

431

432

/* Return true if CANDIDATE is a prime number. CANDIDATE should be an odd

433

number at least equal to 11. */

434

435

static bool

436

is_prime (size_t candidate)

437

{

438

size_t divisor = 3;

439

size_t square = divisor * divisor;

440

441

while (square < candidate && (candidate % divisor))

442

{

443

divisor++;

444

square += 4 * divisor;

445

divisor++;

446

}

447

448

return (candidate % divisor ? true : false);

449

}

450

451

/* Round a given CANDIDATE number up to the nearest prime, and return that

452

prime. Primes lower than 10 are merely skipped. */

453

454

static size_t

455

next_prime (size_t candidate)

456

{

457

/* Skip small primes. */

458

if (candidate < 10)

459

candidate = 10;

460

461

/* Make it definitely odd. */

462

candidate |= 1;

463

464

while (SIZE_MAX != candidate && !is_prime (candidate))

465

candidate += 2;

466

467

return candidate;

468

}

469

470

void

471

hash_reset_tuning (Hash_tuning *tuning)

472

{

473

*tuning = default_tuning;

474

}

475

476

/* If the user passes a NULL hasher, we hash the raw pointer. */

477

static size_t

478

raw_hasher (const void *data, size_t n)

479

{

480

/* When hashing unique pointers, it is often the case that they were

481

generated by malloc and thus have the property that the low-order

482

bits are 0. As this tends to give poorer performance with small

483

tables, we rotate the pointer value before performing division,

484

in an attempt to improve hash quality. */

485

size_t val = rotr_sz ((size_t) data, 3);

486

return val % n;

487

}

488

489

/* If the user passes a NULL comparator, we use pointer comparison. */

490

static bool

491

raw_comparator (const void *a, const void *b)

492

{

493

return a == b;

494

}

495

496

497

/* For the given hash TABLE, check the user supplied tuning structure for

498

reasonable values, and return true if there is no gross error with it.

499

Otherwise, definitively reset the TUNING field to some acceptable default

500

in the hash table (that is, the user loses the right of further modifying

501

tuning arguments), and return false. */

502

503

static bool

504

check_tuning (Hash_table *table)

505

{

506

const Hash_tuning *tuning = table->tuning;

507

float epsilon;

508

if (tuning == &default_tuning)

509

return true;

510

511

/* Be a bit stricter than mathematics would require, so that

512

rounding errors in size calculations do not cause allocations to

513

fail to grow or shrink as they should. The smallest allocation

514

is 11 (due to next_prime's algorithm), so an epsilon of 0.1

515

should be good enough. */

516

epsilon = 0.1f;

517

518

if (epsilon < tuning->growth_threshold

519

&& tuning->growth_threshold < 1 - epsilon

520

&& 1 + epsilon < tuning->growth_factor

521

&& 0 <= tuning->shrink_threshold

522

&& tuning->shrink_threshold + epsilon < tuning->shrink_factor

523

&& tuning->shrink_factor <= 1

524

&& tuning->shrink_threshold + epsilon < tuning->growth_threshold)

525

return true;

526

527

table->tuning = &default_tuning;

528

return false;

529

}

530

531

/* Compute the size of the bucket array for the given CANDIDATE and

532

TUNING, or return 0 if there is no possible way to allocate that

533

many entries. */

534

535

static size_t

536

compute_bucket_size (size_t candidate, const Hash_tuning *tuning)

537

{

538

if (!tuning->is_n_buckets)

539

{

540

float new_candidate = candidate / tuning->growth_threshold;

541

if (SIZE_MAX <= new_candidate)

542

return 0;

543

candidate = new_candidate;

544

}

545

candidate = next_prime (candidate);

546

if (xalloc_oversized (candidate, sizeof (struct hash_entry *)))

547

return 0;

548

return candidate;

549

}

550

551

/* Allocate and return a new hash table, or NULL upon failure. The initial

552

number of buckets is automatically selected so as to _guarantee_ that you

553

may insert at least CANDIDATE different user entries before any growth of

554

the hash table size occurs. So, if have a reasonably tight a-priori upper

555

bound on the number of entries you intend to insert in the hash table, you

556

may save some table memory and insertion time, by specifying it here. If

557

the IS_N_BUCKETS field of the TUNING structure is true, the CANDIDATE

558

argument has its meaning changed to the wanted number of buckets.

559

560

TUNING points to a structure of user-supplied values, in case some fine

561

tuning is wanted over the default behavior of the hasher. If TUNING is

562

NULL, the default tuning parameters are used instead. If TUNING is

563

provided but the values requested are out of bounds or might cause

564

rounding errors, return NULL.

565

566

The user-supplied HASHER function, when not NULL, accepts two

567

arguments ENTRY and TABLE_SIZE. It computes, by hashing ENTRY contents, a

568

slot number for that entry which should be in the range 0..TABLE_SIZE-1.

569

This slot number is then returned.

570

571

The user-supplied COMPARATOR function, when not NULL, accepts two

572

arguments pointing to user data, it then returns true for a pair of entries

573

that compare equal, or false otherwise. This function is internally called

574

on entries which are already known to hash to the same bucket index,

575

but which are distinct pointers.

576

577

The user-supplied DATA_FREER function, when not NULL, may be later called

578

with the user data as an argument, just before the entry containing the

579

data gets freed. This happens from within `hash_free' or `hash_clear'.

580

You should specify this function only if you want these functions to free

581

all of your `data' data. This is typically the case when your data is

582

simply an auxiliary struct that you have malloc'd to aggregate several

583

values. */

584

585

Hash_table *

586

hash_initialize (size_t candidate, const Hash_tuning *tuning,

587

Hash_hasher hasher, Hash_comparator comparator,

588

Hash_data_freer data_freer)

589

{

590

Hash_table *table;

591

592

if (hasher == NULL)

593

hasher = raw_hasher;

594

if (comparator == NULL)

595

comparator = raw_comparator;

596

597

table = malloc (sizeof *table);

598

if (table == NULL)

599

return NULL;

600

601

if (!tuning)

602

tuning = &default_tuning;

603

table->tuning = tuning;

604

if (!check_tuning (table))

605

{

606

/* Fail if the tuning options are invalid. This is the only occasion

607

when the user gets some feedback about it. Once the table is created,

608

if the user provides invalid tuning options, we silently revert to

609

using the defaults, and ignore further request to change the tuning

610

options. */

611

goto fail;

612

}

613

614

table->n_buckets = compute_bucket_size (candidate, tuning);

615

if (!table->n_buckets)

616

goto fail;

617

618

table->bucket = calloc (table->n_buckets, sizeof *table->bucket);

619

if (table->bucket == NULL)

620

goto fail;

621

table->bucket_limit = table->bucket + table->n_buckets;

622

table->n_buckets_used = 0;

623

table->n_entries = 0;

624

625

table->hasher = hasher;

626

table->comparator = comparator;

627

table->data_freer = data_freer;

628

629

table->free_entry_list = NULL;

630

#if USE_OBSTACK

631

obstack_init (&table->entry_stack);

632

#endif

633

return table;

634

635

fail:

636

free (table);

637

return NULL;

638

}

639

640

/* Make all buckets empty, placing any chained entries on the free list.

641

Apply the user-specified function data_freer (if any) to the datas of any

642

affected entries. */

643

644

void

645

hash_clear (Hash_table *table)

646

{

647

struct hash_entry *bucket;

648

649

for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)

650

{

651

if (bucket->data)

652

{

653

struct hash_entry *cursor;

654

struct hash_entry *next;

655

656

/* Free the bucket overflow. */

657

for (cursor = bucket->next; cursor; cursor = next)

658

{

659

if (table->data_freer)

660

table->data_freer (cursor->data);

661

cursor->data = NULL;

662

663

next = cursor->next;

664

/* Relinking is done one entry at a time, as it is to be expected

665

that overflows are either rare or short. */

666

cursor->next = table->free_entry_list;

667

table->free_entry_list = cursor;

668

}

669

670

/* Free the bucket head. */

671

if (table->data_freer)

672

table->data_freer (bucket->data);

673

bucket->data = NULL;

674

bucket->next = NULL;

675

}

676

}

677

678

table->n_buckets_used = 0;

679

table->n_entries = 0;

680

}

681

682

/* Reclaim all storage associated with a hash table. If a data_freer

683

function has been supplied by the user when the hash table was created,

684

this function applies it to the data of each entry before freeing that

685

entry. */

686

687

void

688

hash_free (Hash_table *table)

689

{

690

struct hash_entry *bucket;

691

struct hash_entry *cursor;

692

struct hash_entry *next;

693

694

/* Call the user data_freer function. */

695

if (table->data_freer && table->n_entries)

696

{

697

for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)

698

{

699

if (bucket->data)

700

{

701

for (cursor = bucket; cursor; cursor = cursor->next)

702

table->data_freer (cursor->data);

703

}

704

}

705

}

706

707

#if USE_OBSTACK

708

709

obstack_free (&table->entry_stack, NULL);

710

711

#else

712

713

/* Free all bucket overflowed entries. */

714

for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)

715

{

716

for (cursor = bucket->next; cursor; cursor = next)

717

{

718

next = cursor->next;

719

free (cursor);

720

}

721

}

722

723

/* Also reclaim the internal list of previously freed entries. */

724

for (cursor = table->free_entry_list; cursor; cursor = next)

725

{

726

next = cursor->next;

727

free (cursor);

728

}

729

730

#endif

731

732

/* Free the remainder of the hash table structure. */

733

free (table->bucket);

734

free (table);

735

}

736

737

/* Insertion and deletion. */

738

739

/* Get a new hash entry for a bucket overflow, possibly by recycling a

740

previously freed one. If this is not possible, allocate a new one. */

741

742

static struct hash_entry *

743

allocate_entry (Hash_table *table)

744

{

745

struct hash_entry *new;

746

747

if (table->free_entry_list)

748

{

749

new = table->free_entry_list;

750

table->free_entry_list = new->next;

751

}

752

else

753

{

754

#if USE_OBSTACK

755

new = obstack_alloc (&table->entry_stack, sizeof *new);

756

#else

757

new = malloc (sizeof *new);

758

#endif

759

}

760

761

return new;

762

}

763

764

/* Free a hash entry which was part of some bucket overflow,

765

saving it for later recycling. */

766

767

static void

768

free_entry (Hash_table *table, struct hash_entry *entry)

769

{

770

entry->data = NULL;

771

entry->next = table->free_entry_list;

772

table->free_entry_list = entry;

773

}

774

775

/* This private function is used to help with insertion and deletion. When

776

ENTRY matches an entry in the table, return a pointer to the corresponding

777

user data and set *BUCKET_HEAD to the head of the selected bucket.

778

Otherwise, return NULL. When DELETE is true and ENTRY matches an entry in

779

the table, unlink the matching entry. */

780

781

static void *

782

hash_find_entry (Hash_table *table, const void *entry,

783

struct hash_entry **bucket_head, bool delete)

784

{

785

struct hash_entry *bucket

786

= table->bucket + table->hasher (entry, table->n_buckets);

787

struct hash_entry *cursor;

788

789

if (! (bucket < table->bucket_limit))

790

abort ();

791

792

*bucket_head = bucket;

793

794

/* Test for empty bucket. */

795

if (bucket->data == NULL)

796

return NULL;

797

798

/* See if the entry is the first in the bucket. */

799

if (entry == bucket->data || table->comparator (entry, bucket->data))

800

{

801

void *data = bucket->data;

802

803

if (delete)

804

{

805

if (bucket->next)

806

{

807

struct hash_entry *next = bucket->next;

808

809

/* Bump the first overflow entry into the bucket head, then save

810

the previous first overflow entry for later recycling. */

811

*bucket = *next;

812

free_entry (table, next);

813

}

814

else

815

{

816

bucket->data = NULL;

817

}

818

}

819

820

return data;

821

}

822

823

/* Scan the bucket overflow. */

824

for (cursor = bucket; cursor->next; cursor = cursor->next)

825

{

826

if (entry == cursor->next->data

827

|| table->comparator (entry, cursor->next->data))

828

{

829

void *data = cursor->next->data;

830

831

if (delete)

832

{

833

struct hash_entry *next = cursor->next;

834

835

/* Unlink the entry to delete, then save the freed entry for later

836

recycling. */

837

cursor->next = next->next;

838

free_entry (table, next);

839

}

840

841

return data;

842

}

843

}

844

845

/* No entry found. */

846

return NULL;

847

}

848

849

/* Internal helper, to move entries from SRC to DST. Both tables must

850

share the same free entry list. If SAFE, only move overflow

851

entries, saving bucket heads for later, so that no allocations will

852

occur. Return false if the free entry list is exhausted and an

853

allocation fails. */

854

855

static bool

856

transfer_entries (Hash_table *dst, Hash_table *src, bool safe)

857

{

858

struct hash_entry *bucket;

859

struct hash_entry *cursor;

860

struct hash_entry *next;

861

for (bucket = src->bucket; bucket < src->bucket_limit; bucket++)

862

if (bucket->data)

863

{

864

void *data;

865

struct hash_entry *new_bucket;

866

867

/* Within each bucket, transfer overflow entries first and

868

then the bucket head, to minimize memory pressure. After

869

all, the only time we might allocate is when moving the

870

bucket head, but moving overflow entries first may create

871

free entries that can be recycled by the time we finally

872

get to the bucket head. */

873

for (cursor = bucket->next; cursor; cursor = next)

874

{

875

data = cursor->data;

876

new_bucket = (dst->bucket + dst->hasher (data, dst->n_buckets));

877

878

if (! (new_bucket < dst->bucket_limit))

879

abort ();

880

881

next = cursor->next;

882

883

if (new_bucket->data)

884

{

885

/* Merely relink an existing entry, when moving from a

886

bucket overflow into a bucket overflow. */

887

cursor->next = new_bucket->next;

888

new_bucket->next = cursor;

889

}

890

else

891

{

892

/* Free an existing entry, when moving from a bucket

893

overflow into a bucket header. */

894

new_bucket->data = data;

895

dst->n_buckets_used++;

896

free_entry (dst, cursor);

897

}

898

}

899

/* Now move the bucket head. Be sure that if we fail due to

900

allocation failure that the src table is in a consistent

901

state. */

902

data = bucket->data;

903

bucket->next = NULL;

904

if (safe)

905

continue;

906

new_bucket = (dst->bucket + dst->hasher (data, dst->n_buckets));

907

908

if (! (new_bucket < dst->bucket_limit))

909

abort ();

910

911

if (new_bucket->data)

912

{

913

/* Allocate or recycle an entry, when moving from a bucket

914

header into a bucket overflow. */

915

struct hash_entry *new_entry = allocate_entry (dst);

916

917

if (new_entry == NULL)

918

return false;

919

920

new_entry->data = data;

921

new_entry->next = new_bucket->next;

922

new_bucket->next = new_entry;

923

}

924

else

925

{

926

/* Move from one bucket header to another. */

927

new_bucket->data = data;

928

dst->n_buckets_used++;

929

}

930

bucket->data = NULL;

931

src->n_buckets_used--;

932

}

933

return true;

934

}

935

936

/* For an already existing hash table, change the number of buckets through

937

specifying CANDIDATE. The contents of the hash table are preserved. The

938

new number of buckets is automatically selected so as to _guarantee_ that

939

the table may receive at least CANDIDATE different user entries, including

940

those already in the table, before any other growth of the hash table size

941

occurs. If TUNING->IS_N_BUCKETS is true, then CANDIDATE specifies the

942

exact number of buckets desired. Return true iff the rehash succeeded. */

943

944

bool

945

hash_rehash (Hash_table *table, size_t candidate)

946

{

947

Hash_table storage;

948

Hash_table *new_table;

949

size_t new_size = compute_bucket_size (candidate, table->tuning);

950

951

if (!new_size)

952

return false;

953

if (new_size == table->n_buckets)

954

return true;

955

new_table = &storage;

956

new_table->bucket = calloc (new_size, sizeof *new_table->bucket);

957

if (new_table->bucket == NULL)

958

return false;

959

new_table->n_buckets = new_size;

960

new_table->bucket_limit = new_table->bucket + new_size;

961

new_table->n_buckets_used = 0;

962

new_table->n_entries = 0;

963

new_table->tuning = table->tuning;

964

new_table->hasher = table->hasher;

965

new_table->comparator = table->comparator;

966

new_table->data_freer = table->data_freer;

967

968

/* In order for the transfer to successfully complete, we need

969

additional overflow entries when distinct buckets in the old

970

table collide into a common bucket in the new table. The worst

971

case possible is a hasher that gives a good spread with the old

972

size, but returns a constant with the new size; if we were to

973

guarantee table->n_buckets_used-1 free entries in advance, then

974

the transfer would be guaranteed to not allocate memory.

975

However, for large tables, a guarantee of no further allocation

976

introduces a lot of extra memory pressure, all for an unlikely

977

corner case (most rehashes reduce, rather than increase, the

978

number of overflow entries needed). So, we instead ensure that

979

the transfer process can be reversed if we hit a memory

980

allocation failure mid-transfer. */

981

982

/* Merely reuse the extra old space into the new table. */

983

#if USE_OBSTACK

984

new_table->entry_stack = table->entry_stack;

985

#endif

986

new_table->free_entry_list = table->free_entry_list;

987

988

if (transfer_entries (new_table, table, false))

989

{

990

/* Entries transferred successfully; tie up the loose ends. */

991

free (table->bucket);

992

table->bucket = new_table->bucket;

993

table->bucket_limit = new_table->bucket_limit;

994

table->n_buckets = new_table->n_buckets;

995

table->n_buckets_used = new_table->n_buckets_used;

996

table->free_entry_list = new_table->free_entry_list;

997

/* table->n_entries and table->entry_stack already hold their value. */

998

return true;

999

}

1000

1001

/* We've allocated new_table->bucket (and possibly some entries),

1002

exhausted the free list, and moved some but not all entries into

1003

new_table. We must undo the partial move before returning

1004

failure. The only way to get into this situation is if new_table

1005

uses fewer buckets than the old table, so we will reclaim some

1006

free entries as overflows in the new table are put back into

1007

distinct buckets in the old table.

1008

1009

There are some pathological cases where a single pass through the

1010

table requires more intermediate overflow entries than using two

1011

passes. Two passes give worse cache performance and takes

1012

longer, but at this point, we're already out of memory, so slow

1013

and safe is better than failure. */

1014

table->free_entry_list = new_table->free_entry_list;

1015

if (! (transfer_entries (table, new_table, true)

1016

&& transfer_entries (table, new_table, false)))

1017

abort ();

1018

/* table->n_entries already holds its value. */

1019

free (new_table->bucket);

1020

return false;

1021

}

1022

1023

/* If ENTRY matches an entry already in the hash table, return the pointer

1024

to the entry from the table. Otherwise, insert ENTRY and return ENTRY.

1025

Return NULL if the storage required for insertion cannot be allocated.

1026

This implementation does not support duplicate entries or insertion of

1027

NULL. */

1028

1029

void *

1030

hash_insert (Hash_table *table, const void *entry)

1031

{

1032

void *data;

1033

struct hash_entry *bucket;

1034

1035

/* The caller cannot insert a NULL entry. */

1036

if (! entry)

1037

abort ();

1038

1039

/* If there's a matching entry already in the table, return that. */

1040

if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)

1041

return data;

1042

1043

/* If the growth threshold of the buckets in use has been reached, increase

1044

the table size and rehash. There's no point in checking the number of

1045

entries: if the hashing function is ill-conditioned, rehashing is not

1046

likely to improve it. */

1047

1048

if (table->n_buckets_used

1049

> table->tuning->growth_threshold * table->n_buckets)

1050

{

1051

/* Check more fully, before starting real work. If tuning arguments

1052

became invalid, the second check will rely on proper defaults. */

1053

check_tuning (table);

1054

if (table->n_buckets_used

1055

> table->tuning->growth_threshold * table->n_buckets)

1056

{

1057

const Hash_tuning *tuning = table->tuning;

1058

float candidate =

1059

(tuning->is_n_buckets

1060

? (table->n_buckets * tuning->growth_factor)

1061

: (table->n_buckets * tuning->growth_factor

1062

* tuning->growth_threshold));

1063

1064

if (SIZE_MAX <= candidate)

1065

return NULL;

1066

1067

/* If the rehash fails, arrange to return NULL. */

1068

if (!hash_rehash (table, candidate))

1069

return NULL;

1070

1071

/* Update the bucket we are interested in. */

1072

if (hash_find_entry (table, entry, &bucket, false) != NULL)

1073

abort ();

1074

}

1075

}

1076

1077

/* ENTRY is not matched, it should be inserted. */

1078

1079

if (bucket->data)

1080

{

1081

struct hash_entry *new_entry = allocate_entry (table);

1082

1083

if (new_entry == NULL)

1084

return NULL;

1085

1086

/* Add ENTRY in the overflow of the bucket. */

1087

1088

new_entry->data = (void *) entry;

1089

new_entry->next = bucket->next;

1090

bucket->next = new_entry;

1091

table->n_entries++;

1092

return (void *) entry;

1093

}

1094

1095

/* Add ENTRY right in the bucket head. */

1096

1097

bucket->data = (void *) entry;

1098

table->n_entries++;

1099

table->n_buckets_used++;

1100

1101

return (void *) entry;

1102

}

1103

1104

/* If ENTRY is already in the table, remove it and return the just-deleted

1105

data (the user may want to deallocate its storage). If ENTRY is not in the

1106

table, don't modify the table and return NULL. */

1107

1108

void *

1109

hash_delete (Hash_table *table, const void *entry)

1110

{

1111

void *data;

1112

struct hash_entry *bucket;

1113

1114

data = hash_find_entry (table, entry, &bucket, true);

1115

if (!data)

1116

return NULL;

1117

1118

table->n_entries--;

1119

if (!bucket->data)

1120

{

1121

table->n_buckets_used--;

1122

1123

/* If the shrink threshold of the buckets in use has been reached,

1124

rehash into a smaller table. */

1125

1126

if (table->n_buckets_used

1127

< table->tuning->shrink_threshold * table->n_buckets)

1128

{

1129

/* Check more fully, before starting real work. If tuning arguments

1130

became invalid, the second check will rely on proper defaults. */

1131

check_tuning (table);

1132

if (table->n_buckets_used

1133

< table->tuning->shrink_threshold * table->n_buckets)

1134

{

1135

const Hash_tuning *tuning = table->tuning;

1136

size_t candidate =

1137

(tuning->is_n_buckets

1138

? table->n_buckets * tuning->shrink_factor

1139

: (table->n_buckets * tuning->shrink_factor

1140

* tuning->growth_threshold));

1141

1142

if (!hash_rehash (table, candidate))

1143

{

1144

/* Failure to allocate memory in an attempt to

1145

shrink the table is not fatal. But since memory

1146

is low, we can at least be kind and free any

1147

spare entries, rather than keeping them tied up

1148

in the free entry list. */

1149

#if ! USE_OBSTACK

1150

struct hash_entry *cursor = table->free_entry_list;

1151

struct hash_entry *next;

1152

while (cursor)

1153

{

1154

next = cursor->next;

1155

free (cursor);

1156

cursor = next;

1157

}

1158

table->free_entry_list = NULL;

1159

#endif

1160

}

1161

}

1162

}

1163

}

1164

1165

return data;

1166

}

1167

1168

/* Testing. */

1169

1170

#if TESTING

1171

1172

void

1173

hash_print (const Hash_table *table)

1174

{

1175

struct hash_entry *bucket = (struct hash_entry *) table->bucket;

1176

1177

for ( ; bucket < table->bucket_limit; bucket++)

1178

{

1179

struct hash_entry *cursor;

1180

1181

if (bucket)

1182

printf ("%lu:\n", (unsigned long int) (bucket - table->bucket));

1183

1184

for (cursor = bucket; cursor; cursor = cursor->next)

1185

{

1186

char const *s = cursor->data;

1187

/* FIXME */

1188

if (s)

1189

printf (" %s\n", s);

1190

}

1191

}

1192

}

1193

1194

#endif /* TESTING */

Older »