2
* sdbm - ndbm work-alike hashed database library
3
* based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
4
* author: oz@nexus.yorku.ca
5
* status: public domain.
11
/*char sdbm_rcsid[] = "$Id: _sdbm.c 11708 2007-02-12 23:01:19Z shyouhei $";*/
18
* sdbm - ndbm work-alike hashed database library
19
* tuning and portability constructs [not nearly enough]
20
* author: oz@nexus.yorku.ca
30
#define SEEK_SET L_SET
31
#define memset(s,c,n) bzero(s, n) /* only when c is zero */
32
#define memcpy(s1,s2,n) bcopy(s2, s1, n)
33
#define memcmp(s1,s2,n) bcmp(s1,s2,n)
37
* important tuning parms (hah)
40
#define SEEDUPS /* always detect duplicates */
41
#define BADMESS /* generate a message for worst case:
42
cannot make room after SPLTMAX splits */
47
#define debug(x) printf x
53
#define GET_SHORT(p, i) (((unsigned)((unsigned char *)(p))[(i)*2] << 8) + (((unsigned char *)(p))[(i)*2 + 1]))
54
#define PUT_SHORT(p, i, s) (((unsigned char *)(p))[(i)*2] = (unsigned char)((s) >> 8), ((unsigned char *)(p))[(i)*2 + 1] = (unsigned char)(s))
56
#define GET_SHORT(p, i) ((p)[i])
57
#define PUT_SHORT(p, i, s) ((p)[i] = (s))
61
static int fitpair proto((char *, int));
62
static void putpair proto((char *, datum, datum));
63
static datum getpair proto((char *, datum));
64
static int delpair proto((char *, datum));
65
static int chkpage proto((char *));
66
static datum getnkey proto((char *, int));
67
static void splpage proto((char *, char *, long));
69
static int duppair proto((char *, datum));
77
#include <sys/types.h>
83
/*#include <memory.h>*/
106
#if !defined sun && !defined MSDOS && !defined _WIN32 && !defined __CYGWIN__ && !defined(errno)
113
static int getdbit proto((DBM *, long));
114
static int setdbit proto((DBM *, long));
115
static int getpage proto((DBM *, long));
116
static datum getnext proto((DBM *));
117
static int makroom proto((DBM *, long, int));
122
#define bad(x) ((x).dptr == NULL || (x).dsize < 0)
123
#define exhash(item) sdbm_hash((item).dptr, (item).dsize)
124
#define ioerr(db) ((db)->flags |= DBM_IOERR)
126
#define OFF_PAG(off) (long) (off) * PBLKSIZ
127
#define OFF_DIR(off) (long) (off) * DBLKSIZ
129
static long masks[] = {
130
000000000000L, 000000000001L, 000000000003L,
131
000000000007L, 000000000017L, 000000000037L,
132
000000000077L, 000000000177L, 000000000377L,
133
000000000777L, 000000001777L, 000000003777L,
134
000000007777L, 000000017777L, 000000037777L,
135
000000077777L, 000000177777L, 000000377777L,
136
000000777777L, 000001777777L, 000003777777L,
137
000007777777L, 000017777777L, 000037777777L,
138
000077777777L, 000177777777L, 000377777777L,
139
000777777777L, 001777777777L, 003777777777L,
140
007777777777L, 017777777777L
143
datum nullitem = {NULL, 0};
146
sdbm_open(file, flags, mode)
152
register char *dirname;
153
register char *pagname;
156
if (file == NULL || !*file)
157
return errno = EINVAL, (DBM *) NULL;
159
* need space for two seperate filenames
161
n = strlen(file) * 2 + strlen(DIRFEXT) + strlen(PAGFEXT) + 2;
163
if ((dirname = malloc((unsigned) n)) == NULL)
164
return errno = ENOMEM, (DBM *) NULL;
166
* build the file names
168
dirname = strcat(strcpy(dirname, file), DIRFEXT);
169
pagname = strcpy(dirname + strlen(dirname) + 1, file);
170
pagname = strcat(pagname, PAGFEXT);
172
db = sdbm_prep(dirname, pagname, flags, mode);
173
free((char *) dirname);
178
sdbm_prep(dirname, pagname, flags, mode)
187
if ((db = (DBM *) malloc(sizeof(DBM))) == NULL)
188
return errno = ENOMEM, (DBM *) NULL;
195
* adjust user flags so that WRONLY becomes RDWR,
196
* as required by this package. Also set our internal
199
if (flags & O_WRONLY)
200
flags = (flags & ~O_WRONLY) | O_RDWR;
201
if (flags & O_RDONLY)
202
db->flags = DBM_RDONLY;
204
* open the files in sequence, and stat the dirfile.
205
* If we fail anywhere, undo everything, return NULL.
208
if ((db->pagf = open(pagname, flags, mode)) > -1) {
209
if ((db->dirf = open(dirname, flags, mode)) > -1) {
211
* need the dirfile size to establish max bit number.
213
if (fstat(db->dirf, &dstat) == 0) {
215
* zero size: either a fresh database, or one with a single,
216
* unsplit data page: dirpage is all zeros.
218
db->dirbno = (!dstat.st_size) ? 0 : -1;
220
db->maxbno = dstat.st_size * (long) BYTESIZ;
222
(void) memset(db->pagbuf, 0, PBLKSIZ);
223
(void) memset(db->dirbuf, 0, DBLKSIZ);
229
(void) close(db->dirf);
231
(void) close(db->pagf);
244
(void) close(db->dirf);
245
(void) close(db->pagf);
255
if (db == NULL || bad(key))
256
return errno = EINVAL, nullitem;
258
if (getpage(db, exhash(key)))
259
return getpair(db->pagbuf, key);
261
return ioerr(db), nullitem;
269
if (db == NULL || bad(key))
270
return errno = EINVAL, -1;
272
return errno = EPERM, -1;
274
if (getpage(db, exhash(key))) {
275
if (!delpair(db->pagbuf, key))
278
* update the page file
280
if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
281
|| write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
282
return ioerr(db), -1;
287
return ioerr(db), -1;
291
sdbm_store(db, key, val, flags)
300
if (db == NULL || bad(key))
301
return errno = EINVAL, -1;
303
return errno = EPERM, -1;
305
need = key.dsize + val.dsize;
307
* is the pair too big (or too small) for this database ??
309
if (need < 0 || need > PAIRMAX)
310
return errno = EINVAL, -1;
312
if (getpage(db, (hash = exhash(key)))) {
314
* if we need to replace, delete the key/data pair
315
* first. If it is not there, ignore.
317
if (flags == DBM_REPLACE)
318
(void) delpair(db->pagbuf, key);
320
else if (duppair(db->pagbuf, key))
324
* if we do not have enough room, we have to split.
326
if (!fitpair(db->pagbuf, need))
327
if (!makroom(db, hash, need))
328
return ioerr(db), -1;
330
* we have enough room or split is successful. insert the key,
331
* and update the page file.
333
(void) putpair(db->pagbuf, key, val);
335
if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
336
|| write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
337
return ioerr(db), -1;
344
return ioerr(db), -1;
348
* makroom - make room by splitting the overfull page
349
* this routine will attempt to make room for SPLTMAX times before
353
makroom(db, hash, need)
360
#if defined MSDOS || (defined _WIN32 && !defined __CYGWIN__)
364
char *pag = db->pagbuf;
366
register int smax = SPLTMAX;
370
* split the current page
372
(void) splpage(pag, new, db->hmask + 1);
374
* address of the new page
376
newp = (hash & db->hmask) | (db->hmask + 1);
377
debug(("newp: %ld\n", newp));
379
* write delay, read avoidence/cache shuffle:
380
* select the page for incoming pair: if key is to go to the new page,
381
* write out the previous one, and copy the new one over, thus making
382
* it the current page. If not, simply write the new page, and we are
383
* still looking at the page of interest. current page is not updated
384
* here, as sdbm_store will do so, after it inserts the incoming pair.
387
#if defined MSDOS || (defined _WIN32 && !defined __CYGWIN__)
389
* Fill hole with 0 if made it.
390
* (hole is NOT read as 0)
392
oldtail = lseek(db->pagf, 0L, SEEK_END);
393
memset(zer, 0, PBLKSIZ);
394
while (OFF_PAG(newp) > oldtail) {
395
if (lseek(db->pagf, 0L, SEEK_END) < 0 ||
396
write(db->pagf, zer, PBLKSIZ) < 0) {
404
if (hash & (db->hmask + 1)) {
405
if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
406
|| write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
409
(void) memcpy(pag, new, PBLKSIZ);
411
else if (lseek(db->pagf, OFF_PAG(newp), SEEK_SET) < 0
412
|| write(db->pagf, new, PBLKSIZ) < 0)
415
if (!setdbit(db, db->curbit))
418
* see if we have enough room now
420
if (fitpair(pag, need))
423
* try again... update curbit and hmask as getpage would have
424
* done. because of our update of the current page, we do not
425
* need to read in anything. BUT we have to write the current
426
* [deferred] page out, as the window of failure is too great.
428
db->curbit = 2 * db->curbit +
429
((hash & (db->hmask + 1)) ? 2 : 1);
430
db->hmask |= (db->hmask + 1);
432
if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
433
|| write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
438
* if we are here, this is real bad news. After SPLTMAX splits,
439
* we still cannot fit the key. say goodnight.
442
(void) write(2, "sdbm: cannot insert after SPLTMAX attempts.\n", 44);
449
* the following two routines will break if
450
* deletions aren't taken into account. (ndbm bug)
457
return errno = EINVAL, nullitem;
461
(void) memset(db->pagbuf, 0, PBLKSIZ);
462
if (lseek(db->pagf, OFF_PAG(0), SEEK_SET) < 0
463
|| read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
464
return ioerr(db), nullitem;
477
return errno = EINVAL, nullitem;
482
* all important binary trie traversal
495
while (dbit < db->maxbno && getdbit(db, dbit))
496
dbit = 2 * dbit + ((hash & ((long) 1 << hbit++)) ? 2 : 1);
498
debug(("dbit: %d...", dbit));
501
db->hmask = masks[hbit];
503
pagb = hash & db->hmask;
505
* see if the block we need is already in memory.
506
* note: this lookaside cache has about 10% hit rate.
508
if (pagb != db->pagbno) {
510
* note: here, we assume a "hole" is read as 0s.
511
* if not, must zero pagbuf first.
513
(void) memset(db->pagbuf, 0, PBLKSIZ);
515
if (lseek(db->pagf, OFF_PAG(pagb), SEEK_SET) < 0
516
|| read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
518
if (!chkpage(db->pagbuf)) {
523
debug(("pag read: %d\n", pagb));
539
if (dirb != db->dirbno) {
540
if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
541
|| read(db->dirf, db->dirbuf, DBLKSIZ) < 0)
545
debug(("dir read: %d\n", dirb));
548
return db->dirbuf[c % DBLKSIZ] & (1 << (dbit % BYTESIZ));
562
if (dirb != db->dirbno) {
563
if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
564
|| read(db->dirf, db->dirbuf, DBLKSIZ) < 0)
568
debug(("dir read: %d\n", dirb));
571
db->dirbuf[c % DBLKSIZ] |= (1 << (dbit % BYTESIZ));
573
if (dbit >= db->maxbno)
574
db->maxbno += (long) DBLKSIZ * BYTESIZ;
576
if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
577
|| write(db->dirf, db->dirbuf, DBLKSIZ) < 0)
584
* getnext - get the next key in the page, and if done with
585
* the page, try the next page in sequence
595
key = getnkey(db->pagbuf, db->keyptr);
596
if (key.dptr != NULL)
599
* we either run out, or there is nothing on this page..
600
* try the next one... If we lost our position on the
601
* file, we will have to seek.
604
if (db->pagbno != db->blkptr++)
605
if (lseek(db->pagf, OFF_PAG(db->blkptr), SEEK_SET) < 0)
607
db->pagbno = db->blkptr;
608
if (read(db->pagf, db->pagbuf, PBLKSIZ) <= 0)
610
if (!chkpage(db->pagbuf)) {
615
return ioerr(db), nullitem;
620
* sdbm - ndbm work-alike hashed database library
621
* based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
622
* author: oz@nexus.yorku.ca
623
* status: public domain.
625
* page-level routines
629
/*char pair_rcsid[] = "$Id: _sdbm.c 11708 2007-02-12 23:01:19Z shyouhei $";*/
633
/*#include <memory.h>*/
636
#define exhash(item) sdbm_hash((item).dptr, (item).dsize)
641
static int seepair proto((char *, int, char *, int));
645
* +------------------------------+
646
* ino | n | keyoff | datoff | keyoff |
647
* +------------+--------+--------+
648
* | datoff | - - - ----> |
649
* +--------+---------------------+
650
* | F R E E A R E A |
651
* +--------------+---------------+
652
* | <---- - - - | data |
653
* +--------+-----+----+----------+
654
* | key | data | key |
655
* +--------+----------+----------+
657
* calculating the offsets for free area: if the number
658
* of entries (ino[0]) is zero, the offset to the END of
659
* the free area is the block size. Otherwise, it is the
660
* nth (ino[ino[0]]) entry's offset.
671
register short *ino = (short *) pag;
673
off = ((n = GET_SHORT(ino,0)) > 0) ? GET_SHORT(ino,n) : PBLKSIZ;
674
free = off - (n + 1) * sizeof(short);
675
need += 2 * sizeof(short);
677
debug(("free %d need %d\n", free, need));
683
putpair(pag, key, val)
690
register short *ino = (short *) pag;
692
off = ((n = GET_SHORT(ino,0)) > 0) ? GET_SHORT(ino,n) : PBLKSIZ;
694
* enter the key first
698
(void) memcpy(pag + off, key.dptr, key.dsize);
699
PUT_SHORT(ino,n + 1,off);
705
(void) memcpy(pag + off, val.dptr, val.dsize);
706
PUT_SHORT(ino,n + 2,off);
710
PUT_SHORT(ino,0,GET_SHORT(ino,0) + 2);
721
register short *ino = (short *) pag;
723
if ((n = GET_SHORT(ino,0)) == 0)
726
if ((i = seepair(pag, n, key.dptr, key.dsize)) == 0)
729
val.dptr = pag + GET_SHORT(ino,i + 1);
730
val.dsize = GET_SHORT(ino,i) - GET_SHORT(ino,i + 1);
740
register short *ino = (short *) pag;
741
return GET_SHORT(ino,0) > 0 &&
742
seepair(pag, GET_SHORT(ino,0), key.dptr, key.dsize) > 0;
753
register short *ino = (short *) pag;
756
if (GET_SHORT(ino,0) == 0 || num > GET_SHORT(ino,0))
759
off = (num > 1) ? GET_SHORT(ino,num - 1) : PBLKSIZ;
761
key.dptr = pag + GET_SHORT(ino,num);
762
key.dsize = off - GET_SHORT(ino,num);
774
register short *ino = (short *) pag;
776
if ((n = GET_SHORT(ino,0)) == 0)
779
if ((i = seepair(pag, n, key.dptr, key.dsize)) == 0)
782
* found the key. if it is the last entry
783
* [i.e. i == n - 1] we just adjust the entry count.
784
* hard case: move all data down onto the deleted pair,
785
* shift offsets onto deleted offsets, and adjust them.
790
register char *dst = pag + (i == 1 ? PBLKSIZ : GET_SHORT(ino,i - 1));
791
register char *src = pag + GET_SHORT(ino,i + 1);
792
register int zoo = dst - src;
794
debug(("free-up %d ", zoo));
796
* shift data/keys down
798
m = GET_SHORT(ino,i + 1) - GET_SHORT(ino,n);
800
#define MOVB *--dst = *--src
803
register int loop = (m + 8 - 1) >> 3;
805
switch (m & (8 - 1)) {
808
case 6: MOVB; case 5: MOVB;
809
case 4: MOVB; case 3: MOVB;
810
case 2: MOVB; case 1: MOVB;
816
memmove(dst, src, m);
823
* adjust offset index up
826
PUT_SHORT(ino,i, GET_SHORT(ino,i + 2) + zoo);
830
PUT_SHORT(ino, 0, GET_SHORT(ino, 0) - 2);
835
* search for the key in the page.
836
* return offset index in the range 0 < i < n.
837
* return 0 if not found.
840
seepair(pag, n, key, siz)
847
register int off = PBLKSIZ;
848
register short *ino = (short *) pag;
850
for (i = 1; i < n; i += 2) {
851
if (siz == off - GET_SHORT(ino,i) &&
852
memcmp(key, pag + GET_SHORT(ino,i), siz) == 0)
854
off = GET_SHORT(ino,i + 1);
860
splpage(pag, new, sbit)
869
register int off = PBLKSIZ;
871
register short *ino = (short *) cur;
873
(void) memcpy(cur, pag, PBLKSIZ);
874
(void) memset(pag, 0, PBLKSIZ);
875
(void) memset(new, 0, PBLKSIZ);
877
n = GET_SHORT(ino,0);
878
for (ino++; n > 0; ino += 2) {
879
key.dptr = cur + GET_SHORT(ino,0);
880
key.dsize = off - GET_SHORT(ino,0);
881
val.dptr = cur + GET_SHORT(ino,1);
882
val.dsize = GET_SHORT(ino,0) - GET_SHORT(ino,1);
884
* select the page pointer (by looking at sbit) and insert
886
(void) putpair((exhash(key) & sbit) ? new : pag, key, val);
888
off = GET_SHORT(ino,1);
892
debug(("%d split %d/%d\n", ((short *) cur)[0] / 2,
893
((short *) new)[0] / 2,
894
((short *) pag)[0] / 2));
899
* number of entries should be something
900
* reasonable, and all offsets in the index should be in order.
901
* this could be made more rigorous.
909
register short *ino = (short *) pag;
911
if ((n = GET_SHORT(ino,0)) < 0 || n > PBLKSIZ / sizeof(short))
916
for (ino++; n > 0; ino += 2) {
917
if (GET_SHORT(ino,0) > off || GET_SHORT(ino,1) > off ||
918
GET_SHORT(ino,1) > GET_SHORT(ino,0))
920
off = GET_SHORT(ino,1);
929
* sdbm - ndbm work-alike hashed database library
930
* based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
931
* author: oz@nexus.yorku.ca
932
* status: public domain. keep it that way.
938
* polynomial conversion ignoring overflows
939
* [this seems to work remarkably well, in fact better
940
* then the ndbm hash function. Replace at your own risk]
949
register unsigned long n = 0;
953
#define HASHC n = *str++ + 65599 * n
956
register int loop = (len + 8 - 1) >> 3;
958
switch(len & (8 - 1)) {
960
HASHC; case 7: HASHC;
961
case 6: HASHC; case 5: HASHC;
962
case 4: HASHC; case 3: HASHC;
963
case 2: HASHC; case 1: HASHC;
970
n = ((*str++) & 255) + 65587L * n;