1
// @(#) $Revision: 4.1 $ $Source: /judy/test/manual/StringCompare.c $
2
//=======================================================================
3
// Author Douglas L. Baskins, Jan 2003.
4
// Permission to use this code is freely granted, provided that this
5
// statement is retained.
6
// email - dougbaskins .at, yahoo.com
7
//=======================================================================
9
//=======================================================================
11
// This program will time various ADTs that store and retrieve strings.
12
// Currently there are 7 ADTs implemented:
15
1) Hash - this is the fastest one when table size matches data size
16
2) JudyHS - 2nd in speed and very scaleable.
17
3) JLHash - this uses a JudyL() array instead of hash table and is
18
very scaleable because the hash table size is ~4 billion.
19
4) JudySL - ordered and requires null terminated strings.
20
5) Ternary - code borrowed from Mr Dobbs, perhaps 2000
21
6) Redblack - code borrowed from J. Zobel, April 2001.
22
7) Splay - code borrowed from J. Zobel, April 2001.
24
Note: Splay, Redblack and Ternary methods are not very fast, so they
25
have not been completed and made bug free. I.E. no Delete and Free
26
Array routines. Ternary is fastest retrieve of these three, but uses
27
an extraordinary amount of memory.
29
//=======================================================================
33
// cc -O StringCompare.c -lm -lJudy -o StringCompare
35
// cc -O -DCPUMHZ=1299 StringCompare.c -lm -lJudy -o StringCompare
38
1) Use '-DCPUMHZ=1299' in cc line if better clock resolution is desired
39
and it compiles successfully. The 1299 is the cpu MHz : 1298.916 from
40
cat /proc/cpuinfo in a Linux system.
42
2) -static will generally get better performance because memcmp(),
43
memcpy() routines are usually slower with shared librarys.
48
// StringCompare -A Hash <options> textfile
49
// StringCompare -A JudyHS <options> textfile
50
// StringCompare -A JLHash <options> textfile
51
// StringCompare -A JudySL <options> textfile
52
// StringCompare -A Ternary <options> textfile
53
// StringCompare -A Redblack <options> textfile
54
// StringCompare -A Splay <options> textfile
56
#include <stdlib.h> // malloc(3)
57
#include <unistd.h> // getopt(3)
58
#include <stdio.h> // printf(3)
59
#include <fcntl.h> // open(2)
60
#include <string.h> // memcmp(3), memcpy(3)
61
#include <errno.h> // errno(3)
62
#include <sys/mman.h> // mmap(2)
63
#include <sys/time.h> // gettimeofday(2)
64
#include <math.h> // pow(3)
65
#include <sys/utsname.h> // uname(2)
66
#include <Judy.h> // Judy arrays
68
//=======================================================================
69
// D e f i n e: T O T U R N O F F A S S E R T I O N S !!!!!!!!
70
//=======================================================================
72
#include <assert.h> // assert(3)
74
//=======================================================================
75
// G L O B A L D A T A
76
//=======================================================================
78
int foolflag = 0; // fool compiler from optimizing
79
static Word_t gStored = 0; // number of strings inserted
80
static Word_t gChainln = 0; // links traversed during RETRIVE
82
static Word_t PtsPdec = 40; // default measurement points per decade
84
#define INFSTRGS 1000000000 // 1 billion strings is infinity
86
static Word_t nStrg = INFSTRGS; // infinity -- measure all strings
87
static Word_t TValues = 100000; // max measure points for RETRIVE tests
88
static int pFlag = 0; // pre-fault hash table pages into RAM
89
static int rFlag = 0; // do not randomize input file
90
static int aFlag = 0; // word align string buffers
91
static int DFlag = 0; // do the delete measurement
92
static int CFlag = 0; // build sequential Get buffers
93
static Word_t aCount = 0; // Count of missaligned string buffers
95
// define the maximum length of a string allowed
96
#define MAXSTRLEN (100000)
97
static int MLength = MAXSTRLEN;
98
static Word_t HTblsz; // 1M default hash table size
99
static int fileidx; // argv[fileidx] == file string
101
// for saving input string data
102
typedef struct STRING_
109
static Pdt_t PdtS_ = NULL; // memory for Cache access Gets
110
static uint8_t *Strbuf_ = NULL;
111
static Word_t Strsiz_ = 0;
113
// Roundup BYTES to an even number of words
116
On Linux 2.6.3-4mdkenterprise (Mandrake 10.0 Community) printing (even
117
to a file) makes the timings inaccurate. So, use -L2 or greater to
118
average (actually save min times) and print results after all tests are
121
#define Printf if (Pass == 0) printf
123
#define ROUNDUPWORD(BYTES) (((BYTES) + sizeof(Word_t) - 1) & (-sizeof(Word_t)))
124
#define BYTES2WORDS(BYTES) (((BYTES) + sizeof(Word_t) - 1) / (sizeof(Word_t)))
126
//=======================================================================
127
// T I M I N G M A C R O S
128
//=======================================================================
130
static double DeltaUSec; // Global for remembering delta times
132
// Some operating systems have get_cycles() in /usr/include/asm/timex.h
136
// For a 1.34 nS clock cycle processor (750Mhz)
138
#define CPUSPEED (1.0 / (CPUMHZ))
140
#include <asm/timex.h>
142
#define TIMER_vars(T) cycles_t __TVBeg_##T
144
#define STARTTm(T) __TVBeg_##T = get_cycles()
146
#define ENDTm(D,T) { (D) = (double)(get_cycles() - __TVBeg_##T) * CPUSPEED; }
150
#define TIMER_vars(T) struct timeval __TVBeg_##T, __TVEnd_##T
152
#define STARTTm(T) gettimeofday(&__TVBeg_##T, NULL)
156
gettimeofday(&__TVEnd_##T, NULL); \
157
(D) = (double)(__TVEnd_##T.tv_sec - __TVBeg_##T.tv_sec) * 1E6 + \
158
((double)(__TVEnd_##T.tv_usec - __TVBeg_##T.tv_usec)); \
163
//=======================================================================
164
// M E M O R Y S I Z E M A C R O S
165
//=======================================================================
167
// use mallinfo() instead of sbrk() for memory usage measurements
168
// this should include the RAM that was mmap()ed in malloc()
170
static Word_t DeltaMem; // for remembering
172
// Some mallocs have mallinfo()
173
// #define MALLINFO 1
176
#include <malloc.h> // mallinfo()
178
static struct mallinfo malStart;
180
#define STARTmem malStart = mallinfo()
181
#define ENDmem(DELTAMEM) \
183
struct mallinfo malEnd = mallinfo(); \
184
/* strange little dance from signed to unsigned to double */ \
185
unsigned int _un_int = malEnd.arena - malStart.arena; \
186
(DELTAMEM) = (double)_un_int; /* to double */ \
190
// this usually works for machines with less than 1-2Gb RAM.
191
// (it does NOT include memory ACQUIRED by mmap())
193
static char *malStart;
195
#define STARTmem (malStart = (char *)sbrk(0))
196
#define ENDmem(DELTAMEM) \
198
char *malEnd = (char *)sbrk(0); \
199
(DELTAMEM) = (double)(malEnd - malStart); \
201
#endif // NO MALLINFO
203
//=======================================================================
204
// F I L E O P E N and M A L L O C F A I L M A C R O S
205
//=======================================================================
209
printf("\n !! OOps - Open file error \"%s\": %s (errno = %d)\n", \
210
argv[fileidx], strerror(errno), errno); \
211
fprintf(stderr, " OOps - Open file error \"%s\": %s (errno = %d)\n",\
212
argv[fileidx], strerror(errno), errno); \
216
#define MALLOCERROR \
218
printf("\n !! OOps - malloc failed at Line = %d\n", __LINE__); \
219
fprintf(stderr, " OOps - malloc failed at Line = %d\n", __LINE__); \
223
//=======================================================================
224
// This alternate form of JudyMalloc() is used to keep track how much ram is
225
// used on some of the below ADT's
226
//=======================================================================
228
// JUDY INCLUDE FILES
231
// ****************************************************************************
232
// J U D Y M A L L O C
234
// Allocate RAM. This is the single location in Judy code that calls
235
// malloc(3C). Note: JPM accounting occurs at a higher level.
237
static Word_t TotalJudyMalloc = 0;
240
JudyMalloc(Word_t Words)
244
Addr = (Word_t)malloc(Words * sizeof(Word_t));
247
TotalJudyMalloc += Words;
253
// ****************************************************************************
257
JudyFree(void *PWord, Word_t Words)
260
assert((long)(TotalJudyMalloc - Words) >= 0L);
262
TotalJudyMalloc -= Words;
266
// ****************************************************************************
267
// J U D Y M A L L O C
269
// Higher-level "wrapper" for allocating objects that need not be in RAM,
270
// although at this time they are in fact only in RAM. Later we hope that some
271
// entire subtrees (at a JPM or branch) can be "virtual", so their allocations
272
// and frees should go through this level.
275
JudyMallocVirtual(Word_t Words)
277
return (JudyMalloc(Words));
279
} // JudyMallocVirtual()
281
// ****************************************************************************
285
JudyFreeVirtual(void *PWord, Word_t Words)
287
JudyFree(PWord, Words);
289
} // JudyFreeVirtual()
291
//=======================================================================
292
// Routine to get next size of Indexes
293
//=======================================================================
296
NextNumb(Word_t *PNumber, // pointer to returned next number
297
double *PDNumb, // Temp double of above
298
double DMult, // Multiplier
299
Word_t MaxNumb) // Max number to return
302
double PrevPDNumb = *PDNumb;
305
// Calc next number >= 1.0 beyond previous
309
DDiff = *PDNumb - PrevPDNumb;
313
// Return it in integer format
315
*PNumber += (Word_t)(DDiff + 0.5);
317
*PNumber = (Word_t)(*PDNumb + 0.5);
319
// Verify it did not exceed max number
320
if (*PNumber >= MaxNumb)
322
*PNumber = MaxNumb; // it did, so return max
323
return (1); // flag it
325
return (0); // more available
328
//=======================================================================
329
// M E A S U R E M E N T S T R U C T U R E
330
//=======================================================================
332
typedef struct _MEASUREMENTS_STRUCT *Pms_t;
333
typedef struct _MEASUREMENTS_STRUCT
335
Word_t ms_delta; // number of points in current group
336
double ms_Bytes; // average allocated memory/per string
337
double ms_mininsert; // Min Retrive number
338
double ms_minretrive; // Min Retrive number
341
static Pms_t Pms; // array of MEASUREMENTS_STRUCT
358
//=======================================================================
359
// R a n d o m i z e i n p u t s t r i n g s
360
//=======================================================================
363
Randomize(Pdt_t Pstrstr, Word_t Len)
367
// swap the "random" index with the sequential one
368
for (ii = 1; ii < Len; ii++)
373
// get "random" index
374
swapii = (Word_t)rand() % Len;
377
dttemp = Pstrstr[ii];
378
Pstrstr[ii] = Pstrstr[swapii];
379
Pstrstr[swapii] = dttemp;
383
//=======================================================================
384
// B u i l d s e q u e n c i a l s t r i n g b u f f e r
385
//=======================================================================
388
BuildSeqBuf(Pdt_t Pstrstr, Word_t Len)
390
Word_t SumStrings = 0;
395
assert(Len <= TValues);
397
// calculate how much memory needed for strings
398
for (ii = 0; ii < Len; ii++)
400
Strlen = Pstrstr[ii].dt_strlen;
402
SumStrings += ROUNDUPWORD(Strlen + 1);
404
SumStrings += Strlen + 1;
406
// check if old string buffer is big enough
407
if (SumStrings > Strsiz_)
412
SumStrings += SumStrings / 5; // bump 20%
414
Strbuf_ = (uint8_t *) malloc(SumStrings);
417
Strsiz_ = SumStrings;
419
for (ii = 0, string = Strbuf_; ii < Len; ii++)
421
Strlen = Pstrstr[ii].dt_strlen;
423
PdtS_[ii].dt_strlen = Strlen;
424
PdtS_[ii].dt_string = string;
426
memcpy(string, Pstrstr[ii].dt_string, Strlen + 1);
429
string += ROUNDUPWORD(Strlen + 1);
431
string += Strlen + 1;
436
//=======================================================================
437
// H A S H M E T H O D S T R U C T U R E S
438
//=======================================================================
440
// These structures are used in Hash() and JLHash() ADTs
442
// for storing length of string
444
// Hash chain structure (varible length depending on string)
445
// static part of the length
446
#define HSTRUCTOVD (sizeof(hrec_t) - sizeof(int))
447
typedef struct HASHREC_ *Phrec_t;
448
typedef struct HASHREC_
450
Phrec_t hr_Next; // collision chain link pointer
451
Word_t hr_Value; // Data associated with string
452
int hr_Strlen; // length of string 2 billion max
453
uint8_t hr_String[sizeof(int)]; // string is allocated with struct
457
// hash head structure to keep hash array information
458
typedef struct HASHINFO_
460
Pvoid_t hi_Htbl; // Hash table
461
Word_t hi_tblsize; // Hash table size (Words)
462
Word_t hi_TotalWords; // Hash array total words
463
Word_t hi_Pop1; // Hash array total population
465
} hinfo_t, *Phinfo_t;
467
// size in words of the header structure
468
#define HASHHEADSZ (sizeof(hinfo_t) / sizeof(Word_t))
470
//=======================================================================
471
// H A S H A L G O R I T H M
472
//=======================================================================
474
// For CPUs with a slow mod (%) use table size a power of 2. A test is
475
// made to see if the SIZE is a power of 2, and if so an .AND.(&) is used
476
// instead of a .MOD.(%) to trim the hash return size. Note: a SIZE == 0,
477
// results in no trimming of hash return size.
479
#define HASHSTR(STRING,LENGTH,SIZE) \
480
((SIZE) == ((SIZE) & -(SIZE))) ? \
481
(HashStr(STRING, LENGTH) & ((SIZE) -1)) : \
482
(HashStr(STRING, LENGTH) % (SIZE))
484
// String hash function. Hash string to a unsigned int (uint32_t) This
485
// one needs a fast 32 bit mpy, which is often very slow on older(RISC)
486
// machines. If you are sure you will not over populate the hash table,
487
// then a poorer/faster hash algorithm should be used. Replace with your
488
// own, milage may vary. This program measures the speed, whether used
492
HashStr(void *Str, Word_t Len)
495
uint32_t hashv = Len;
496
uint8_t *k = (uint8_t *) Str;
500
hashv = (A * hashv) + *k++;
506
//=======================================================================
507
// S T O R E and R E T R I V E R O U T I N E S
508
//=======================================================================
510
//=======================================================================
511
// H A S H M E T H O D U S I N G J U D Y L A S H A S H T A B L E
512
//=======================================================================
515
JLHashGet(Pvoid_t JLHash, uint8_t * String, Word_t Strlen)
517
Phinfo_t PHash = (Phinfo_t) JLHash;
518
Phrec_t Phrec, *PPhrec;
524
// get hash value, if mod(%) is slow (in some CPUs), make it a power of 2
525
hval = HASHSTR(String, Strlen, PHash->hi_tblsize);
527
JLG(PPhrec, PHash->hi_Htbl, hval); // use JudyL to get &pointer
530
return (NULL); // no table entry
532
// search for matching string
533
for (Phrec = *PPhrec; Phrec != NULL; Phrec = Phrec->hr_Next)
535
gChainln++; // Hash chain length
536
if (Phrec->hr_Strlen == Strlen) // length match?
538
if (memcmp(Phrec->hr_String, String, Strlen) == 0)
539
return (&(Phrec->hr_Value)); // match! pointer to Value
545
// Return pointer to struct hrec_t associated with string
548
JLHashIns(PPvoid_t PPHash, uint8_t * String, Word_t Strlen, Word_t TblSize)
550
Phrec_t Phrec, *PPhrec;
555
PHash = (Phinfo_t) * PPHash; // core-dump if calling error
556
if (PHash == NULL) // if hash table not allocated
558
// allocate the header
559
PHash = (Phinfo_t) JudyMalloc(HASHHEADSZ);
563
// Initialize the header struct
564
PHash->hi_tblsize = TblSize;
565
PHash->hi_TotalWords = HASHHEADSZ;
566
PHash->hi_Pop1 = 0; // none yet
567
PHash->hi_Htbl = NULL;
568
*PPHash = (Pvoid_t)PHash; // return header to caller
570
// get hash value, if mod(%) is slow (in some CPUs), make it a power of 2
571
hval = HASHSTR(String, Strlen, PHash->hi_tblsize);
573
// get pointer to hash table entry
574
JLI(PPhrec, PHash->hi_Htbl, hval); // JLI will exit if out of memory
576
// search for matching string
577
for (Phrec = *PPhrec; Phrec != NULL; Phrec = Phrec->hr_Next)
579
if (Phrec->hr_Strlen == Strlen) // string length match?
581
if (memcmp(Phrec->hr_String, String, Strlen) == 0)
583
return (&(Phrec->hr_Value)); // match! pointer to Value
588
// String match not found, so do an insert
590
Len = BYTES2WORDS(Strlen + HSTRUCTOVD);
591
Phrec = (Phrec_t) JudyMalloc(Len); // get memory for storing string
595
PHash->hi_TotalWords += Len; // keep track of total mallocs
597
Phrec->hr_Strlen = Strlen; // set string length
598
memcpy(Phrec->hr_String, String, Strlen);
600
Phrec->hr_Next = *PPhrec; // pointer to synonym
601
*PPhrec = Phrec; // place new struct in front of list
602
(PHash->hi_Pop1)++; // add one to population
603
Phrec->hr_Value = (Word_t)0; // zero the associated Value
605
return (&(Phrec->hr_Value)); // return pointer to Value
608
// Return 1 if successful, else 0
611
JLHashDel(PPvoid_t PPHash, uint8_t * String, Word_t Strlen)
613
Phrec_t Phrec, *PPhrec, *PPhrec1;
617
// avoid an core dump here
620
PHash = (Phinfo_t) (*PPHash); // get header
622
return (0); // not found
624
// get hash value, if mod(%) is slow (in some CPUs), make it a power of 2
625
hval = HASHSTR(String, Strlen, PHash->hi_tblsize);
627
// get pointer hash table entry
628
JLG(PPhrec, PHash->hi_Htbl, hval);
630
return (0); // hash entry not found
632
PPhrec1 = PPhrec; // save head hash entry ^
634
// search for matching string
635
for (Phrec = *PPhrec; Phrec != NULL; Phrec = Phrec->hr_Next)
637
if (Phrec->hr_Strlen == Strlen) // string length match?
639
if (memcmp(Phrec->hr_String, String, Strlen) == 0) // string match?
644
*PPhrec = Phrec->hr_Next; // put next in previous
646
Len = BYTES2WORDS(Strlen + HSTRUCTOVD);
647
JudyFree(Phrec, Len);
648
PHash->hi_TotalWords -= Len; // ram usage accounting
650
(PHash->hi_Pop1)--; // Decrement population
652
if (*PPhrec1 == NULL) // no chain left
654
// delete hash table entry
655
JLD(Rc, PHash->hi_Htbl, hval);
658
// If last element, free everything
659
if (PHash->hi_Pop1 == 0)
661
assert(PHash->hi_TotalWords == HASHHEADSZ);
663
JudyFree(PHash, HASHHEADSZ); // the header table
664
*PPHash = NULL; // from caller
666
return (1); // successful
669
PPhrec = &(Phrec->hr_Next); // previous = current
671
return (0); // string not found
674
// Free the whole JLHash structure
677
JLHashFreeArray(PPvoid_t PPHash)
679
Phrec_t Phrec, *PPhrec;
681
Word_t DeletedWords, Bytes;
682
Word_t Index = 0; // for First, Next loop
684
// avoid an core dump here
687
PHash = (Phinfo_t) (*PPHash); // get header
689
return ((Word_t)0); // not found
691
// get bytes of memory usage in (JudyL) Hash table
692
JLMU(Bytes, PHash->hi_Htbl);
694
DeletedWords = HASHHEADSZ; // start with header
696
// Get 1st table entry in Hash table
697
JLF(PPhrec, PHash->hi_Htbl, Index);
699
// found an entry in hash table?
700
while (PPhrec != NULL)
705
// walk the synonym linked list
706
while (Phrec != NULL)
709
Phrec_t Phrecfree = Phrec;
711
// number of words to free -- struct hrec_t
712
Len = BYTES2WORDS(Phrec->hr_Strlen + HSTRUCTOVD);
714
// sum total length of mallocs in words
717
(PHash->hi_Pop1)--; // Decrement population
719
// get pointer to next synonym on list
720
Phrec = Phrec->hr_Next;
722
// free the struct hrec_t
723
JudyFree(Phrecfree, Len);
725
// delete hash table entry
726
JLD(Rc, PHash->hi_Htbl, Index);
729
// get next hash table entry
730
JLN(PPhrec, PHash->hi_Htbl, Index);
732
assert(PHash->hi_TotalWords == DeletedWords);
733
assert(PHash->hi_Pop1 == 0);
735
JudyFree(PHash, HASHHEADSZ); // the header table
736
*PPHash = NULL; // set pointer null
738
// return total bytes freed
739
return ((DeletedWords * sizeof(Word_t)) + Bytes);
742
//=======================================================================
743
// H A S H M E T H O D
744
//=======================================================================
747
HashGet(Phinfo_t PHash, uint8_t * String, Word_t Strlen)
749
Phrec_t Phrec, *Htbl;
752
// avoid an core dump here
756
// get hash value, if mod(%) is slow (in some CPUs), make it a power of 2
757
hval = HASHSTR(String, Strlen, PHash->hi_tblsize);
759
// type Hash table pointer
760
Htbl = (Phrec_t *) PHash->hi_Htbl;
762
// search for matching string
763
for (Phrec = Htbl[hval]; Phrec != NULL; Phrec = Phrec->hr_Next)
765
gChainln++; // Hash chain length
766
if (Phrec->hr_Strlen == Strlen) // length match?
768
if (memcmp(Phrec->hr_String, String, Strlen) == 0)
769
return (&(Phrec->hr_Value)); // match! pointer to Value
775
// Return pointer to struct hrec_t associated with string
778
HashIns(Phinfo_t * PPHash, uint8_t * String, Word_t Strlen, Word_t TblSize)
780
Phrec_t Phrec, *Htbl;
785
PHash = *PPHash; // core-dump if calling error
786
if (PHash == NULL) // if hash table not allocated
788
// allocate the header
789
PHash = (Phinfo_t) JudyMalloc(HASHHEADSZ);
793
// allocate the hash table
794
PHash->hi_Htbl = (Pvoid_t)JudyMalloc(TblSize);
795
if (PHash->hi_Htbl == NULL)
798
// you cant beat this with modern compilers/librarys
799
memset(PHash->hi_Htbl, 0, TblSize * sizeof(Word_t));
801
// Initialize the header struct
802
PHash->hi_tblsize = TblSize;
803
PHash->hi_TotalWords = TblSize + HASHHEADSZ;
804
PHash->hi_Pop1 = 0; // none yet
805
*PPHash = PHash; // return header to caller
807
// get hash value, if mod(%) is slow (in some CPUs), make it a power of 2
808
hval = HASHSTR(String, Strlen, TblSize);
810
// type Hash table pointer
811
Htbl = (Phrec_t *) PHash->hi_Htbl;
813
// search for matching string in hash table entry
814
for (Phrec = Htbl[hval]; Phrec != NULL; Phrec = Phrec->hr_Next)
816
if (Phrec->hr_Strlen == Strlen) // string length match?
818
if (memcmp(Phrec->hr_String, String, Strlen) == 0) // string match?
820
return (&(Phrec->hr_Value)); // match! pointer to Value
824
// string not found, so do an insert
825
Len = BYTES2WORDS(Strlen + HSTRUCTOVD);
826
Phrec = (Phrec_t) JudyMalloc(Len);
829
PHash->hi_TotalWords += Len; // keep track of total mallocs
831
Phrec->hr_Strlen = Strlen; // set string length
832
memcpy(Phrec->hr_String, String, Strlen); // copy it
834
// place new allocation first in chain
835
Phrec->hr_Next = Htbl[hval];
838
(PHash->hi_Pop1)++; // add one to population
839
Phrec->hr_Value = (Word_t)0; // zero the associated Value
841
return (&(Phrec->hr_Value)); // return pointer to Value
844
// Delete entry in hash array, return 1 if successful, else 0
847
HashDel(Phinfo_t * PPHash, uint8_t * String, Word_t Strlen)
849
Phrec_t Phrec, *PPhrec, *Htbl;
853
// avoid an core dump here
856
PHash = *PPHash; // get header
858
return (0); // not found
860
// get hash value, if mod(%) is slow (in some CPUs), make it a power of 2
861
hval = HASHSTR(String, Strlen, PHash->hi_tblsize);
863
// type Hash table pointer
864
Htbl = (Phrec_t *) PHash->hi_Htbl;
866
// get pointer hash table entry
867
PPhrec = &Htbl[hval];
869
// search for matching string
870
for (Phrec = *PPhrec; Phrec != NULL; Phrec = Phrec->hr_Next)
872
if (Phrec->hr_Strlen == Strlen) // length match?
874
if (memcmp(Phrec->hr_String, String, Strlen) == 0)
878
// put next hrec_t in previous hrec_t
879
*PPhrec = Phrec->hr_Next;
881
Len = BYTES2WORDS(Strlen + HSTRUCTOVD);
882
JudyFree(Phrec, Len);
883
PHash->hi_TotalWords -= Len;
884
(PHash->hi_Pop1)--; // Decrement population
886
// If last element, free everything
887
if (PHash->hi_Pop1 == 0)
889
assert(PHash->hi_TotalWords ==
890
(HASHHEADSZ + PHash->hi_tblsize));
892
JudyFree(Htbl, PHash->hi_tblsize); // hash table
893
JudyFree(PHash, HASHHEADSZ); // header struct
894
*PPHash = NULL; // from caller
896
return (1); // successful
899
PPhrec = &(Phrec->hr_Next); // previous = current
901
return (0); // not found
905
HashFreeArray(Phinfo_t * PPHash)
908
Phrec_t Phrec, *Htbl;
912
// avoid an core dump here
915
PHash = (Phinfo_t) (*PPHash); // get header
919
// start accumulator of deleted memory
920
DeletedWords = HASHHEADSZ + PHash->hi_tblsize;
922
// type Hash table pointer
923
Htbl = (Phrec_t *) PHash->hi_Htbl;
925
// walk thru all table entrys
926
for (ii = 0; ii < PHash->hi_tblsize; ii++)
928
Phrec = Htbl[ii]; // next hash table entry
930
while (Phrec != NULL) // walk the synonym linked list
935
// get pointer to next synonym on list
937
Phrec = Phrec->hr_Next;
939
(PHash->hi_Pop1)--; // Decrement population
941
// number of words to free -- struct hrec_t
942
Len = BYTES2WORDS(Phrecfree->hr_Strlen + HSTRUCTOVD);
943
DeletedWords += Len; // sum words freed
945
// free the struct hrec_t
946
JudyFree(Phrecfree, Len);
950
// and free the hash table
951
JudyFree(Htbl, PHash->hi_tblsize);
952
assert(PHash->hi_TotalWords == DeletedWords);
953
assert(PHash->hi_Pop1 == 0);
955
JudyFree(PHash, HASHHEADSZ); // the header table
956
*PPHash = NULL; // set pointer null
958
// return total bytes freed
959
return (DeletedWords * sizeof(Word_t));
962
//=======================================================================
963
// S P L A Y M E T H O D
964
//=======================================================================
966
/* Author J. Zobel, April 2001.
967
Permission to use this code is freely granted, provided that this
968
statement is retained. */
972
typedef struct spwordrec
975
struct spwordrec *left, *right;
976
struct spwordrec *par;
979
typedef struct spansrec
981
struct spwordrec *root;
982
struct spwordrec *ans;
985
SPANSREC spans = { 0 };
987
#define ONELEVEL(PAR,CURR,DIR,RID) \
989
PAR->DIR = CURR->RID; \
991
PAR->DIR->PAR = PAR; \
997
#define ZIGZIG(GPAR,PAR,CURR,DIR,RID) \
999
CURR->PAR = GPAR->PAR; \
1000
if (CURR->PAR != NULL) \
1002
if (CURR->PAR->DIR == GPAR) \
1003
CURR->PAR->DIR = CURR; \
1005
CURR->PAR->RID = CURR; \
1007
GPAR->DIR = PAR->RID; \
1008
if (GPAR->DIR != NULL) \
1009
GPAR->DIR->PAR = GPAR; \
1010
PAR->DIR = CURR->RID; \
1011
if (CURR->RID != NULL) \
1012
CURR->RID->PAR = PAR; \
1019
#define ZIGZAG(GPAR,PAR,CURR,DIR,RID) \
1021
CURR->PAR = GPAR->PAR; \
1022
if (CURR->PAR != NULL) \
1024
if (CURR->PAR->DIR == GPAR) \
1025
CURR->PAR->DIR = CURR; \
1027
CURR->PAR->RID = CURR; \
1029
PAR->RID = CURR->DIR; \
1030
if (PAR->RID != NULL) \
1031
PAR->RID->PAR = PAR; \
1032
GPAR->DIR = CURR->RID; \
1033
if (GPAR->DIR != NULL) \
1034
GPAR->DIR->PAR = GPAR; \
1041
int scount = ROTATEFAC;
1043
/* Create a node to hold a word */
1046
spwcreate(char *word, SPTREEREC * par)
1050
tmp = (SPTREEREC *) malloc(sizeof(SPTREEREC));
1051
tmp->word = (char *)malloc(strlen(word) + 1);
1052
strcpy(tmp->word, word);
1053
tmp->left = tmp->right = NULL;
1057
gStored++; // count stored
1062
/* Search for word in a splay tree. If word is found, bring it to
1063
root, possibly intermittently. Structure ans is used to pass
1064
in the root, and to pass back both the new root (which may or
1065
may not be changed) and the looked-for record. */
1068
splaysearch(SPANSREC * ans, char *word)
1070
SPTREEREC *curr = ans->root, *par, *gpar;
1075
if (ans->root == NULL)
1080
while (curr != NULL && (val = strcmp(word, curr->word)) != 0)
1090
if (curr == ans->root)
1095
if (scount <= 0 && curr != NULL) /* Move node towards root */
1099
while ((par = curr->par) != NULL)
1101
if (par->left == curr)
1103
if ((gpar = par->par) == NULL)
1105
ONELEVEL(par, curr, left, right);
1107
else if (gpar->left == par)
1109
ZIGZIG(gpar, par, curr, left, right);
1113
ZIGZAG(gpar, par, curr, right, left);
1118
if ((gpar = par->par) == NULL)
1120
ONELEVEL(par, curr, right, left);
1122
else if (gpar->left == par)
1124
ZIGZAG(gpar, par, curr, left, right);
1128
ZIGZIG(gpar, par, curr, right, left);
1138
/* Insert word into a splay tree. If word is already present, bring it to
1139
root, possibly intermittently. Structure ans is used to pass
1140
in the root, and to pass back both the new root (which may or
1141
may not be changed) and the looked-for record. */
1144
splayinsert(SPANSREC * ans, char *word)
1146
SPTREEREC *curr = ans->root, *par, *gpar, *prev = NULL, *spwcreate();
1151
if (ans->root == NULL)
1153
ans->ans = ans->root = spwcreate(word, NULL);
1157
while (curr != NULL && (val = strcmp(word, curr->word)) != 0)
1169
curr = prev->right = spwcreate(word, prev);
1171
curr = prev->left = spwcreate(word, prev);
1176
if (scount <= 0) /* Move node towards root */
1180
while ((par = curr->par) != NULL)
1182
if (par->left == curr)
1184
if ((gpar = par->par) == NULL)
1186
ONELEVEL(par, curr, left, right);
1188
else if (gpar->left == par)
1190
ZIGZIG(gpar, par, curr, left, right);
1194
ZIGZAG(gpar, par, curr, right, left);
1199
if ((gpar = par->par) == NULL)
1201
ONELEVEL(par, curr, right, left);
1203
else if (gpar->left == par)
1205
ZIGZAG(gpar, par, curr, left, right);
1209
ZIGZIG(gpar, par, curr, right, left);
1218
//=======================================================================
1219
// R E D B L A C K M E T H O D
1220
//=======================================================================
1222
/* Author J. Zobel, April 2001.
1223
Permission to use this code is freely granted, provided that this
1224
statement is retained. */
1226
typedef struct rbwordrec
1229
struct rbwordrec *left, *right;
1230
struct rbwordrec *par;
1234
typedef struct rbansrec
1236
struct rbwordrec *root;
1237
struct rbwordrec *ans;
1240
BRANSREC rbans = { 0 };
1245
/* Find word in a redblack tree */
1248
redblacksearch(BRANSREC * ans, char *word)
1250
RBTREEREC *curr = ans->root;
1253
if (ans->root != NULL)
1255
while (curr != NULL && (val = strcmp(word, curr->word)) != 0)
1269
/* Rotate the right child of par upwards */
1271
/* Could be written as a macro, but not really necessary
1272
as it is only called on insertion */
1275
leftrotate(BRANSREC * ans, RBTREEREC * par)
1277
RBTREEREC *curr, *gpar;
1279
if ((curr = par->right) != NULL)
1281
par->right = curr->left;
1282
if (curr->left != NULL)
1283
curr->left->par = par;
1284
curr->par = par->par;
1285
if ((gpar = par->par) == NULL)
1289
if (par == gpar->left)
1299
/* Rotate the left child of par upwards */
1301
rightrotate(BRANSREC * ans, RBTREEREC * par)
1303
RBTREEREC *curr, *gpar;
1305
if ((curr = par->left) != NULL)
1307
par->left = curr->right;
1308
if (curr->right != NULL)
1309
curr->right->par = par;
1310
curr->par = par->par;
1311
if ((gpar = par->par) == NULL)
1315
if (par == gpar->left)
1325
/* Create a node to hold a word */
1328
rbwcreate(char *word, RBTREEREC * par)
1332
tmp = (RBTREEREC *) malloc(sizeof(RBTREEREC));
1333
tmp->word = (char *)malloc(strlen(word) + 1);
1334
strcpy(tmp->word, word);
1335
tmp->left = tmp->right = NULL;
1339
gStored++; // count stored
1344
/* Insert word into a redblack tree */
1347
redblackinsert(BRANSREC * ans, char *word)
1349
RBTREEREC *curr = ans->root, *par, *gpar, *prev = NULL, *rbwcreate();
1352
if (ans->root == NULL)
1354
ans->ans = ans->root = rbwcreate(word, NULL);
1357
while (curr != NULL && (val = strcmp(word, curr->word)) != 0)
1369
/* Insert a new node, rotate up if necessary */
1372
curr = prev->right = rbwcreate(word, prev);
1374
curr = prev->left = rbwcreate(word, prev);
1377
while ((par = curr->par) != NULL
1378
&& (gpar = par->par) != NULL && curr->par->colour == RED)
1380
if (par == gpar->left)
1382
if (gpar->right != NULL && gpar->right->colour == RED)
1384
par->colour = BLACK;
1385
gpar->right->colour = BLACK;
1391
if (curr == par->right)
1394
leftrotate(ans, curr);
1397
par->colour = BLACK;
1398
if ((gpar = par->par) != NULL)
1401
rightrotate(ans, gpar);
1407
if (gpar->left != NULL && gpar->left->colour == RED)
1409
par->colour = BLACK;
1410
gpar->left->colour = BLACK;
1416
if (curr == par->left)
1419
rightrotate(ans, curr);
1422
par->colour = BLACK;
1423
if ((gpar = par->par) != NULL)
1426
leftrotate(ans, gpar);
1431
if (curr->par == NULL)
1433
ans->root->colour = BLACK;
1439
//=======================================================================
1440
// T E R N A R Y M E T H O D
1441
//=======================================================================
1443
typedef struct tnode *Tptr;
1444
typedef struct tnode
1447
Tptr lokid, eqkid, hikid;
1452
TernaryIns(Tptr * p, uint8_t * s, int Len)
1457
while ((pp = *p) != NULL)
1459
if ((d = *s - pp->splitchar) == 0)
1463
printf("Oops duplicate Ternary string %s\n", instr);
1475
*p = (Tptr) malloc(sizeof(Tnode));
1478
pp->lokid = pp->eqkid = pp->hikid = 0;
1481
pp->eqkid = (Tptr) instr;
1482
gStored++; // number of strings stored
1490
TernaryGet(Tptr p, uint8_t * s, int Len)
1494
if (*s < p->splitchar)
1496
else if (*s == p->splitchar)
1508
//=======================================================================
1509
// M E A S U R E A D T S P E E D and M E M O R Y U S A G E
1510
//=======================================================================
1512
//Word_t TotalJudyMalloc;
1514
#define GETSTRING(PCurStr, Strlen)
1517
main(int argc, char *argv[])
1519
TIMER_vars(tm); // declare timer variables
1520
FILE *fid; // to read file.
1521
int Chr; // char read from fgetc
1522
Pdt_t Pdt, Pdts; // array of lengths and pointers to str
1523
uint8_t *PCurStr; // Current string pointer
1524
Word_t LineCnt; // line counter
1525
int Strlen; // = strlen();
1526
Word_t StrTot; // Total len of strings
1527
Word_t StrNumb; // current line number
1528
Word_t ReadLin; // strings to read
1529
double Mult; // multiplier between groups
1530
Word_t Groups; // number of measurement groups
1531
Word_t grp; // current group
1532
Pvoid_t JudySL = NULL; // JudySL root pointer
1533
Pvoid_t JudyHS = NULL; // JudyHS root pointer
1534
Pvoid_t JLHash = NULL; // JLHash root pointer
1535
Phinfo_t HRoot = NULL; // hash table root pointer
1536
Tptr Ternary = { 0 }; // Ternary struct root pointer
1538
Method_t Method = M_invalid; // the method to measure
1539
Word_t lines = 0; // to shut up compiler
1540
Word_t Bytes = 0; // Bytes deallocated from FreeArray
1541
Word_t StringMemory; // Bytes allocated for input data
1546
extern char *optarg;
1550
setbuf(stdout, NULL);
1552
//============================================================
1553
// PARSE INPUT PARAMETERS
1554
//============================================================
1556
while ((Opt = getopt(argc, argv, "A:H:L:n:T:P:M:praDC")) != -1)
1561
if (Method != M_invalid)
1563
printf("\nOnly ONE '-A<ADT>' is allowed!!!!!!\n");
1567
if (strcmp(optarg, "Print") == 0)
1569
if (strcmp(optarg, "Hash") == 0)
1572
HTblsz = 1LU << 20; // default 1.0+ million
1574
if (strcmp(optarg, "JLHash") == 0)
1577
HTblsz = 0; // max 2^32
1579
if (strcmp(optarg, "JudySL") == 0)
1581
if (strcmp(optarg, "Splay") == 0)
1583
if (strcmp(optarg, "Redblack") == 0)
1584
Method = M_Redblack;
1585
if (strcmp(optarg, "JudyHS") == 0)
1587
if (strcmp(optarg, "Ternary") == 0)
1591
case 'H': // Size of Hash table
1592
HTblsz = strtoul(optarg, NULL, 0);
1595
case 'L': // Number of Loops
1596
Passes = atoi(optarg);
1599
printf("\n !! OOps - Number of Loops must be > 0\n");
1604
case 'n': // Max population of arrays
1605
nStrg = strtoul(optarg, NULL, 0); // Size of Linear Array
1608
printf("\n !! OOps - Number of strings must be > 0\n");
1613
case 'T': // Maximum retrieve tests for timing
1614
TValues = strtoul(optarg, NULL, 0);
1617
case 'P': // measurement points per decade
1618
PtsPdec = strtoul(optarg, NULL, 0);
1621
case 'M': // maximum length of input string
1622
MLength = atoi(optarg);
1625
case 'p': // pre-initialize Hash table
1629
case 'r': // do not randomize input
1633
("\n !! OOps '-r' and '-C' flag are mutually exclusive\n");
1640
case 'a': // word align string buffers
1644
case 'D': // do a delete at end
1648
case 'C': // build sequential Get string buffers
1652
("\n !! OOps '-C' and '-r' flag are mutually exclusive\n");
1667
("\n !! OOps -- '-A <ADT>' I.E. '-AHash' or '-AJudyHS' is a required option\n");
1674
printf("\n !! OOps -- No input file specified\n");
1681
printf("$ %s -A<ADT> -n# -H# -P# -T# -p -r -a InputStringFile\n\n",
1684
printf("'InputStringFile' is text file of strings to use in test\n\n");
1686
("-A <ADT> is Hash|JLHash|JudySL|JudyHS|Splay|Redblack|Ternary|Print\n");
1688
printf("-n <#> max number of strings to use in tests (all)\n");
1689
printf("-H <#> is number elements in Hash table\n");
1690
printf("-P <#> number of measurement points per decade (40)\n");
1692
("-T <#> Change the 'Get' number_of_strings to measure per data point\n");
1693
printf("-D Use 'Delete' routine instead of 'FreeArray' routine\n");
1694
printf("-p pre-zero hash table to fault in all pages\n");
1695
printf("-r Do not randomize Insert and Get order of strings\n");
1696
printf("-C Build contigious string buffers for 'Get' tests\n");
1697
printf("-a Word_t align the start address of input strings\n");
1698
printf("-M <#> Change the maximum 'strlen(String)' of input Strings\n");
1704
// calculate max number mask used in hash routine
1706
//============================================================
1707
// PRINT COMMAND NAME + RUN ARGUMENTS
1708
//============================================================
1710
printf("# %s", argv[0]);
1711
if (nStrg != INFSTRGS)
1712
printf(" -n%lu", nStrg);
1719
printf(" -A JLHash");
1722
printf(" -A JudySL");
1725
printf(" -A JudyHS");
1728
printf(" -A Splay");
1731
printf(" -A Redblack");
1734
printf(" -A Ternary");
1740
printf(" -H%lu", HTblsz);
1741
printf(" -P%lu", PtsPdec);
1742
printf(" -L%d", Passes);
1751
printf(" -M%d", MLength);
1752
printf(" %s", argv[fileidx]);
1755
// print some header
1757
printf("# This file is in a format to input to 'jbgraph'\n");
1758
printf("# XLABEL Stored\n");
1759
printf("# YLABEL Microseconds / Index\n");
1760
printf("# COLHEAD 1 Total Insert attempts\n");
1761
printf("# COLHEAD 2 Number Gets\n");
1762
printf("# COLHEAD 3 Duplicate strings\n");
1763
printf("# COLHEAD 4 Insert Time (uS)\n");
1764
printf("# COLHEAD 5 Get Time (uS)\n");
1765
printf("# COLHEAD 6 Hash Chain Length\n");
1766
printf("# COLHEAD 7 Average RAM/String\n");
1768
// uname(2) strings describing the machine
1770
struct utsname ubuf; // for system name
1772
if (uname(&ubuf) == -1)
1773
printf("# Uname(2) failed\n");
1775
printf("# %s %s %s %s %s\n", ubuf.sysname, ubuf.nodename,
1776
ubuf.release, ubuf.version, ubuf.machine);
1778
if (sizeof(Word_t) == 8)
1779
printf("# 64 Bit CPU\n");
1780
else if (sizeof(Word_t) == 4)
1781
printf("# 32 Bit CPU\n");
1783
printf("# Processor speed compiled at %d Mhz\n", CPUMHZ);
1786
if (Method == M_Hash)
1787
printf("# Hash record struct: sizeof(hrec_t) = %d\n", sizeof(hrec_t));
1788
if (Method == M_Ternary)
1789
printf("# Ternary record struct: sizeof(Tnode) = %d\n", sizeof(Tnode));
1793
if ((fid = fopen(argv[fileidx], "r")) == NULL)
1796
for (StrTot = Strlen = LineCnt = 0; (Chr = fgetc(fid)) != EOF;)
1800
if (Strlen) // eat zero length lines
1802
if (Strlen > MLength)
1804
LineCnt++; // increase string count
1805
Strlen++; // add a \0 for JudySL
1807
if (aFlag) // for word alignment
1808
StrTot += ROUNDUPWORD(Strlen);
1810
StrTot += Strlen; // memory needed to store strings
1812
if (LineCnt == nStrg) // shorten if required by -n option
1825
nStrg = LineCnt; // adj if necessary
1827
// get struct to keep track of the strings
1828
StringMemory = sizeof(dt_t) * nStrg;
1829
Pdt = (Pdt_t) malloc(sizeof(dt_t) * nStrg);
1833
// get memory to store the strings
1834
StringMemory += StrTot;
1835
PCurStr = (uint8_t *) malloc(StrTot);
1836
if (PCurStr == NULL)
1839
// BRING FILE INTO RAM, COUNT LINES and CHECK LENGTH
1841
//============================================================
1842
// CALCULATE NUMBER OF MEASUREMENT GROUPS -- points per decade
1843
//============================================================
1845
// Calculate Multiplier for number of points per decade
1846
Mult = pow(10.0, 1.0 / (double)PtsPdec);
1849
Word_t numb, prevnumb;
1851
// Count number of measurements needed (10K max)
1853
for (Groups = 2; Groups < 10000; Groups++)
1854
if (NextNumb(&numb, &sum, Mult, nStrg))
1857
// Get memory for measurements
1858
Pms = (Pms_t) calloc(Groups, sizeof(ms_t));
1862
// Now calculate number of Indexes for each measurement point
1865
for (grp = 0; grp < Groups; grp++)
1867
Pms[grp].ms_delta = numb - prevnumb;
1868
Pms[grp].ms_mininsert = 10000000.0; // infinity
1869
Pms[grp].ms_minretrive = 10000000.0; // infinity
1870
Pms[grp].ms_Bytes = 0.0;
1874
NextNumb(&numb, &sum, Mult, nStrg);
1876
} // Groups = number of sizes
1878
// print remaining header
1880
if (Method == M_Hash)
1882
printf("# Allocate Hash table = %lu elements\n", HTblsz);
1884
if (Method == M_JLHash)
1887
printf("# JLHash table virtual size = %lu\n", HTblsz);
1889
printf("# JLHash table virtual size = 4294967296\n");
1892
//=======================================================================
1893
// Read text input file into RAM
1894
//=======================================================================
1896
if ((fid = fopen(argv[fileidx], "r")) == NULL)
1899
for (Strlen = LineCnt = 0; LineCnt < nStrg;)
1904
if (Strlen) // eat zero length lines
1906
if (Strlen > MLength)
1908
Pdt[LineCnt].dt_string = PCurStr - Strlen;
1909
Pdt[LineCnt].dt_strlen = Strlen;
1913
*PCurStr++ = '\0'; // for JudySL
1914
if (aFlag) // for word alignment
1915
PCurStr = (uint8_t *) ROUNDUPWORD((Word_t)PCurStr);
1917
if ((Word_t)PCurStr % sizeof(Word_t))
1923
if (Strlen < MLength)
1927
Chr = ' '; // for JudySL
1928
*PCurStr++ = (uint8_t) Chr;
1934
assert(nStrg == LineCnt);
1936
printf("# %lu (%.1f%%) non-Word_t aligned string buffers\n",
1937
aCount, (double)aCount / (double)LineCnt * 100.0);
1939
printf("# Ram used for input data = %lu bytes\n", StringMemory);
1941
printf("# Average string length = %.1f bytes\n",
1942
(double)(StrTot - LineCnt) / LineCnt);
1944
// Allocate memory for Cached assess to 'Get' (largest delta). This flag
1945
// will put the 'randomized' 'Get' order strings in a sequential buffer.
1946
// Modern processors will 'read ahead' with an access to RAM is sequential
1947
// -- thus saving the 'Get' having to bring the string into cache.
1950
PdtS_ = (Pdt_t) malloc(TValues * sizeof(dt_t));
1954
// now guess how much memory will be needed for the strings
1955
Strsiz_ = ((StrTot / nStrg) * TValues);
1956
Strsiz_ += Strsiz_; // bump %20
1958
Strbuf_ = (uint8_t *) malloc(Strsiz_);
1959
if (Strbuf_ == NULL)
1963
("# %lu bytes malloc() for 'cached' strings for Get measurement\n",
1967
//=======================================================================
1968
// TIME GETSTRING() from Cache (most of the time)
1969
//=======================================================================
1971
STARTTm(tm); // start timer
1972
for (LineCnt = 0; LineCnt < nStrg; LineCnt++)
1974
GETSTRING(PCurStr, Strlen);
1975
Strlen = Pdt[LineCnt].dt_strlen;
1976
PCurStr = Pdt[LineCnt].dt_string;
1978
if (strlen(PCurStr) != Strlen) // bring string into Cache
1980
// necessary to prevent cc from optimizing out
1981
printf(" !! OOps Bug, wrong string length\n");
1985
ENDTm(DeltaUSec, tm); // end timer
1988
("# Access Time = %6.3f uS average per string (mostly from Cache)\n",
1991
//=======================================================================
1992
// TIME GETSTRING() + HASHSTR() from Cache (most of the time)
1993
//=======================================================================
1995
STARTTm(tm); // start timer
1996
for (LineCnt = 0; LineCnt < nStrg; LineCnt++)
1999
GETSTRING(PCurStr, Strlen);
2000
PCurStr = Pdt[LineCnt].dt_string;
2001
Strlen = Pdt[LineCnt].dt_strlen;
2002
hval = HASHSTR(PCurStr, Strlen, HTblsz);
2004
printf("OOps foolflag is set, hval = %d\n", hval);
2006
ENDTm(DeltaUSec, tm); // end timer
2009
("# HashStr() Time = %6.3f uS average per string (mostly from Cache)\n",
2012
// randomize the input strings (adjacent strings will not be on same page)
2016
Randomize(Pdt, nStrg); // Randomize ALL to be stored
2018
//=======================================================================
2019
// TIME GETSTRING() from RAM (most of the time)
2020
//=======================================================================
2022
STARTTm(tm); // start timer
2023
for (LineCnt = 0; LineCnt < nStrg; LineCnt++)
2025
GETSTRING(PCurStr, Strlen);
2026
Strlen = Pdt[LineCnt].dt_strlen;
2027
PCurStr = Pdt[LineCnt].dt_string;
2029
if (strlen(PCurStr) != Strlen) // bring string into Cache
2031
// necessary to prevent cc from optimizing out
2032
printf(" !! OOps Bug, wrong string length\n");
2036
ENDTm(DeltaUSec, tm); // end timer
2039
("# Access Time = %6.3f uS average per string (mostly from RAM)\n",
2042
//=======================================================================
2043
// TIME GETSTRING() + HASHSTR() from RAM (most of the time)
2044
//=======================================================================
2046
STARTTm(tm); // start timer
2047
for (LineCnt = 0; LineCnt < nStrg; LineCnt++)
2050
GETSTRING(PCurStr, Strlen);
2051
Strlen = Pdt[LineCnt].dt_strlen;
2052
PCurStr = Pdt[LineCnt].dt_string;
2053
hval = HASHSTR(PCurStr, Strlen, HTblsz);
2055
printf("OOps foolflag is set, hval = %u\n", hval);
2057
ENDTm(DeltaUSec, tm); // end timer
2060
("# HashStr() Time = %6.3f uS average per string (mostly from RAM)\n",
2064
//=======================================================================
2065
// Insert, Get and Delete loops
2066
//=======================================================================
2068
for (Pass = 0; Pass < Passes; Pass++)
2070
printf("# Pass %d\n", Pass);
2074
("# TotInserts DeltaGets DupStrs InsTime GetTime HChainLen Ram/String\n");
2075
gStored = 0; // number of strings inserted
2076
StrNumb = 0; // number of attempted strings inserted
2078
STARTmem; // current malloc() mem usage
2079
for (grp = 0; grp < Groups; grp++)
2082
Word_t Begin = gStored; // remember current STOREed
2083
Word_t Delta = Pms[grp].ms_delta;
2089
STARTTm(tm); // start timer
2090
for (lines = 0; lines < Delta; lines++, StrNumb++)
2092
GETSTRING(PCurStr, Strlen);
2093
PCurStr = Pdt[StrNumb].dt_string;
2094
Printf("%s\n", (char *)PCurStr);
2096
ENDTm(DeltaUSec, tm); // end timer
2101
STARTTm(tm); // start timer
2102
for (lines = 0; lines < Delta; lines++, StrNumb++)
2104
GETSTRING(PCurStr, Strlen);
2105
Strlen = Pdt[StrNumb].dt_strlen;
2106
PCurStr = Pdt[StrNumb].dt_string;
2108
PValue = HashIns(&HRoot, PCurStr, Strlen, HTblsz);
2109
if ((*PValue)++ == 0)
2110
gStored++; // number of strings stored
2112
ENDTm(DeltaUSec, tm); // end timer
2117
STARTTm(tm); // start timer
2118
for (lines = 0; lines < Delta; lines++, StrNumb++)
2120
GETSTRING(PCurStr, Strlen);
2121
Strlen = Pdt[StrNumb].dt_strlen;
2122
PCurStr = Pdt[StrNumb].dt_string;
2123
PValue = JLHashIns(&JLHash, PCurStr, Strlen, HTblsz);
2124
if ((*PValue)++ == 0)
2125
gStored++; // number of strings stored
2127
ENDTm(DeltaUSec, tm); // end timer
2132
STARTTm(tm); // start timer
2133
for (lines = 0; lines < Delta; lines++, StrNumb++)
2135
GETSTRING(PCurStr, Strlen);
2136
PCurStr = Pdt[StrNumb].dt_string;
2138
JSLI(PValue, JudySL, PCurStr); // insert string
2139
if ((*PValue)++ == 0)
2140
gStored++; // number of strings stored
2142
ENDTm(DeltaUSec, tm); // end timer
2147
STARTTm(tm); // start timer
2148
for (lines = 0; lines < Delta; lines++, StrNumb++)
2150
GETSTRING(PCurStr, Strlen);
2151
Strlen = Pdt[StrNumb].dt_strlen;
2152
PCurStr = Pdt[StrNumb].dt_string;
2154
JHSI(PValue, JudyHS, PCurStr, Strlen); // insert string
2155
if ((*PValue)++ == 0)
2156
gStored++; // number of strings stored
2158
ENDTm(DeltaUSec, tm); // end timer
2162
// NOTE: the ADT's below here are so slow, that I did not add much effort
2163
// to clean them up. (dlb)
2167
STARTTm(tm); // start timer
2168
for (lines = 0; lines < Delta; lines++, StrNumb++)
2170
GETSTRING(PCurStr, Strlen);
2171
PCurStr = Pdt[StrNumb].dt_string;
2173
splayinsert(&spans, (char *)PCurStr);
2175
ENDTm(DeltaUSec, tm); // end timer
2180
STARTTm(tm); // start timer
2181
for (lines = 0; lines < Delta; lines++, StrNumb++)
2183
GETSTRING(PCurStr, Strlen);
2184
PCurStr = Pdt[StrNumb].dt_string;
2186
redblackinsert(&rbans, (char *)PCurStr);
2188
ENDTm(DeltaUSec, tm); // end timer
2193
STARTTm(tm); // start timer
2194
for (lines = 0; lines < Delta; lines++, StrNumb++)
2196
GETSTRING(PCurStr, Strlen);
2197
Strlen = Pdt[StrNumb].dt_strlen;
2198
PCurStr = Pdt[StrNumb].dt_string;
2200
TernaryIns(&Ternary, PCurStr, Strlen);
2202
ENDTm(DeltaUSec, tm); // end timer
2206
assert(0); // cant happen
2209
ENDmem(DeltaMem); // current malloc() mem usage
2211
ReadLin = StrNumb; // adjust delta
2212
if (ReadLin > TValues)
2214
// if (Delta > TValues)
2215
// ReadLin = Delta; // use the Delta
2217
Printf(" %11lu", StrNumb); // Total stored
2218
Printf(" %10lu", ReadLin); // Number to read back
2219
Begin = gStored - Begin; // actual STORED
2220
assert(lines == Delta);
2221
Printf(" %8lu", Delta - Begin); // Duplicate strings
2223
// Average time per line to store (including duplicate strings)
2224
Mult = DeltaUSec / (double)Delta;
2226
if (Mult < Pms[grp].ms_mininsert)
2227
Pms[grp].ms_mininsert = Mult;
2229
Printf(" %7.3f", Mult);
2231
// Bytes allocated thru malloc()
2232
if (TotalJudyMalloc == 0)
2233
Pms[grp].ms_Bytes = (double)DeltaMem;
2235
Pms[grp].ms_Bytes = (double)(TotalJudyMalloc * sizeof(Word_t));
2237
Pms[grp].ms_Bytes /= (double)gStored;
2241
//=======================================================================
2243
//=======================================================================
2245
Pdts = Pdt; // Strings to 'Get'
2246
gChainln = 0; // total chain lengths
2250
Randomize(Pdt, StrNumb); // Randomize ONLY those stored
2254
// Allocate and make sequencial string buffer
2255
Pdts = BuildSeqBuf(Pdt, ReadLin);
2264
STARTTm(tm); // start timer
2265
for (lines = 0; lines < ReadLin; lines++)
2267
GETSTRING(PCurStr, Strlen);
2268
Strlen = Pdts[lines].dt_strlen;
2269
PCurStr = Pdts[lines].dt_string;
2271
PValue = HashGet(HRoot, PCurStr, Strlen); // get string
2272
assert(PValue != NULL);
2273
assert(*PValue > 0);
2275
ENDTm(DeltaUSec, tm); // end timer
2280
STARTTm(tm); // start timer
2281
for (lines = 0; lines < ReadLin; lines++)
2283
GETSTRING(PCurStr, Strlen);
2284
Strlen = Pdts[lines].dt_strlen;
2285
PCurStr = Pdts[lines].dt_string;
2287
PValue = JLHashGet(JLHash, PCurStr, Strlen); // get string
2288
assert(PValue != NULL);
2289
assert(*PValue > 0);
2291
ENDTm(DeltaUSec, tm); // end timer
2296
STARTTm(tm); // start timer
2297
for (lines = 0; lines < ReadLin; lines++)
2299
GETSTRING(PCurStr, Strlen);
2300
PCurStr = Pdts[lines].dt_string;
2302
JSLG(PValue, JudySL, PCurStr); // get string
2303
assert(PValue != NULL);
2304
assert(*PValue > 0);
2306
ENDTm(DeltaUSec, tm); // end timer
2311
STARTTm(tm); // start timer
2312
for (lines = 0; lines < ReadLin; lines++)
2314
GETSTRING(PCurStr, Strlen);
2315
Strlen = Pdts[lines].dt_strlen;
2316
PCurStr = Pdts[lines].dt_string;
2318
JHSG(PValue, JudyHS, PCurStr, Strlen); // get string
2319
assert(PValue != NULL);
2320
assert(*PValue > 0);
2322
ENDTm(DeltaUSec, tm); // end timer
2326
// NOTE: the ADT's below here are so slow, that I did not add much effort
2327
// to clean them up. (dlb)
2331
STARTTm(tm); // start timer
2332
for (lines = 0; lines < ReadLin; lines++)
2334
GETSTRING(PCurStr, Strlen);
2335
PCurStr = Pdts[lines].dt_string;
2337
splaysearch(&spans, (char *)PCurStr);
2339
ENDTm(DeltaUSec, tm); // end timer
2344
STARTTm(tm); // start timer
2345
for (lines = 0; lines < ReadLin; lines++)
2347
GETSTRING(PCurStr, Strlen);
2348
PCurStr = Pdts[lines].dt_string;
2350
redblacksearch(&rbans, (char *)PCurStr);
2352
ENDTm(DeltaUSec, tm); // end timer
2357
STARTTm(tm); // start timer
2358
for (lines = 0; lines < ReadLin; lines++)
2360
GETSTRING(PCurStr, Strlen);
2361
Strlen = Pdts[lines].dt_strlen;
2362
PCurStr = Pdts[lines].dt_string;
2364
if (TernaryGet(Ternary, PCurStr, Strlen) == 0)
2366
printf("\n OOps - Ternary Bug at Line = %d\n",
2371
ENDTm(DeltaUSec, tm); // end timer
2375
assert(0); // cant happen
2378
Mult = DeltaUSec / (double)ReadLin;
2381
if (Mult < Pms[grp].ms_minretrive)
2382
Pms[grp].ms_minretrive = Mult;
2384
Printf(" %7.3f", Mult); // RETRIVE per string
2385
Printf(" %9.6f", (double)gChainln / (double)ReadLin);
2387
// RAM USED PER STRING TO STORE DATA
2389
Printf(" %13.1f", (double)Pms[grp].ms_Bytes);
2393
if (Method == M_Print)
2396
Printf("# Total Duplicate strings = %lu\n", nStrg - gStored);
2398
//=======================================================================
2400
//=======================================================================
2402
DeltaUSec = -1.0; // set deleted flag
2406
Randomize(Pdt, StrNumb); // Randomize ONLY those stored
2414
Printf("# Begin JudySLDel() loop...\n");
2415
STARTTm(tm); // start timer
2416
for (lines = 0; lines < nStrg; lines++)
2419
GETSTRING(PCurStr, Strlen);
2420
PCurStr = Pdt[lines].dt_string;
2421
JSLD(Rc, JudySL, PCurStr); // delete string
2424
ENDTm(DeltaUSec, tm); // end timer
2428
Printf("# Begin JudySLFreeArray()...\n");
2429
STARTTm(tm); // start timer
2430
JSLFA(Bytes, JudySL);
2431
ENDTm(DeltaUSec, tm); // end timer
2440
Printf("# Begin JudyHSDel() loop...");
2441
STARTTm(tm); // start timer
2442
for (lines = 0; lines < nStrg; lines++)
2444
GETSTRING(PCurStr, Strlen);
2445
Strlen = Pdt[lines].dt_strlen;
2446
PCurStr = Pdt[lines].dt_string;
2447
JHSD(Rc, JudyHS, PCurStr, Strlen); // Delete string
2450
ENDTm(DeltaUSec, tm); // end timer
2454
Printf("# Begin JudyHSFreeArray()...\n");
2455
STARTTm(tm); // start timer
2456
JHSFA(Bytes, JudyHS);
2457
ENDTm(DeltaUSec, tm); // end timer
2465
Printf("# Begin HashDel() loop...\n");
2466
STARTTm(tm); // start timer
2467
for (lines = 0; lines < nStrg; lines++)
2469
GETSTRING(PCurStr, Strlen);
2470
Strlen = Pdt[lines].dt_strlen;
2471
PCurStr = Pdt[lines].dt_string;
2472
HashDel(&HRoot, PCurStr, Strlen); // Delete string
2474
ENDTm(DeltaUSec, tm); // end timer
2478
Printf("# Begin HashFreeArray()...\n");
2479
STARTTm(tm); // start timer
2480
Bytes = HashFreeArray(&HRoot);
2481
ENDTm(DeltaUSec, tm); // end timer
2489
Printf("# Begin JLHashDel() loop...\n");
2490
STARTTm(tm); // start timer
2491
for (lines = 0; lines < nStrg; lines++)
2493
GETSTRING(PCurStr, Strlen);
2494
Strlen = Pdt[lines].dt_strlen;
2495
PCurStr = Pdt[lines].dt_string;
2496
JLHashDel(&JLHash, PCurStr, Strlen); // Delete string
2498
ENDTm(DeltaUSec, tm); // end timer
2502
Printf("# Begin JLHashFreeArray()...\n");
2503
STARTTm(tm); // start timer
2504
Bytes = JLHashFreeArray(&JLHash);
2505
ENDTm(DeltaUSec, tm); // end timer
2510
printf("# Delete not implemented yet, so quit\n");
2511
Passes = 1; // No, delete, so quit
2514
// average time per line to delete (including duplicate strings)
2515
if (Bytes) // Measured freed bytes?
2517
Printf("# returned %lu bytes\n", Bytes);
2519
if (TotalJudyMalloc) // Any bytes left after free?
2522
("# !!! BUG, %lu bytes not deleted in *Free()\n",
2523
TotalJudyMalloc * sizeof(Word_t));
2525
if (DeltaUSec != -1.0) // Measured how long to free?
2527
Printf("# Free %lu strings, %0.3f uSecs Ave/string\n",
2528
gStored, DeltaUSec / (double)gStored);
2535
printf("# TotInserts 0 0 InsTime GetTime 0 Ram/String\n");
2537
for (grp = 0; grp < Groups; grp++)
2539
StrNumb += Pms[grp].ms_delta;
2541
printf(" %11lu", StrNumb); // Total stored
2542
printf(" 0 0"); // place holder
2543
printf(" %7.3f", Pms[grp].ms_mininsert);
2544
printf(" %7.3f", Pms[grp].ms_minretrive);
2545
printf(" 0"); // place holder
2546
printf(" %7.3f", Pms[grp].ms_Bytes);