14
/* WORDDATA page size */
15
/* !!!! Must no be less than 4096 and not greater than 65536*/
16
#define WORDDATA_PageSize 4096
17
/* WORDDATA Block size */
18
#define WORDDATA_BlockSize (WORDDATA_PageSize >> 8)
22
/* Round to WORDDATA_PageSize */
23
#define WORDDATA_RoundPageSize(n) (((n) + WORDDATA_PageSize - 1) & (~(WORDDATA_PageSize - 1)))
25
/* Round a number to the upper BlockSize */
26
#define WORDDATA_RoundBlockSize(n) (((n) + WORDDATA_BlockSize - 1) & (~(WORDDATA_BlockSize - 1)))
28
/* Let's asign the first block for the header */
29
#define WORDDATA_PageHeaderSize (WORDDATA_BlockSize)
31
/* Max data size to fit in a single page */
32
#define WORDDATA_MaxDataSize (WORDDATA_PageSize - WORDDATA_PageHeaderSize)
34
#define WORDDATA_PageData(pg) ((pg)->data + WORDDATA_PageHeaderSize)
35
#define WORDDATA_Data(pg,i) (WORDDATA_PageData((pg)) + (i) * SizeInt32)
37
#define WORDDATA_SetBlocksInUse(pg,num) ( *(int *)((pg)->data + 0 * SizeInt32) = PACKLONG(num))
38
#define WORDDATA_GetBlocksInUse(pg,num) ( (num) = UNPACKLONG(*(int *)((pg)->data + 0 * SizeInt32)))
40
#define WORDDATA_SetNumRecords(pg,num) ( *(int *)((pg)->data + 1 * SizeInt32) = PACKLONG(num))
41
#define WORDDATA_GetNumRecords(pg,num) ( (num) = UNPACKLONG(*(int *)((pg)->data + 1 * SizeInt32)))
44
int WORDDATA_WritePageToDisk(FILE *fp, WORDDATA_Page *pg)
46
WORDDATA_SetBlocksInUse(pg,pg->used_blocks);
47
WORDDATA_SetNumRecords(pg,pg->n);
48
fseek(fp,pg->page_number * WORDDATA_PageSize,SEEK_SET);
49
if ( fwrite(pg->data,WORDDATA_PageSize,1,fp) != 1 )
50
progerrno("Failed to write page to disk: ");
54
int WORDDATA_WritePage(WORDDATA *b, WORDDATA_Page *pg)
56
int hash = pg->page_number % WORDDATA_CACHE_SIZE;
59
if((tmp = b->cache[hash]))
63
if(tmp->page_number == pg->page_number)
67
tmp = tmp->next_cache;
70
pg->next_cache = b->cache[hash];
75
int WORDDATA_FlushCache(WORDDATA *b)
78
WORDDATA_Page *tmp, *next;
79
for(i = 0; i < WORDDATA_CACHE_SIZE; i++)
81
if((tmp = b->cache[i]))
85
next = tmp->next_cache;
88
WORDDATA_WritePageToDisk(b->fp, tmp);
91
if(tmp != b->cache[i])
96
b->cache[i]->next_cache = NULL;
102
int WORDDATA_CleanCache(WORDDATA *b)
105
WORDDATA_Page *tmp,*next;
106
for(i = 0; i < WORDDATA_CACHE_SIZE; i++)
108
if((tmp = b->cache[i]))
112
next = tmp->next_cache;
122
WORDDATA_Page *WORDDATA_ReadPageFromDisk(FILE *fp, unsigned long page_number)
124
WORDDATA_Page *pg = (WORDDATA_Page *)emalloc(sizeof(WORDDATA_Page) + WORDDATA_PageSize);
126
fseek(fp,page_number * WORDDATA_PageSize,SEEK_SET);
127
fread(pg->data,WORDDATA_PageSize, 1, fp);
129
WORDDATA_GetBlocksInUse(pg,pg->used_blocks);
130
WORDDATA_GetNumRecords(pg,pg->n);
132
pg->page_number = page_number;
137
WORDDATA_Page *WORDDATA_ReadPage(WORDDATA *b, unsigned long page_number)
139
int hash = page_number % WORDDATA_CACHE_SIZE;
141
if((tmp = b->cache[hash]))
145
if(tmp->page_number == page_number)
149
tmp = tmp->next_cache;
153
tmp = WORDDATA_ReadPageFromDisk(b->fp, page_number);
156
tmp->next_cache = b->cache[hash];
157
b->cache[hash] = tmp;
162
WORDDATA_Page *WORDDATA_NewPage(WORDDATA *b)
168
int size = WORDDATA_PageSize;
170
unsigned long page_number =0;
171
/* Let's see if we have a previous available page */
172
if(b->num_Reusable_Pages)
174
/* First, look for a page of the same size */
175
for(i = 0; i < b->num_Reusable_Pages ; i++)
177
if(size == b->Reusable_Pages[i].page_size)
180
/* If not found, let's try with a bigger one if exits */
181
if(i == b->num_Reusable_Pages)
183
for(i = 0; i < b->num_Reusable_Pages ; i++)
185
if(size < b->Reusable_Pages[i].page_size)
189
/* If we got one page return it */
190
if(i != b->num_Reusable_Pages)
192
page_number = b->Reusable_Pages[i].page_number;
193
if(size == b->Reusable_Pages[i].page_size)
195
for(++i;i<b->num_Reusable_Pages;i++)
198
b->Reusable_Pages[i-1].page_number=b->Reusable_Pages[i].page_number;
199
b->Reusable_Pages[i-1].page_size=b->Reusable_Pages[i].page_size;
201
b->num_Reusable_Pages--;
205
b->Reusable_Pages[i].page_number += size/WORDDATA_PageSize;
206
b->Reusable_Pages[i].page_size -= size;
210
/* If there is not any reusable page let's get it from disk */
213
/* Get file pointer */
214
if(fseek(fp,0,SEEK_END) !=0)
215
progerrno("Internal error seeking: ");
219
/* Round up file pointer */
220
offset = WORDDATA_RoundPageSize(offset);
222
/* Set new file pointer - data will be aligned */
223
if(fseek(fp,offset, SEEK_SET)!=0 || offset != ftell(fp))
224
progerrno("Internal error seeking: ");
226
if(fwrite("\0",1,size,fp)!=size || ((long)size + offset) != ftell(fp))
227
progerrno("Faild to write page data: ");
229
page_number = offset/WORDDATA_PageSize;
232
pg = (WORDDATA_Page *)emalloc(sizeof(WORDDATA_Page) + size);
233
memset(pg,0,sizeof(WORDDATA_Page) + size);
234
/* Reserve space in file */
237
pg->n = 0; /* Number of records */
239
pg->page_number = page_number;
244
hash = pg->page_number % WORDDATA_CACHE_SIZE;
245
pg->next_cache = b->cache[hash];
250
void WORDDATA_FreePage(WORDDATA *b, WORDDATA_Page *pg)
252
int hash = pg->page_number % WORDDATA_CACHE_SIZE;
255
tmp = b->cache[hash];
259
if (tmp->page_number != pg->page_number)
260
tmp = tmp->next_cache;
269
WORDDATA *WORDDATA_New(FILE *fp)
273
b = (WORDDATA *) emalloc(sizeof(WORDDATA));
275
b->last_put_page = 0;
276
b->last_del_page = 0;
277
b->last_get_page = 0;
279
for(i = 0; i < WORDDATA_CACHE_SIZE; i++)
282
b->num_Reusable_Pages = 0;
287
WORDDATA *WORDDATA_Open(FILE *fp)
291
b = WORDDATA_New(fp);
296
void WORDDATA_Close(WORDDATA *bt)
298
WORDDATA_FlushCache(bt);
299
WORDDATA_CleanCache(bt);
304
unsigned long WORDDATA_PutBig(WORDDATA *b, unsigned int len, unsigned char *data)
307
int size = WORDDATA_RoundPageSize(sizeof(unsigned long) + len);
308
unsigned long p_len = (unsigned long)PACKLONG((unsigned long)len);
311
unsigned long page_number = 0;
313
/* Let's see if we have a previous available page */
314
if(b->num_Reusable_Pages)
316
/* First, look for a page of the same size */
317
for(i = 0; i < b->num_Reusable_Pages ; i++)
319
if(size == b->Reusable_Pages[i].page_size)
322
/* If not found, let's try with a bigger one if exits */
323
if(i == b->num_Reusable_Pages)
325
for(i = 0; i < b->num_Reusable_Pages ; i++)
327
if(size < b->Reusable_Pages[i].page_size)
331
/* If we got one page return it */
332
if(i != b->num_Reusable_Pages)
334
page_number = b->Reusable_Pages[i].page_number;
335
if(size == b->Reusable_Pages[i].page_size)
337
for(++i;i<b->num_Reusable_Pages;i++)
340
b->Reusable_Pages[i-1].page_number=b->Reusable_Pages[i].page_number;
341
b->Reusable_Pages[i-1].page_size=b->Reusable_Pages[i].page_size;
343
b->num_Reusable_Pages--;
347
b->Reusable_Pages[i].page_number += size/WORDDATA_PageSize;
348
b->Reusable_Pages[i].page_size -= size;
354
/* Get file pointer */
355
if(fseek(fp,0,SEEK_END) !=0)
356
progerrno("Internal error seeking: ");
359
/* Round up file pointer */
360
offset = WORDDATA_RoundPageSize(offset);
364
offset = page_number * WORDDATA_PageSize;
366
/* Set new file pointer - data will be aligned */
367
if(fseek(fp,offset, SEEK_SET)!=0 || offset != ftell(fp))
368
progerrno("Internal error seeking: ");
370
id = ((unsigned long) (offset / WORDDATA_PageSize)) << 8;
372
/* Write packed length */
373
fwrite(&p_len,1,SizeInt32,fp);
375
fwrite(data,1,len,fp);
379
/* Round up file pointer */
380
offset = WORDDATA_RoundPageSize(offset);
381
/* Set new file pointer - data will be aligned */
382
if(fseek(fp,offset, SEEK_SET)!=0 || offset != ftell(fp))
383
progerrno("Internal error seeking: ");
390
unsigned long WORDDATA_Put(WORDDATA *b, unsigned int len, unsigned char *data)
394
int i, r_id, r_len, tmp;
395
int last_id, free_id;
397
unsigned char buffer[WORDDATA_PageSize];
398
WORDDATA_Page *last_page=NULL;
399
/* Check if data fits in a single page */
400
/* We need 1 byte for the id plus two bytes for the size */
401
required_length = len + 1 + 2;
402
/* Round it to the upper block size */
403
required_length = WORDDATA_RoundBlockSize(required_length);
404
if(required_length > WORDDATA_MaxDataSize)
406
/* Store long record in file */
407
return WORDDATA_PutBig(b,len,data);
410
/* let's see if the data fits in the last page */
411
/* First - Check for a page with a Del Operation */
414
free_blocks = 255 - b->last_del_page->used_blocks;
415
if(!(required_length > (free_blocks * WORDDATA_BlockSize)))
417
last_page = b->last_del_page;
422
if( b->last_put_page)
424
/* Now check for the last page in a put operation */
425
free_blocks = 255 - b->last_put_page->used_blocks;
426
if(required_length > (free_blocks * WORDDATA_BlockSize))
428
WORDDATA_FreePage(b,b->last_put_page);
430
/* Save some memory - Do some flush flush of the data */
431
if(!(b->page_counter % WORDDATA_CACHE_SIZE))
433
WORDDATA_FlushCache(b);
434
WORDDATA_CleanCache(b);
436
b->last_get_page = b->last_put_page = b->last_del_page = 0;
439
b->last_put_page = WORDDATA_NewPage(b);
444
/* Save some memory - Do some flush flush of the data */
445
if(!(b->page_counter % WORDDATA_CACHE_SIZE))
447
WORDDATA_FlushCache(b);
448
WORDDATA_CleanCache(b);
450
b->last_get_page = b->last_put_page = b->last_del_page = 0;
453
b->last_put_page = WORDDATA_NewPage(b);
455
last_page = b->last_put_page;
458
for(i = 0, free_id = 0, last_id = 0, p = WORDDATA_PageData(last_page); i < last_page->n; i++)
460
/* Get the record id */
462
/* Get the record length */
463
r_len = ((((int)(p[1])) << 8) + (int)(p[2]));
464
if((r_id - last_id) > 1) /* find a reusable id */
466
free_id = last_id + 1;
470
p += WORDDATA_RoundBlockSize((3 + r_len));
473
free_id = last_id + 1;
476
memcpy(q,WORDDATA_PageData(last_page), p - WORDDATA_PageData(last_page));
477
q += p - WORDDATA_PageData(last_page);
478
q[0] = (unsigned char) free_id;
479
q[1] = (unsigned char) (len >> 8);
480
q[2] = (unsigned char) (len & 0xff);
481
memcpy(q+3,data,len);
482
q += WORDDATA_RoundBlockSize((3 + len));
483
for(;i < last_page->n; i++)
485
/* Get the record length */
486
r_len = ((((int)(p[1])) << 8) + (int)(p[2]));
487
tmp = WORDDATA_RoundBlockSize((3 + r_len));
492
memcpy(WORDDATA_PageData(last_page),buffer,q - buffer);
494
last_page->used_blocks += required_length / WORDDATA_BlockSize;
495
WORDDATA_WritePage(b,last_page);
496
return (unsigned long)(b->lastid=(unsigned long)((last_page->page_number << 8) + free_id));
499
unsigned char *WORDDATA_GetBig(WORDDATA *b, unsigned long page_number, unsigned int *len)
501
long offset = (long) (page_number * WORDDATA_PageSize);
504
fseek(b->fp, offset, SEEK_SET);
505
fread(&p_len,1,SizeInt32,b->fp);
506
*len = UNPACKLONG(p_len);
507
data = (unsigned char *)emalloc(*len);
508
fread(data,1,*len,b->fp);
512
unsigned char *WORDDATA_Get(WORDDATA *b, unsigned long global_id, unsigned int *len)
514
unsigned long page_number = global_id >> 8;
515
int id = (int)(global_id & 0xff);
516
int r_id=-1,r_len=-1;
523
return WORDDATA_GetBig(b,page_number,len);
526
WORDDATA_FreePage(b,b->last_get_page);
528
b->last_get_page = WORDDATA_ReadPage(b,page_number);
530
for(i = 0, p = WORDDATA_PageData(b->last_get_page); i < b->last_get_page->n; i++)
534
/* Get the record length */
535
r_len = ((((int)(p[1])) << 8) + (int)(p[2]));
536
if(r_id == id) /* find the id */
538
p += WORDDATA_RoundBlockSize((3 + r_len));
543
data = (unsigned char *) emalloc(r_len);
544
memcpy(data , p + 3 , r_len);
556
void WORDDATA_DelBig(WORDDATA *b, unsigned long page_number, unsigned int *len)
558
long offset = (long) (page_number * WORDDATA_PageSize);
560
fseek(b->fp, offset, SEEK_SET);
561
fread(&p_len,1,SizeInt32,b->fp);
562
*len = UNPACKLONG(p_len) + sizeof(unsigned long);
564
if(b->num_Reusable_Pages < WORDDATA_MAX_REUSABLE_PAGES)
566
b->Reusable_Pages[b->num_Reusable_Pages].page_number = page_number;
567
b->Reusable_Pages[b->num_Reusable_Pages++].page_size = WORDDATA_RoundPageSize(*len);
571
void WORDDATA_Del(WORDDATA *b, unsigned long global_id, unsigned int *len)
573
unsigned long page_number = global_id >> 8;
574
int id = (int)(global_id & 0xff);
575
int r_id=-1,r_len=-1,tmp;
577
unsigned char *p, *q;
582
WORDDATA_DelBig(b,page_number,len);
586
WORDDATA_FreePage(b,b->last_del_page);
588
b->last_del_page = WORDDATA_ReadPage(b,page_number);
590
for(i = 0, p = WORDDATA_PageData(b->last_del_page); i < b->last_del_page->n; i++)
594
/* Get the record length */
595
r_len = ((((int)(p[1])) << 8) + (int)(p[2]));
596
if(r_id == id) /* id found */
598
p += WORDDATA_RoundBlockSize((3 + r_len));
604
deleted_length = WORDDATA_RoundBlockSize(r_len);
605
/* Move rest of worddata to put them contigous (Remove the hole) */
606
/* q points to the hole, p to the next record */
608
p += WORDDATA_RoundBlockSize((3 + r_len));
609
for(++i;i < b->last_del_page->n; i++)
611
/* Get the record length */
612
r_len = ((((int)(p[1])) << 8) + (int)(p[2]));
613
tmp = WORDDATA_RoundBlockSize((3 + r_len));
618
b->last_del_page->n--;
619
b->last_del_page->used_blocks -= deleted_length / WORDDATA_BlockSize;
620
if(!b->last_del_page->n)
622
if(b->num_Reusable_Pages < WORDDATA_MAX_REUSABLE_PAGES)
624
b->Reusable_Pages[b->num_Reusable_Pages].page_number = page_number;
625
b->Reusable_Pages[b->num_Reusable_Pages++].page_size = WORDDATA_PageSize;
627
b->last_del_page = 0;
631
WORDDATA_WritePage(b,b->last_del_page);
650
#define FILEMODE_READ "rb"
651
#define FILEMODE_WRITE "wb"
652
#define FILEMODE_READWRITE "rb+"
654
#define FILEMODE_READ "rb"
655
#define FILEMODE_WRITE "wb"
656
#define FILEMODE_READWRITE "rb+"
658
#define FILEMODE_READ "r"
659
#define FILEMODE_WRITE "w"
660
#define FILEMODE_READWRITE "r+"
668
static unsigned long nums[N_TEST];
673
fp = fopen("kkkkk",FILEMODE_WRITE);
675
fp = fopen("kkkkk",FILEMODE_READWRITE);
677
fwrite("aaa",1,3,fp);
679
printf("\n\nIndexing\n\n");
681
bt = WORDDATA_Open(fp);
682
for(i=0;i<N_TEST;i++)
684
nums[i] = WORDDATA_Put(bt,16,"1234567890123456");
685
if(memcmp(WORDDATA_Get(bt,nums[i],&len),"1234567890123456",16)!=0)
686
printf("\n\nmal %d\n\n",i);
689
WORDDATA_FlushCache(bt);
697
printf("\n\nUnfreed %d\n\n",num);
698
printf("\n\nSearching\n\n");
700
fp = fopen("kkkkk",FILEMODE_READ);
701
bt = WORDDATA_Open(fp);
703
for(i=0;i<N_TEST;i++)
705
if(memcmp(WORDDATA_Get(bt,nums[i],&len),"1234567890123456",16)!=0)
706
printf("\n\nmal %d\n\n",i);
714
printf("\n\nUnfreed %d\n\n",num);