1
/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8; fill-column: 160 -*- */
3
* Copyright (C) 1999-2008 Novell, Inc. (www.novell.com)
5
* Authors: Michael Zucchi <notzed@ximian.com>
7
* This program is free software; you can redistribute it and/or
8
* modify it under the terms of version 2 of the GNU Lesser General Public
9
* License as published by the Free Software Foundation.
11
* This program is distributed in the hope that it will be useful,
12
* but WITHOUT ANY WARRANTY; without even the implied warranty of
13
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
* General Public License for more details.
16
* You should have received a copy of the GNU Lesser General Public
17
* License along with this program; if not, write to the
18
* Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19
* Boston, MA 02110-1301, USA.
34
#include <sys/types.h>
36
#include <glib/gstdio.h>
38
#include "camel-block-file.h"
39
#include "camel-list-utils.h"
40
#include "camel-mempool.h"
41
#include "camel-object.h"
42
#include "camel-partition-table.h"
43
#include "camel-text-index.h"
47
#define d(x) /*(printf ("%s (%d):%s: ", __FILE__, __LINE__, __PRETTY_FUNCTION__),(x))*/
52
#define CAMEL_TEXT_INDEX_MAX_WORDLEN (36)
54
#define CAMEL_TEXT_INDEX_LOCK(kf, lock) \
55
(g_static_rec_mutex_lock (&((CamelTextIndex *) kf)->priv->lock))
56
#define CAMEL_TEXT_INDEX_UNLOCK(kf, lock) \
57
(g_static_rec_mutex_unlock (&((CamelTextIndex *) kf)->priv->lock))
59
static gint text_index_compress_nosync (CamelIndex *idx);
61
/* ********************************************************************** */
63
struct _CamelTextIndexNamePrivate {
69
CamelTextIndexName *camel_text_index_name_new (CamelTextIndex *idx, const gchar *name, camel_key_t nameid);
71
/* ****************************** */
73
struct _CamelTextIndexCursorPrivate {
85
CamelTextIndexCursor *camel_text_index_cursor_new (CamelTextIndex *idx, camel_block_t data);
87
/* ****************************** */
89
struct _CamelTextIndexKeyCursorPrivate {
98
CamelTextIndexKeyCursor *camel_text_index_key_cursor_new (CamelTextIndex *idx, CamelKeyTable *table);
100
/* ********************************************************************** */
102
#define CAMEL_TEXT_INDEX_VERSION "TEXT.000"
103
#define CAMEL_TEXT_INDEX_KEY_VERSION "KEYS.000"
105
/* Private state of a CamelTextIndex: the block file, the key/partition
 * tables for words and for names, the cache of pending words, and the
 * recursive mutex used by CAMEL_TEXT_INDEX_LOCK/UNLOCK.
 * NOTE(review): other members referenced in this file (e.g. links, words)
 * are not visible in this listing. */
struct _CamelTextIndexPrivate {
106
CamelBlockFile *blocks;
109
CamelKeyTable *word_index;
110
CamelPartitionTable *word_hash;
112
CamelKeyTable *name_index;
113
CamelPartitionTable *name_hash;
115
/* Cache of words to write */
116
gint word_cache_limit;
117
gint word_cache_count;
118
CamelDList word_cache;
120
GStaticRecMutex lock;
123
/* Root block of text index.
 * Embeds the generic block-file root and adds the root block ids of the four
 * tables plus the counters read by the fragmentation checks in
 * text_index_sync and camel_text_index_info. */
124
struct _CamelTextIndexRoot {
125
struct _CamelBlockRoot root;
127
/* FIXME: the index root could contain a pointer to the hash root */
128
camel_block_t word_index_root; /* a keyindex containing the keyid -> word mapping */
129
camel_block_t word_hash_root; /* a partitionindex containing word -> keyid mapping */
131
camel_block_t name_index_root; /* same, for names */
132
camel_block_t name_hash_root;
134
guint32 words; /* total words */
135
guint32 names; /* total names */
136
guint32 deleted; /* deleted names */
137
guint32 keys; /* total key 'chunks' written, used with deleted to determine fragmentation */
140
/* One entry of the in-memory word cache. Entries are cast to
 * CamelDListNode * when linked into priv->word_cache, and names[] buffers
 * name ids until they are written out with camel_key_file_write.
 * NOTE(review): members used elsewhere in this file (word, wordid, used)
 * are not visible in this listing. */
struct _CamelTextIndexWord {
141
struct _CamelTextIndexWord *next;
142
struct _CamelTextIndexWord *prev;
144
camel_block_t data; /* where the data starts */
148
camel_key_t names[32];
151
/* ********************************************************************** */
153
/* ********************************************************************** */
155
G_DEFINE_TYPE (CamelTextIndex, camel_text_index, CAMEL_TYPE_INDEX)
158
/* GObject dispose handler for CamelTextIndex.
 * Syncs the index while word_index is still set, drops the references held
 * on the key tables, partition tables, block file and key file, then chains
 * up to the parent class. */
text_index_dispose (GObject *object)
160
CamelTextIndexPrivate *priv;
162
priv = CAMEL_TEXT_INDEX (object)->priv;
164
/* Only run this the first time. */
165
if (priv->word_index != NULL)
166
camel_index_sync (CAMEL_INDEX (object));
168
/* word_index doubles as the "already disposed" marker for the sync above,
 * so it is cleared once released. */
if (priv->word_index != NULL) {
169
g_object_unref (priv->word_index);
170
priv->word_index = NULL;
173
if (priv->word_hash != NULL) {
174
g_object_unref (priv->word_hash);
175
priv->word_hash = NULL;
178
if (priv->name_index != NULL) {
179
g_object_unref (priv->name_index);
180
priv->name_index = NULL;
183
if (priv->name_hash != NULL) {
184
g_object_unref (priv->name_hash);
185
priv->name_hash = NULL;
188
/* NOTE(review): the lines that reset blocks/links after the unref are not
 * visible in this listing - confirm they are cleared like the tables. */
if (priv->blocks != NULL) {
189
g_object_unref (priv->blocks);
193
if (priv->links != NULL) {
194
g_object_unref (priv->links);
198
/* Chain up to parent's dispose () method. */
199
G_OBJECT_CLASS (camel_text_index_parent_class)->dispose (object);
203
/* GObject finalize handler for CamelTextIndex.
 * Asserts that the word cache list and the words hash table are both empty,
 * then destroys the hash table, frees the recursive mutex and chains up. */
text_index_finalize (GObject *object)
205
CamelTextIndexPrivate *priv;
207
priv = CAMEL_TEXT_INDEX (object)->priv;
209
/* Both asserts abort if cached words were never flushed. */
g_assert (camel_dlist_empty (&priv->word_cache));
210
g_assert (g_hash_table_size (priv->words) == 0);
212
g_hash_table_destroy (priv->words);
214
g_static_rec_mutex_free (&priv->lock);
216
/* Chain up to parent's finalize () method. */
217
G_OBJECT_CLASS (camel_text_index_parent_class)->finalize (object);
222
/* Records that name id `nameid` contains `word`.
 *
 * The word is first looked up in the in-memory cache (p->words). The visible
 * code then has two paths (the branch statements themselves are not visible
 * in this listing, so the split below is presumed):
 *  - cache miss: resolve the word id through word_hash, adding the word to
 *    word_index/word_hash if needed, create a new cache entry at the head
 *    of word_cache, and write out entries from the tail while the cache
 *    count exceeds word_cache_limit;
 *  - cache hit: move the entry to the head of word_cache, append nameid to
 *    names[], and write names[] to the key file once it is full. */
text_index_add_name_to_word (CamelIndex *idx,
226
struct _CamelTextIndexWord *w, *wp, *ww;
227
CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv;
230
struct _CamelTextIndexRoot *rb = (struct _CamelTextIndexRoot *) p->blocks->root;
232
w = g_hash_table_lookup (p->words, word);
234
wordid = camel_partition_table_lookup (p->word_hash, word);
237
wordid = camel_key_table_add (p->word_index, word, 0, 0);
239
g_warning ("Could not create key entry for word '%s': %s\n",
240
word, g_strerror (errno));
243
if (camel_partition_table_add (p->word_hash, word, wordid) == -1) {
244
g_warning ("Could not create hash entry for word '%s': %s\n",
245
word, g_strerror (errno));
249
camel_block_file_touch_block (p->blocks, p->blocks->root_block);
251
data = camel_key_table_lookup (p->word_index, wordid, NULL, NULL);
253
g_warning ("Could not find key entry for word '%s': %s\n",
254
word, g_strerror (errno));
259
/* New cache entry: the hash table key is the entry's own copy of the word. */
w = g_malloc0 (sizeof (*w));
260
w->word = g_strdup (word);
265
w->names[0] = nameid;
266
g_hash_table_insert (p->words, w->word, w);
267
camel_dlist_addhead (&p->word_cache, (CamelDListNode *) w);
268
p->word_cache_count++;
269
/* Evict from the tail of the list while over the limit. */
ww = (struct _CamelTextIndexWord *) p->word_cache.tailpred;
271
while (wp && p->word_cache_count > p->word_cache_limit) {
272
io (printf ("writing key file entry '%s' [%x]\n", ww->word, ww->data));
273
if (camel_key_file_write (p->links, &ww->data, ww->used, ww->names) != -1) {
274
io (printf (" new data [%x]\n", ww->data));
276
camel_block_file_touch_block (p->blocks, p->blocks->root_block);
277
/* if this call fails - we still point to the old data - not fatal */
278
camel_key_table_set_data (
279
p->word_index, ww->wordid, ww->data);
280
camel_dlist_remove ((CamelDListNode *) ww);
281
g_hash_table_remove (p->words, ww->word);
284
p->word_cache_count--;
290
/* Existing entry: re-insert at the head so it is the last to be evicted. */
camel_dlist_remove ((CamelDListNode *) w);
291
camel_dlist_addhead (&p->word_cache, (CamelDListNode *) w);
292
w->names[w->used] = nameid;
294
if (w->used == G_N_ELEMENTS (w->names)) {
295
io (printf ("writing key file entry '%s' [%x]\n", w->word, w->data));
296
if (camel_key_file_write (p->links, &w->data, w->used, w->names) != -1) {
298
camel_block_file_touch_block (p->blocks, p->blocks->root_block);
299
/* if this call fails - we still point to the old data - not fatal */
300
camel_key_table_set_data (
301
p->word_index, w->wordid, w->data);
303
/* FIXME: what to do on error? lost data? */
310
/* CamelIndexClass::sync implementation.
 * Under the index lock: lowers the cache limits, writes every cached word's
 * names to the key file, syncs the four tables, computes word/name
 * fragmentation percentages from the root block counters and calls
 * text_index_compress_nosync when wfrag > 30 or nfrag > 20, then syncs the
 * block file. The result is tracked in `ret` (the return statement is not
 * visible in this listing). */
text_index_sync (CamelIndex *idx)
312
CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv;
313
struct _CamelTextIndexWord *ww;
314
struct _CamelTextIndexRoot *rb;
315
gint ret = 0, wfrag, nfrag;
317
d (printf ("sync: blocks = %p\n", p->blocks));
319
/* Guard: every backing object must be present before syncing. */
if (p->blocks == NULL || p->links == NULL
320
|| p->word_index == NULL || p->word_hash == NULL
321
|| p->name_index == NULL || p->name_hash == NULL)
324
rb = (struct _CamelTextIndexRoot *) p->blocks->root;
326
/* sync/flush word cache */
328
CAMEL_TEXT_INDEX_LOCK (idx, lock);
330
/* we sync, bump down the cache limits since we don't need them for reading */
331
p->blocks->block_cache_limit = 128;
332
/* this doesn't really need to be dropped, it's only used in updates anyway */
333
p->word_cache_limit = 1024;
335
while ((ww = (struct _CamelTextIndexWord *) camel_dlist_remhead (&p->word_cache))) {
337
io (printf ("writing key file entry '%s' [%x]\n", ww->word, ww->data));
338
if (camel_key_file_write (p->links, &ww->data, ww->used, ww->names) != -1) {
339
io (printf (" new data [%x]\n", ww->data));
341
camel_block_file_touch_block (p->blocks, p->blocks->root_block);
342
camel_key_table_set_data (
343
p->word_index, ww->wordid, ww->data);
349
g_hash_table_remove (p->words, ww->word);
354
if (camel_key_table_sync (p->word_index) == -1
355
|| camel_key_table_sync (p->name_index) == -1
356
|| camel_partition_table_sync (p->word_hash) == -1
357
|| camel_partition_table_sync (p->name_hash) == -1)
360
/* only do the frag/compress check if we did some new writes on this index */
/* NOTE(review): rb->keys and rb->words are guint32, so (keys - words) wraps
 * to a large value if keys < words - confirm that cannot happen. */
361
wfrag = rb->words ? (((rb->keys - rb->words) * 100)/ rb->words) : 0;
362
nfrag = rb->names ? ((rb->deleted * 100) / rb->names) : 0;
363
d (printf (" words = %d, keys = %d\n", rb->words, rb->keys));
366
if (wfrag > 30 || nfrag > 20)
367
ret = text_index_compress_nosync (idx);
370
ret = camel_block_file_sync (p->blocks);
372
CAMEL_TEXT_INDEX_UNLOCK (idx, lock);
377
/* Writes into `o` a temporary-file variant of path `in`, with ".#" inserted
 * in front of the last path component. When `in` contains a '/', the
 * directory part (including the slash) is copied first; the final sprintf
 * handles a path with no '/' (presumably the else branch - the conditional
 * itself is not visible in this listing).
 * `o` must have room for strlen (in) + 3 bytes. */
static void tmp_name (const gchar *in, gchar *o)
381
s = strrchr (in, '/');
383
memcpy (o, in, s - in + 1);
384
memcpy (o+(s-in+1), ".#", 2);
385
strcpy (o + (s - in + 3), s + 1);
387
sprintf (o, ".#%s", in);
392
/* CamelIndexClass::compress implementation.
 * Holds the index lock across a camel_index_sync followed by
 * text_index_compress_nosync. Whether the compress step is skipped when the
 * sync fails cannot be determined from the visible lines. */
text_index_compress (CamelIndex *idx)
396
CAMEL_TEXT_INDEX_LOCK (idx, lock);
398
ret = camel_index_sync (idx);
400
ret = text_index_compress_nosync (idx);
402
CAMEL_TEXT_INDEX_UNLOCK (idx, lock);
407
/* Attempt to recover index space by compressing the indices.
 *
 * Visible steps: build three paths from idx->path with its ".index" suffix
 * removed (old base, a ".#"-prefixed temporary from tmp_name, and a "~" save
 * name); create a fresh index at the temporary path; copy every name whose
 * flag bit 0 is clear into it while recording old -> new key ids in `remap`;
 * copy each word's records through `remap`, writing them in batches of up to
 * 256; sync the new index; rename the old index to the save name and the new
 * one to the old name; swap the private members and path between the two
 * objects; finally delete and unref the temporary object.
 * Error handling and the return statement are not visible in this listing. */
409
text_index_compress_nosync (CamelIndex *idx)
411
CamelTextIndex *newidx;
412
CamelTextIndexPrivate *newp, *oldp;
413
camel_key_t oldkeyid, newkeyid;
416
camel_block_t data, newdata;
420
gchar *newpath, *savepath, *oldpath;
421
gsize count, newcount;
422
camel_key_t *records, newrecords[256];
423
struct _CamelTextIndexRoot *rb;
425
/* All three path buffers get the same size: the source path plus 16 bytes
 * of slack for the prefixes/suffixes added below. */
i = strlen (idx->path) + 16;
426
oldpath = alloca (i);
427
newpath = alloca (i);
428
savepath = alloca (i);
430
strcpy (oldpath, idx->path);
431
/* Truncate the trailing ".index"; assumes idx->path ends with it - TODO confirm. */
oldpath[strlen (oldpath)-strlen (".index")] = 0;
433
tmp_name (oldpath, newpath);
434
sprintf (savepath, "%s~", oldpath);
436
d (printf ("Old index: %s\n", idx->path));
437
d (printf ("Old path: %s\n", oldpath));
438
d (printf ("New: %s\n", newpath));
439
d (printf ("Save: %s\n", savepath));
441
newidx = camel_text_index_new (newpath, O_RDWR | O_CREAT);
445
newp = CAMEL_TEXT_INDEX (newidx)->priv;
446
oldp = CAMEL_TEXT_INDEX (idx)->priv;
448
CAMEL_TEXT_INDEX_LOCK (idx, lock);
450
rb = (struct _CamelTextIndexRoot *) newp->blocks->root;
458
* For each name we still have:
459
* Add it to the new index & setup remap table
462
* Copy word's data to a new file
463
* Add new word to index (*) (can we just copy blocks?) */
465
/* Copy undeleted names to new index file, creating new indices */
466
io (printf ("Copying undeleted names to new file\n"));
467
remap = g_hash_table_new (NULL, NULL);
470
while ((oldkeyid = camel_key_table_next (oldp->name_index, oldkeyid, &name, &flags, &data))) {
471
if ((flags&1) == 0) {
472
io (printf ("copying name '%s'\n", name));
473
newkeyid = camel_key_table_add (
474
newp->name_index, name, data, flags);
478
camel_partition_table_add (
479
newp->name_hash, name, newkeyid);
480
g_hash_table_insert (remap, GINT_TO_POINTER (oldkeyid), GINT_TO_POINTER (newkeyid));
482
io (printf ("deleted name '%s'\n", name));
489
/* Copy word data across, remapping/deleting and create new index for it */
490
/* We re-block the data into 256 entry lots while we're at it, since we only
491
* have to do 1 at a time and its cheap */
493
while ((oldkeyid = camel_key_table_next (oldp->word_index, oldkeyid, &name, &flags, &data))) {
494
io(printf ("copying word '%s'\n", name));
502
if (camel_key_file_read (oldp->links, &data, &count, &records) == -1) {
503
io (printf ("could not read from old keys at %d for word '%s'\n", (gint)data, name));
506
for (i = 0; i < count; i++) {
507
newkeyid = (camel_key_t) GPOINTER_TO_INT (g_hash_table_lookup (remap, GINT_TO_POINTER (records[i])));
509
newrecords[newcount++] = newkeyid;
510
if (newcount == G_N_ELEMENTS (newrecords)) {
511
if (camel_key_file_write (newp->links, &newdata, newcount, newrecords) == -1) {
523
if (camel_key_file_write (newp->links, &newdata, newcount, newrecords) == -1)
528
newkeyid = camel_key_table_add (
529
newp->word_index, name, newdata, flags);
532
camel_partition_table_add (
533
newp->word_hash, name, newkeyid);
539
camel_block_file_touch_block (newp->blocks, newp->blocks->root_block);
541
if (camel_index_sync (CAMEL_INDEX (newidx)) == -1)
544
/* Rename underlying files to match */
545
ret = camel_index_rename (idx, savepath);
549
/* If this fails, we'll pick up something during restart? */
550
ret = camel_index_rename ((CamelIndex *) newidx, oldpath);
552
#define myswap(a, b) { gpointer tmp = a; a = b; b = tmp; }
553
/* Poke the private data across to the new object */
554
/* And change the fd's over, etc? */
555
/* Yes: This is a hack */
556
myswap (newp->blocks, oldp->blocks);
557
myswap (newp->links, oldp->links);
558
myswap (newp->word_index, oldp->word_index);
559
myswap (newp->word_hash, oldp->word_hash);
560
myswap (newp->name_index, oldp->name_index);
561
myswap (newp->name_hash, oldp->name_hash);
562
myswap (((CamelIndex *) newidx)->path, ((CamelIndex *) idx)->path);
567
CAMEL_TEXT_INDEX_UNLOCK (idx, lock);
569
camel_index_delete ((CamelIndex *) newidx);
571
g_object_unref (newidx);
573
g_hash_table_destroy (remap);
575
/* clean up temp files always */
576
sprintf (savepath, "%s~.index", oldpath);
578
sprintf (newpath, "%s.data", savepath);
585
/* CamelIndexClass::delete implementation.
 * Deletes the backing block file and key file; each call is compared
 * against -1 (the statements executed on failure are not visible in this
 * listing). */
text_index_delete (CamelIndex *idx)
587
CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv;
590
if (camel_block_file_delete (p->blocks) == -1)
592
if (camel_key_file_delete (p->links) == -1)
599
/* CamelIndexClass::rename implementation.
 * Under the index lock, renames the block file to "<path>.index" and the key
 * file to "<path>.index.data", then replaces idx->path with a copy of the
 * new block file name. The rename of the block file back to idx->path is
 * presumably the rollback taken when the key file rename fails; the
 * conditions guarding it and the early unlocks are not visible in this
 * listing. */
text_index_rename (CamelIndex *idx,
602
CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv;
603
gchar *newlink, *newblock;
606
CAMEL_TEXT_INDEX_LOCK (idx, lock);
608
/* +8 covers ".index" plus the terminating NUL. */
newblock = alloca (strlen (path) + 8);
609
sprintf (newblock, "%s.index", path);
610
ret = camel_block_file_rename (p->blocks, newblock);
612
CAMEL_TEXT_INDEX_UNLOCK (idx, lock);
616
/* +16 covers ".index.data" plus the terminating NUL. */
newlink = alloca (strlen (path) + 16);
617
sprintf (newlink, "%s.index.data", path);
618
ret = camel_key_file_rename (p->links, newlink);
621
camel_block_file_rename (p->blocks, idx->path);
622
CAMEL_TEXT_INDEX_UNLOCK (idx, lock);
628
idx->path = g_strdup (newblock);
630
CAMEL_TEXT_INDEX_UNLOCK (idx, lock);
636
/* CamelIndexClass::has_name implementation.
 * Returns non-zero when the name hash lookup for `name` yields a non-zero
 * key id. No lock is taken in the visible code. */
text_index_has_name (CamelIndex *idx,
639
CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv;
641
return camel_partition_table_lookup (p->name_hash, name) != 0;
644
/* CamelIndexClass::add_name implementation.
 * Under the index lock: raises the block and word cache limits, looks the
 * name up in name_hash, flags an existing key via camel_key_table_set_flags
 * and removes its hash entry, then adds a new key and hash entry. After
 * unlocking, a CamelTextIndexName is created for the resulting key id.
 * The conditionals around the replace/add steps and the return statement are
 * not visible in this listing. */
static CamelIndexName *
645
text_index_add_name (CamelIndex *idx,
648
CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv;
651
struct _CamelTextIndexRoot *rb = (struct _CamelTextIndexRoot *) p->blocks->root;
653
CAMEL_TEXT_INDEX_LOCK (idx, lock);
655
/* if we're adding words, up the cache limits a lot */
657
p->blocks->block_cache_limit = 1024;
658
p->word_cache_limit = 8192;
661
/* If we have it already replace it */
662
keyid = camel_partition_table_lookup (p->name_hash, name);
664
/* TODO: We could just update the partition table's
665
* key pointer rather than having to delete it */
667
/* Flag bit 1 marks the key deleted (see the "(flags & 1) == 0" checks
 * elsewhere in this file). */
camel_key_table_set_flags (p->name_index, keyid, 1, 1);
668
camel_partition_table_remove (p->name_hash, name);
671
keyid = camel_key_table_add (p->name_index, name, 0, 0);
673
camel_partition_table_add (p->name_hash, name, keyid);
677
camel_block_file_touch_block (p->blocks, p->blocks->root_block);
679
/* TODO: if keyid == 0, we had a failure, we should somehow flag that, but for
680
* now just return a valid object but discard its results, see text_index_write_name */
682
CAMEL_TEXT_INDEX_UNLOCK (idx, lock);
684
idn = (CamelIndexName *) camel_text_index_name_new ((CamelTextIndex *) idx, name, keyid);
691
/* GHFunc callback used by text_index_write_name: for one word of the name's
 * word table, records the name's id against that word in the owning index. */
hash_write_word (gchar *word,
695
CamelTextIndexName *tin = (CamelTextIndexName *) idn;
697
text_index_add_name_to_word (idn->index, word, tin->priv->nameid);
701
/* CamelIndexClass::write_name implementation.
 * Flushes any partially buffered word of `idn`, then, if the name has a
 * non-zero id, walks its word table under the index lock and records each
 * word through hash_write_word. */
text_index_write_name (CamelIndex *idx,
704
/* force 'flush' of any outstanding data */
705
camel_index_name_add_buffer (idn, NULL, 0);
707
/* see text_index_add_name for when this can be 0 */
708
if (((CamelTextIndexName *) idn)->priv->nameid != 0) {
709
CAMEL_TEXT_INDEX_LOCK (idx, lock);
711
g_hash_table_foreach (idn->words, (GHFunc) hash_write_word, idn);
713
CAMEL_TEXT_INDEX_UNLOCK (idx, lock);
719
/* CamelIndexClass::find_name implementation.
 * NOTE(review): no body statements are visible in this listing beyond the
 * original author's remark below; behaviour cannot be documented from here. */
static CamelIndexCursor *
720
text_index_find_name (CamelIndex *idx,
723
/* what was this for, umm */
728
/* CamelIndexClass::delete_name implementation.
 * Under the index lock, looks `name` up in name_hash, touches the root
 * block, flags the key via camel_key_table_set_flags and removes the hash
 * entry. The key data itself is left in place. The check on the looked-up
 * key id is not visible in this listing. */
text_index_delete_name (CamelIndex *idx,
731
CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv;
733
struct _CamelTextIndexRoot *rb = (struct _CamelTextIndexRoot *) p->blocks->root;
735
d (printf ("Delete name: %s\n", name));
737
/* probably doesn't really need locking, but oh well */
738
CAMEL_TEXT_INDEX_LOCK (idx, lock);
740
/* We just mark the key deleted, and remove it from the hash table */
741
keyid = camel_partition_table_lookup (p->name_hash, name);
744
camel_block_file_touch_block (p->blocks, p->blocks->root_block);
745
camel_key_table_set_flags (p->name_index, keyid, 1, 1);
746
camel_partition_table_remove (p->name_hash, name);
749
CAMEL_TEXT_INDEX_UNLOCK (idx, lock);
752
/* CamelIndexClass::find implementation.
 * Under the index lock, resolves `word` to a key id through word_hash and
 * reads that key's data block id from word_index; `data` stays 0 unless the
 * lookup assigns it. A cursor over that data block is then created outside
 * the lock. */
static CamelIndexCursor *
753
text_index_find (CamelIndex *idx,
756
CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv;
758
camel_block_t data = 0;
760
CamelIndexCursor *idc;
762
CAMEL_TEXT_INDEX_LOCK (idx, lock);
764
keyid = camel_partition_table_lookup (p->word_hash, word);
766
data = camel_key_table_lookup (
767
p->word_index, keyid, NULL, &flags);
772
CAMEL_TEXT_INDEX_UNLOCK (idx, lock);
774
idc = (CamelIndexCursor *) camel_text_index_cursor_new ((CamelTextIndex *) idx, data);
779
/* CamelIndexClass::words implementation: returns a new key cursor over the
 * word key table. */
static CamelIndexCursor *
780
text_index_words (CamelIndex *idx)
782
CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv;
784
return (CamelIndexCursor *) camel_text_index_key_cursor_new ((CamelTextIndex *) idx, p->word_index);
787
/* CamelIndexClass::names implementation: returns a new key cursor over the
 * name key table. */
static CamelIndexCursor *
788
text_index_names (CamelIndex *idx)
790
CamelTextIndexPrivate *p = CAMEL_TEXT_INDEX (idx)->priv;
792
return (CamelIndexCursor *) camel_text_index_key_cursor_new ((CamelTextIndex *) idx, p->name_index);
796
/* Class initialiser for CamelTextIndex: registers the private structure and
 * installs the GObject dispose/finalize handlers and the CamelIndex virtual
 * methods implemented in this file. */
camel_text_index_class_init (CamelTextIndexClass *class)
798
GObjectClass *object_class;
799
CamelIndexClass *index_class;
801
g_type_class_add_private (class, sizeof (CamelTextIndexPrivate));
803
object_class = G_OBJECT_CLASS (class);
804
object_class->dispose = text_index_dispose;
805
object_class->finalize = text_index_finalize;
807
index_class = CAMEL_INDEX_CLASS (class);
808
index_class->sync = text_index_sync;
809
index_class->compress = text_index_compress;
810
index_class->delete = text_index_delete;
811
index_class->rename = text_index_rename;
812
index_class->has_name = text_index_has_name;
813
index_class->add_name = text_index_add_name;
814
index_class->write_name = text_index_write_name;
815
index_class->find_name = text_index_find_name;
816
index_class->delete_name = text_index_delete_name;
817
index_class->find = text_index_find;
818
index_class->words = text_index_words;
819
index_class->names = text_index_names;
823
/* Instance initialiser for CamelTextIndex: binds the private structure,
 * creates an empty word cache (list plus string-keyed hash table), sets the
 * default word cache limit and initialises the recursive mutex. */
camel_text_index_init (CamelTextIndex *text_index)
825
text_index->priv = G_TYPE_INSTANCE_GET_PRIVATE (
826
text_index, CAMEL_TYPE_TEXT_INDEX, CamelTextIndexPrivate);
828
camel_dlist_init (&text_index->priv->word_cache);
829
text_index->priv->words = g_hash_table_new (g_str_hash, g_str_equal);
830
text_index->priv->word_cache_count = 0;
832
/* This cache size and the block cache size have been tuned for
833
* about the best with moderate memory usage. Doubling the memory
834
* usage barely affects performance. */
835
text_index->priv->word_cache_limit = 4096; /* 1024 = 128K */
837
g_static_rec_mutex_init (&text_index->priv->lock);
841
/* Normalisation callback installed through camel_index_set_normalize in
 * camel_text_index_new. The visible code lower-cases the input with
 * g_utf8_strdown; full Unicode normalisation is commented out as too
 * expensive. The rest of the body is not visible in this listing. */
text_index_normalize (CamelIndex *idx,
847
/* Sigh, this is really expensive */
848
/*g_utf8_normalize (in, strlen (in), G_NORMALIZE_ALL);*/
849
word = g_utf8_strdown (in, -1);
855
/* Creates a CamelTextIndex.
 * Constructs the base index, installs text_index_normalize, opens the block
 * file at the constructed index path and the key file at "<index
 * path>.data", allocates a root block for any of the four tables whose root
 * id is still 0, and creates the key and partition tables on those roots.
 * The object is unreferenced when any table could not be created; the second
 * unref is presumably a shared failure path for the earlier NULL checks
 * (labels and return statements are not visible in this listing). */
camel_text_index_new (const gchar *path,
858
CamelTextIndex *idx = g_object_new (CAMEL_TYPE_TEXT_INDEX, NULL);
859
CamelTextIndexPrivate *p = idx->priv;
860
struct _CamelTextIndexRoot *rb;
864
camel_index_construct ((CamelIndex *) idx, path, flags);
865
camel_index_set_normalize ((CamelIndex *) idx, text_index_normalize, NULL);
867
p->blocks = camel_block_file_new (
868
idx->parent.path, flags, CAMEL_TEXT_INDEX_VERSION, CAMEL_BLOCK_SIZE);
869
if (p->blocks == NULL)
872
/* +7 covers ".data" plus the terminating NUL. */
link = alloca (strlen (idx->parent.path) + 7);
873
sprintf (link, "%s.data", idx->parent.path);
874
p->links = camel_key_file_new (link, flags, CAMEL_TEXT_INDEX_KEY_VERSION);
876
if (p->links == NULL)
879
rb = (struct _CamelTextIndexRoot *) p->blocks->root;
881
/* A root id of 0 means the table has no root block yet: allocate one and
 * mark the file root dirty. The same pattern repeats for all four tables. */
if (rb->word_index_root == 0) {
882
bl = camel_block_file_new_block (p->blocks);
887
rb->word_index_root = bl->id;
888
camel_block_file_unref_block (p->blocks, bl);
889
camel_block_file_touch_block (p->blocks, p->blocks->root_block);
892
if (rb->word_hash_root == 0) {
893
bl = camel_block_file_new_block (p->blocks);
898
rb->word_hash_root = bl->id;
899
camel_block_file_unref_block (p->blocks, bl);
900
camel_block_file_touch_block (p->blocks, p->blocks->root_block);
903
if (rb->name_index_root == 0) {
904
bl = camel_block_file_new_block (p->blocks);
909
rb->name_index_root = bl->id;
910
camel_block_file_unref_block (p->blocks, bl);
911
camel_block_file_touch_block (p->blocks, p->blocks->root_block);
914
if (rb->name_hash_root == 0) {
915
bl = camel_block_file_new_block (p->blocks);
920
rb->name_hash_root = bl->id;
921
camel_block_file_unref_block (p->blocks, bl);
922
camel_block_file_touch_block (p->blocks, p->blocks->root_block);
925
p->word_index = camel_key_table_new (p->blocks, rb->word_index_root);
926
p->word_hash = camel_partition_table_new (p->blocks, rb->word_hash_root);
927
p->name_index = camel_key_table_new (p->blocks, rb->name_index_root);
928
p->name_hash = camel_partition_table_new (p->blocks, rb->name_hash_root);
930
if (p->word_index == NULL || p->word_hash == NULL
931
|| p->name_index == NULL || p->name_hash == NULL) {
932
g_object_unref (idx);
939
g_object_unref (idx);
943
/* returns 0 if the index exists, is valid, and synced, -1 otherwise */
/* Opens "<path>.index" as a block file and "<path>.index.data" as a key
 * file, both read-only, and releases both objects again. */
945
camel_text_index_check (const gchar *path)
948
CamelBlockFile *blocks;
951
/* +7 is exactly ".index" plus the terminating NUL. */
block = alloca (strlen (path) + 7);
952
sprintf (block, "%s.index", path);
953
blocks = camel_block_file_new (block, O_RDONLY, CAMEL_TEXT_INDEX_VERSION, CAMEL_BLOCK_SIZE);
954
if (blocks == NULL) {
955
io (printf ("Check failed: No block file: %s\n", g_strerror (errno)));
958
/* +12 is exactly ".index.data" plus the terminating NUL. */
key = alloca (strlen (path) + 12);
959
sprintf (key, "%s.index.data", path);
960
keys = camel_key_file_new (key, O_RDONLY, CAMEL_TEXT_INDEX_KEY_VERSION);
962
io (printf ("Check failed: No key file: %s\n", g_strerror (errno)));
963
g_object_unref (blocks);
967
g_object_unref (keys);
968
g_object_unref (blocks);
974
/* Renames the on-disk files of an index from base name `old` to `new`
 * without going through an index object: first "<base>.index", then
 * "<base>.index.data". A rename failing with ENOENT is not treated as an
 * error. If the data file rename fails for another reason, the ".index"
 * rename is undone. Return statements are not visible in this listing. */
camel_text_index_rename (const gchar *old,
977
gchar *oldname, *newname;
980
/* TODO: camel_text_index_rename should find out if we have an active index and use that instead */
982
/* +12 is exactly ".index.data" plus NUL, the longest suffix used below. */
oldname = alloca (strlen (old) + 12);
983
newname = alloca (strlen (new) + 12);
984
sprintf (oldname, "%s.index", old);
985
sprintf (newname, "%s.index", new);
987
if (g_rename (oldname, newname) == -1 && errno != ENOENT)
990
sprintf (oldname, "%s.index.data", old);
991
sprintf (newname, "%s.index.data", new);
993
if (g_rename (oldname, newname) == -1 && errno != ENOENT) {
995
/* Roll back the first rename; its result is not checked. */
sprintf (oldname, "%s.index", old);
996
sprintf (newname, "%s.index", new);
997
g_rename (newname, oldname);
1006
/* Unlinks the on-disk files "<old>.index" and "<old>.index.data". An unlink
 * failing with ENOENT or ENOTDIR is not treated as an error. The statements
 * run on other failures and the return are not visible in this listing. */
camel_text_index_remove (const gchar *old)
1011
/* TODO: needs to poke any active indices to remain unlinked */
1013
block = alloca (strlen (old) + 12);
1014
key = alloca (strlen (old) + 12);
1015
sprintf (block, "%s.index", old);
1016
sprintf (key, "%s.index.data", old);
1018
if (g_unlink (block) == -1 && errno != ENOENT && errno != ENOTDIR)
1020
if (g_unlink (key) == -1 && errno != ENOENT && errno != ENOTDIR)
1031
/* Debug helper: prints the index path, version, flags and root block
 * counters to stdout, followed by word and name fragmentation percentages
 * when the respective totals are non-zero. */
camel_text_index_info (CamelTextIndex *idx)
1033
CamelTextIndexPrivate *p = idx->priv;
1034
struct _CamelTextIndexRoot *rb = (struct _CamelTextIndexRoot *) p->blocks->root;
1037
printf ("Path: '%s'\n", idx->parent.path);
1038
printf ("Version: %u\n", idx->parent.version);
1039
printf ("Flags: %08x\n", idx->parent.flags);
1040
printf ("Total words: %u\n", rb->words);
1041
printf ("Total names: %u\n", rb->names);
1042
printf ("Total deleted: %u\n", rb->deleted);
1043
printf ("Total key blocks: %u\n", rb->keys);
1045
if (rb->words > 0) {
1046
/* NOTE(review): keys and words are guint32; keys < words would wrap. */
frag = ((rb->keys - rb->words) * 100)/ rb->words;
1047
printf ("Word fragmentation: %d%%\n", frag);
1050
if (rb->names > 0) {
1051
frag = (rb->deleted * 100)/ rb->names;
1052
printf ("Name fragmentation: %d%%\n", frag);
1056
/* #define DUMP_RAW */
1059
/* Block type flags stored per block id by add_type; the values are distinct
 * bits because add_type ORs a new type into any type already recorded. */
enum { KEY_ROOT = 1, KEY_DATA = 2, PARTITION_MAP = 4, PARTITION_DATA = 8 };
1062
/* Records `type` for block `id` in `map`. Warns when the block already has
 * a different type recorded, and stores the OR of the old and new types. */
add_type (GHashTable *map,
1068
old = g_hash_table_lookup (map, id);
1072
if (old != 0 && old != type)
1073
g_warning ("block %x redefined as type %d, already type %d\n", id, type, old);
1074
g_hash_table_insert (map, id, GINT_TO_POINTER (type | old));
1078
/* Marks block `id` as a partition map in `map`, then reads the block and
 * marks every block id listed in its partition table as partition data.
 * A block whose `used` count exceeds the table capacity is reported as
 * invalid and released without being walked. Any traversal of further map
 * blocks is not visible in this listing. */
add_partition (GHashTable *map,
1079
CamelBlockFile *blocks,
1083
CamelPartitionMapBlock *pm;
1087
add_type (map, id, PARTITION_MAP);
1088
bl = camel_block_file_get_block (blocks, id);
1090
g_warning ("couldn't get parition: %x\n", id);
1094
pm = (CamelPartitionMapBlock *) &bl->data;
1095
/* Bounds check before indexing pm->partition[] below. */
if (pm->used > G_N_ELEMENTS (pm->partition)) {
1096
g_warning ("Partition block %x invalid\n", id);
1097
camel_block_file_unref_block (blocks, bl);
1101
for (i = 0; i < pm->used; i++)
1102
add_type (map, pm->partition[i].blockid, PARTITION_DATA);
1105
camel_block_file_unref_block (blocks, bl);
1110
/* Marks block `id` as a key root in `map`, reads it, and marks key blocks
 * as key data. Each key block is fetched and released again; the root block
 * is released at the end. The loop that advances `id` from one key block to
 * the next is not visible in this listing. */
add_keys (GHashTable *map,
1111
CamelBlockFile *blocks,
1114
CamelBlock *rbl, *bl;
1115
CamelKeyRootBlock *root;
1118
add_type (map, id, KEY_ROOT);
1119
rbl = camel_block_file_get_block (blocks, id);
1121
g_warning ("couldn't get key root: %x\n", id);
1124
root = (CamelKeyRootBlock *) &rbl->data;
1128
add_type (map, id, KEY_DATA);
1129
bl = camel_block_file_get_block (blocks, id);
1131
g_warning ("couldn't get key: %x\n", id);
1135
kb = (CamelKeyBlock *) &bl->data;
1137
camel_block_file_unref_block (blocks, bl);
1140
camel_block_file_unref_block (blocks, rbl);
1144
/* Debug helper: reads the file at `path` in 1024-byte chunks and prints each
 * one according to the block type recorded for it in `map` (key root, key
 * data, partition map or partition data), followed by a raw hex/ASCII dump.
 * Reading stops at the first chunk shorter than 1024 bytes.
 * NOTE(review): large parts of the body (the switch header, how `id` and
 * `total` advance, closing of `fd`) are not visible in this listing. */
dump_raw (GHashTable *map,
1149
gchar *p, c, *e, *a, *o;
1150
gint v, n, len, i, type;
1151
gchar hex[16] = "0123456789ABCDEF";
1153
camel_block_t id, total;
1155
fd = g_open (path, O_RDONLY | O_BINARY, 0);
1160
while ((len = read (fd, buf, 1024)) == 1024) {
1163
type = g_hash_table_lookup (map, id);
1166
printf (" - unknown -\n");
1169
printf (" - invalid -\n");
1172
CamelKeyRootBlock *r = (CamelKeyRootBlock *) buf;
1173
printf ("Key root:\n");
1174
printf ("First: %08x Last: %08x Free: %08x\n", r->first, r->last, r->free);
1177
CamelKeyBlock *k = (CamelKeyBlock *) buf;
1178
printf ("Key data:\n");
1179
printf ("Next: %08x Used: %u\n", k->next, k->used);
1180
for (i = 0; i < k->used; i++) {
1182
/* Key text length is the distance from this key's offset to the previous
 * key's offset, or to the end of keydata; which branch handles i == 0 is
 * not visible here. */
len = sizeof (k->u.keydata);
1184
len = k->u.keys[i - 1].offset;
1185
len -= k->u.keys[i].offset;
1186
printf ("[%03d]: %08x %5d %06x %3d '%.*s'\n", i,
1187
k->u.keys[i].data, k->u.keys[i].offset, k->u.keys[i].flags,
1188
len, len, k->u.keydata + k->u.keys[i].offset);
1191
case PARTITION_MAP: {
1192
CamelPartitionMapBlock *m = (CamelPartitionMapBlock *) buf;
1193
printf ("Partition map\n");
1194
printf ("Next: %08x Used: %u\n", m->next, m->used);
1195
for (i = 0; i < m->used; i++) {
1196
printf ("[%03d]: %08x -> %08x\n", i, m->partition[i].hashid, m->partition[i].blockid);
1199
case PARTITION_DATA: {
1200
CamelPartitionKeyBlock *k = (CamelPartitionKeyBlock *) buf;
1201
printf ("Partition data\n");
1202
printf ("Used: %u\n", k->used);
1206
printf ("--raw--\n");
1211
sprintf (line, "%08x: ", total);
1216
while (len && i < 16) {
1218
*a++ = isprint (c)?c:'.';
1219
*o++ = hex[(c>>4)&0x0f];
1227
printf ("%s\n", line);
1237
/* Debug helper: prints every name in the name table (marking those with
 * flag bit 0 set as deleted), then every word together with the names
 * returned by a camel_index_find cursor for it. The lower-level section at
 * the end classifies blocks and calls dump_raw; it presumably sits in an
 * alternative branch selected by DUMP_RAW (the preprocessor lines are not
 * visible in this listing). */
camel_text_index_dump (CamelTextIndex *idx)
1239
CamelTextIndexPrivate *p = idx->priv;
1247
/* Iterate over all names in the file first */
1249
printf ("UID's in index\n");
1252
while ((keyid = camel_key_table_next (p->name_index, keyid, &word, &flags, &data))) {
1253
if ((flags & 1) == 0)
1254
printf (" %s\n", word);
1256
printf (" %s (deleted)\n", word);
1260
printf ("Word's in index\n");
1263
while ((keyid = camel_key_table_next (p->word_index, keyid, &word, &flags, &data))) {
1264
CamelIndexCursor *idc;
1266
printf ("Word: '%s':\n", word);
1268
idc = camel_index_find ((CamelIndex *) idx, word);
1269
while ((name = camel_index_cursor_next (idc))) {
1270
printf (" %s", name);
1273
g_object_unref (idc);
1277
/* a more low-level dump routine */
1278
GHashTable *block_type = g_hash_table_new (NULL, NULL);
1283
add_keys (block_type, p->blocks, p->word_index->rootid);
1284
add_keys (block_type, p->blocks, p->name_index->rootid);
1286
add_partition (block_type, p->blocks, p->word_hash->rootid);
1287
add_partition (block_type, p->blocks, p->name_hash->rootid);
1289
dump_raw (block_type, p->blocks->path);
1290
g_hash_table_destroy (block_type);
1294
/* more debug stuff */
1296
/* Debug helper: walks the name table and then the word table, printing a
 * warning to stdout for duplicate key ids, duplicate data blocks, repeated
 * words, words with no data, words that reference unknown names or list a
 * name twice, and key file read failures. All bookkeeping hash tables are
 * destroyed at the end. */
camel_text_index_validate (CamelTextIndex *idx)
1298
CamelTextIndexPrivate *p = idx->priv;
1305
camel_key_t *records;
1308
GHashTable *names, *deleted, *words, *keys, *name_word, *word_word;
1310
/* names/deleted/words/keys are keyed by integer ids; name_word/word_word by
 * string. */
names = g_hash_table_new (NULL, NULL);
1311
deleted = g_hash_table_new (NULL, NULL);
1313
name_word = g_hash_table_new (g_str_hash, g_str_equal);
1315
words = g_hash_table_new (NULL, NULL);
1316
keys = g_hash_table_new (NULL, NULL);
1318
word_word = g_hash_table_new (g_str_hash, g_str_equal);
1320
/* Iterate over all names in the file first */
1322
printf ("Checking UID consistency\n");
1325
while ((keyid = camel_key_table_next (p->name_index, keyid, &word, &flags, &data))) {
1326
if ((oldword = g_hash_table_lookup (names, GINT_TO_POINTER (keyid))) != NULL
1327
|| (oldword = g_hash_table_lookup (deleted, GINT_TO_POINTER (keyid))) != NULL) {
1328
printf ("Warning, name '%s' duplicates key (%x) with name '%s'\n", word, keyid, oldword);
1331
g_hash_table_insert (name_word, word, GINT_TO_POINTER (1));
1332
if ((flags & 1) == 0) {
1333
g_hash_table_insert (names, GINT_TO_POINTER (keyid), word);
1335
g_hash_table_insert (deleted, GINT_TO_POINTER (keyid), word);
1340
printf ("Checking WORD member consistency\n");
1343
while ((keyid = camel_key_table_next (p->word_index, keyid, &word, &flags, &data))) {
1344
CamelIndexCursor *idc;
1347
/* first, check for duplicates of keyid, and data */
1348
if ((oldword = g_hash_table_lookup (words, GINT_TO_POINTER (keyid))) != NULL) {
1349
printf ("Warning, word '%s' duplicates key (%x) with name '%s'\n", word, keyid, oldword);
1353
g_hash_table_insert (words, GINT_TO_POINTER (keyid), word);
1357
/* This may not be an issue if things have been removed over time,
1358
* though it is a problem if its a fresh index */
1359
printf ("Word '%s' has no data associated with it\n", word);
1361
if ((oldword = g_hash_table_lookup (keys, GUINT_TO_POINTER (data))) != NULL) {
1362
printf ("Warning, word '%s' duplicates data (%x) with name '%s'\n", word, data, oldword);
1364
g_hash_table_insert (keys, GUINT_TO_POINTER (data), word);
1368
if (g_hash_table_lookup (word_word, word) != NULL) {
1369
printf ("Warning, word '%s' occurs more than once\n", word);
1371
g_hash_table_insert (word_word, word, word);
1374
used = g_hash_table_new (g_str_hash, g_str_equal);
1376
idc = camel_index_find ((CamelIndex *) idx, word);
1377
while ((name = camel_index_cursor_next (idc))) {
1378
if (g_hash_table_lookup (name_word, name) == NULL) {
1379
printf ("word '%s' references non-existant name '%s'\n", word, name);
1381
if (g_hash_table_lookup (used, name) != NULL) {
1382
printf ("word '%s' uses word '%s' more than once\n", word, name);
1384
g_hash_table_insert (used, g_strdup (name), (gpointer) 1);
1387
g_object_unref (idc);
1389
g_hash_table_foreach (used, (GHFunc) g_free, NULL);
1390
g_hash_table_destroy (used);
1392
printf ("word '%s'\n", word);
1395
printf (" data %x ", data);
1396
if (camel_key_file_read (p->links, &data, &count, &records) == -1) {
1397
printf ("Warning, read failed for word '%s', at data '%u'\n", word, data);
1400
printf ("(%d)\n", (gint)count);
1406
g_hash_table_destroy (names);
1407
g_hash_table_destroy (deleted);
1408
g_hash_table_destroy (words);
1409
g_hash_table_destroy (keys);
1411
g_hash_table_foreach (name_word, (GHFunc) g_free, NULL);
1412
g_hash_table_destroy (name_word);
1414
g_hash_table_foreach (word_word, (GHFunc) g_free, NULL);
1415
g_hash_table_destroy (word_word);
1418
/* ********************************************************************** */
1419
/* CamelTextIndexName */
1420
/* ********************************************************************** */
1422
G_DEFINE_TYPE (CamelTextIndexName, camel_text_index_name, CAMEL_TYPE_INDEX_NAME)
1425
/* GObject finalize handler for CamelTextIndexName: destroys the parent's
 * word table, frees the pending-word buffer and the string mempool, then
 * chains up to the parent class. */
text_index_name_finalize (GObject *object)
1427
CamelTextIndexNamePrivate *priv;
1429
priv = CAMEL_TEXT_INDEX_NAME (object)->priv;
1431
g_hash_table_destroy (CAMEL_TEXT_INDEX_NAME (object)->parent.words);
1433
g_string_free (priv->buffer, TRUE);
1434
camel_mempool_destroy (priv->pool);
1436
/* Chain up to parent's finalize() method. */
1437
G_OBJECT_CLASS (camel_text_index_name_parent_class)->finalize (object);
1441
/* CamelIndexNameClass::add_word implementation: adds `word` to the name's
 * word table unless it is already present. The stored copy is allocated
 * from the name's mempool and serves as both key and value. */
text_index_name_add_word (CamelIndexName *idn,
1444
CamelTextIndexNamePrivate *p = ((CamelTextIndexName *) idn)->priv;
1446
if (g_hash_table_lookup (idn->words, word) == NULL) {
1447
gchar *w = camel_mempool_strdup (p->pool, word);
1449
g_hash_table_insert (idn->words, w, w);
1454
* Because it doesn't hang/loop forever on bad data
1455
* Used to clean up utf8 before it gets further */
1457
/* Decode the next character from a bounded UTF-8 byte range.
 *
 * @ptr:    in/out cursor into the byte range; scanning starts at *ptr.
 * @ptrend: end of the byte range.
 *
 * The lead byte selects the sequence form, then continuation bytes are
 * folded into the accumulated value.  Invalid lead bytes are skipped and the
 * scan resumes at the next candidate start byte.
 * text_index_name_add_buffer() stops iterating when this returns 0.
 * NOTE(review): presumably 0 is returned at end of input, on a NUL byte and
 * on a truncated sequence -- confirm against the return statements. */
static inline guint32
1458
camel_utf8_next (const guchar **ptr,
1459
const guchar *ptrend)
1461
register guchar *p = (guchar *) * ptr;
1469
/* A zero byte ends the scan. */
while ((c = *p++)) {
1473
/* 110xxxxx: lead byte of a 2-byte sequence */
} else if ((c&0xe0) == 0xc0) {
1476
/* 1110xxxx: lead byte of a 3-byte sequence */
} else if ((c&0xf0) == 0xe0) {
1479
/* 11110xxx: lead byte of a 4-byte sequence */
} else if ((c&0xf8) == 0xf0) {
1482
/* 111110xx: lead byte of a (legacy) 5-byte sequence */
} else if ((c&0xfc) == 0xf8) {
1485
/* 1111110x: lead byte of a (legacy) 6-byte sequence */
} else if ((c&0xfe) == 0xfc) {
1489
/* Invalid, ignore and look for next start char if room */
1496
/* bad data or truncated buffer */
1500
/* Consume continuation bytes (10xxxxxx) while some are still expected;
 * l appears to be the number of continuation bytes outstanding. */
while (l && ((c = *p) & 0xc0) == 0x80) {
1503
/* Each continuation byte contributes its low 6 bits. */
v = (v << 6) | (c & 0x3f);
1512
/* else look for a start char again */
1519
/* add_buffer() implementation for CamelTextIndexName.
 *
 * @idn:    the index name receiving words.
 * @buffer: bytes to tokenise, or NULL to flush a pending partial word.
 *
 * Characters are decoded with camel_utf8_next().  Alphanumeric characters
 * are lower-cased and appended to the private scratch buffer, which
 * persists between calls so a word may span buffers.  A completed word no
 * longer than CAMEL_TEXT_INDEX_MAX_WORDLEN bytes is added to the name and
 * the scratch buffer is then emptied. */
text_index_name_add_buffer (CamelIndexName *idn,
1520
const gchar *buffer,
1523
CamelTextIndexNamePrivate *p = CAMEL_TEXT_INDEX_NAME (idn)->priv;
1524
const guchar *ptr, *ptrend;
1529
/* A NULL buffer means "no more data": flush any word still pending. */
if (buffer == NULL) {
1530
if (p->buffer->len) {
1531
/* NOTE(review): unlike the loop below, this flush goes through
 * camel_index_name_add_word() and does not apply the
 * CAMEL_TEXT_INDEX_MAX_WORDLEN limit -- confirm this is intended. */
camel_index_name_add_word (idn, p->buffer->str);
1532
g_string_truncate (p->buffer, 0);
1537
ptr = (const guchar *) buffer;
1538
ptrend = (const guchar *) buffer + len;
1539
/* Iterate until the decoder reports 0. */
while ((c = camel_utf8_next (&ptr, ptrend))) {
1540
if (g_unichar_isalnum (c)) {
1541
/* Words are stored lower-cased. */
c = g_unichar_tolower (c);
1542
utf8len = g_unichar_to_utf8 (c, utf8);
1544
g_string_append (p->buffer, utf8);
1546
/* Presumably reached for a non-alphanumeric character, i.e. a word
 * boundary: keep the pending word only if it is non-empty and within the
 * length limit (measured in bytes of the UTF-8 buffer). */
if (p->buffer->len > 0 && p->buffer->len <= CAMEL_TEXT_INDEX_MAX_WORDLEN) {
1547
text_index_name_add_word (idn, p->buffer->str);
1548
/*camel_index_name_add_word (idn, p->buffer->str);*/
1551
/* Start collecting the next word. */
g_string_truncate (p->buffer, 0);
1559
/* Class initializer for CamelTextIndexName: reserves the private instance
 * data and installs the finalize, add_word and add_buffer implementations
 * defined in this file. */
camel_text_index_name_class_init (CamelTextIndexNameClass *class)
1561
GObjectClass *object_class;
1562
CamelIndexNameClass *index_name_class;
1564
/* Reserve per-instance storage for CamelTextIndexNamePrivate. */
g_type_class_add_private (class, sizeof (CamelTextIndexNamePrivate));
1566
object_class = G_OBJECT_CLASS (class);
1567
object_class->finalize = text_index_name_finalize;
1569
/* Override the CamelIndexName virtual methods. */
index_name_class = CAMEL_INDEX_NAME_CLASS (class);
1570
index_name_class->add_word = text_index_name_add_word;
1571
index_name_class->add_buffer = text_index_name_add_buffer;
1575
/* Instance initializer for CamelTextIndexName: binds the private data and
 * creates the word table, the scratch word buffer and the memory pool that
 * text_index_name_finalize() later releases. */
camel_text_index_name_init (CamelTextIndexName *text_index_name)
1577
text_index_name->priv = G_TYPE_INSTANCE_GET_PRIVATE (text_index_name, CAMEL_TYPE_TEXT_INDEX_NAME, CamelTextIndexNamePrivate);
1579
/* String-keyed table with no destroy notifiers. */
text_index_name->parent.words = g_hash_table_new (
1580
g_str_hash, g_str_equal);
1582
/* Scratch buffer in which add_buffer accumulates the word in progress. */
text_index_name->priv->buffer = g_string_new ("");
1583
/* Pool from which the stored word and name strings are allocated. */
text_index_name->priv->pool =
1584
camel_mempool_new (256, 128, CAMEL_MEMPOOL_ALIGN_BYTE);
1587
/* Create a CamelTextIndexName associated with @idx.
 *
 * @idx: the text index the new name belongs to; a reference is taken.
 *
 * The name string is copied into the instance's memory pool, so it lives
 * as long as the pool does. */
CamelTextIndexName *
1588
camel_text_index_name_new (CamelTextIndex *idx,
1592
CamelTextIndexName *idn = g_object_new (CAMEL_TYPE_TEXT_INDEX_NAME, NULL);
1593
CamelIndexName *cin = &idn->parent;
1594
CamelTextIndexNamePrivate *p = idn->priv;
1596
/* The name holds its own reference on the index. */
cin->index = g_object_ref (idx);
1597
cin->name = camel_mempool_strdup (p->pool, name);
1603
/* ********************************************************************** */
1604
/* CamelTextIndexCursor */
1605
/* ********************************************************************** */
1607
/* Define the CamelTextIndexCursor GObject type, derived from
 * CAMEL_TYPE_INDEX_CURSOR.  This also provides the
 * camel_text_index_cursor_parent_class pointer used when chaining up. */
G_DEFINE_TYPE (CamelTextIndexCursor, camel_text_index_cursor, CAMEL_TYPE_INDEX_CURSOR)
1610
/* GObject finalize() handler for CamelTextIndexCursor (installed by
 * camel_text_index_cursor_class_init).
 *
 * @object: the cursor instance being finalized.
 *
 * Frees the cached record array and the current name string, then chains
 * up to the parent class. */
text_index_cursor_finalize (GObject *object)
1612
CamelTextIndexCursorPrivate *priv;
1614
priv = CAMEL_TEXT_INDEX_CURSOR (object)->priv;
1616
g_free (priv->records);
1617
g_free (priv->current);
1619
/* Chain up to parent's finalize() method. */
1620
G_OBJECT_CLASS (camel_text_index_cursor_parent_class)->finalize (object);
1623
/* next() implementation for CamelTextIndexCursor.
 *
 * @idc: the cursor to advance.
 *
 * Walks the record ids cached in p->records, reading a further batch from
 * the index's links key file whenever the cache is exhausted.  Each record
 * id is resolved through the index's name table into p->current.  The
 * surrounding loop repeats while p->current is NULL. */
static const gchar *
1624
text_index_cursor_next (CamelIndexCursor *idc)
1626
CamelTextIndexCursorPrivate *p = CAMEL_TEXT_INDEX_CURSOR (idc)->priv;
1627
CamelTextIndexPrivate *tip = CAMEL_TEXT_INDEX (idc->index)->priv;
1630
/* Debug trace through the c() wrapper. */
c (printf ("Going to next cursor for word with data '%08x' next %08x\n", p->first, p->next));
1633
/* Cache exhausted: drop it and fetch the next batch of records. */
while (p->record_index >= p->record_count) {
1634
g_free (p->records);
1636
p->record_index = 0;
1637
p->record_count = 0;
1640
/* The read takes p->next by address along with the count and array. */
if (camel_key_file_read (tip->links, &p->next, &p->record_count, &p->records) == -1)
1644
/* Release the previously returned string before looking up the next. */
g_free (p->current);
1645
camel_key_table_lookup (
1646
tip->name_index, p->records[p->record_index],
1647
&p->current, &flags);
1649
/* NOTE(review): presumably conditional on the looked-up flags (e.g. an
 * entry that should be skipped) -- confirm. */
g_free (p->current);
1653
} while (p->current == NULL);
1659
/* reset() implementation for CamelTextIndexCursor.
 *
 * @idc: the cursor to reset.
 *
 * Releases the cached record array and the current name string, and zeroes
 * the record count and index so the next call to next() has to read
 * records again. */
text_index_cursor_reset (CamelIndexCursor *idc)
1661
CamelTextIndexCursorPrivate *p = CAMEL_TEXT_INDEX_CURSOR (idc)->priv;
1663
g_free (p->records);
1665
g_free (p->current);
1667
p->record_count = 0;
1668
p->record_index = 0;
1673
/* Class initializer for CamelTextIndexCursor: reserves the private instance
 * data and installs the finalize, next and reset implementations defined in
 * this file. */
camel_text_index_cursor_class_init (CamelTextIndexCursorClass *class)
1675
GObjectClass *object_class;
1676
CamelIndexCursorClass *index_cursor_class;
1678
/* Reserve per-instance storage for CamelTextIndexCursorPrivate. */
g_type_class_add_private (class, sizeof (CamelTextIndexCursorPrivate));
1680
object_class = G_OBJECT_CLASS (class);
1681
object_class->finalize = text_index_cursor_finalize;
1683
/* Override the CamelIndexCursor virtual methods. */
index_cursor_class = CAMEL_INDEX_CURSOR_CLASS (class);
1684
index_cursor_class->next = text_index_cursor_next;
1685
index_cursor_class->reset = text_index_cursor_reset;
1689
/* Instance initializer for CamelTextIndexCursor: binds the private data
 * pointer.  The remaining fields are set up by
 * camel_text_index_cursor_new(). */
camel_text_index_cursor_init (CamelTextIndexCursor *text_index_cursor)
1691
text_index_cursor->priv = G_TYPE_INSTANCE_GET_PRIVATE (text_index_cursor, CAMEL_TYPE_TEXT_INDEX_CURSOR, CamelTextIndexCursorPrivate);
1694
/* Create a CamelTextIndexCursor associated with @idx.
 *
 * @idx: the text index to iterate over; a reference is taken.
 *
 * The cursor starts with an empty record cache (count and index both 0),
 * so the first next() call has to read records. */
CamelTextIndexCursor *
1695
camel_text_index_cursor_new (CamelTextIndex *idx,
1698
CamelTextIndexCursor *idc = g_object_new (CAMEL_TYPE_TEXT_INDEX_CURSOR, NULL);
1699
CamelIndexCursor *cic = &idc->parent;
1700
CamelTextIndexCursorPrivate *p = idc->priv;
1702
/* The cursor holds its own reference on the index. */
cic->index = g_object_ref (idx);
1705
p->record_count = 0;
1706
p->record_index = 0;
1711
/* ********************************************************************** */
1712
/* CamelTextIndexKeyCursor */
1713
/* ********************************************************************** */
1715
/* Define the CamelTextIndexKeyCursor GObject type, derived from
 * CAMEL_TYPE_INDEX_CURSOR.  This also provides the
 * camel_text_index_key_cursor_parent_class pointer used when chaining up. */
G_DEFINE_TYPE (CamelTextIndexKeyCursor, camel_text_index_key_cursor, CAMEL_TYPE_INDEX_CURSOR)
1718
/* GObject dispose() handler for CamelTextIndexKeyCursor (installed by
 * camel_text_index_key_cursor_class_init).
 *
 * @object: the key cursor instance being disposed.
 *
 * Drops the reference on the key table taken in
 * camel_text_index_key_cursor_new(), if one is held, then chains up. */
text_index_key_cursor_dispose (GObject *object)
1720
CamelTextIndexKeyCursorPrivate *priv;
1722
priv = CAMEL_TEXT_INDEX_KEY_CURSOR (object)->priv;
1724
if (priv->table != NULL) {
1725
g_object_unref (priv->table);
1729
/* Chain up parent's dispose() method. */
1730
G_OBJECT_CLASS (camel_text_index_key_cursor_parent_class)->dispose (object);
1734
/* GObject finalize() handler for CamelTextIndexKeyCursor (installed by
 * camel_text_index_key_cursor_class_init).
 *
 * @object: the key cursor instance being finalized.
 *
 * Frees the current key string, then chains up to the parent class. */
text_index_key_cursor_finalize (GObject *object)
1736
CamelTextIndexKeyCursorPrivate *priv;
1738
priv = CAMEL_TEXT_INDEX_KEY_CURSOR (object)->priv;
1740
g_free (priv->current);
1742
/* Chain up to parent's finalize() method. */
1743
G_OBJECT_CLASS (camel_text_index_key_cursor_parent_class)->finalize (object);
1746
/* next() implementation for CamelTextIndexKeyCursor.
 *
 * @idc: the key cursor to advance.
 *
 * Steps through the key table starting after p->keyid.  Each step stores
 * the entry's key string, flags and data in the private state.  Iteration
 * ends when camel_key_table_next() returns 0. */
static const gchar *
1747
text_index_key_cursor_next (CamelIndexCursor *idc)
1749
CamelTextIndexKeyCursorPrivate *p = CAMEL_TEXT_INDEX_KEY_CURSOR (idc)->priv;
1751
/* Debug trace through the c() wrapper. */
c (printf ("Going to next cursor for keyid %08x\n", p->keyid));
1753
/* Release the string returned by the previous call. */
g_free (p->current);
1756
while ((p->keyid = camel_key_table_next (p->table, p->keyid, &p->current, &p->flags, &p->data))) {
1757
/* Bit 0 of the flags is clear for this entry.
 * NOTE(review): presumably bit 0 marks entries to be skipped (e.g.
 * deleted) -- confirm. */
if ((p->flags & 1) == 0) {
1760
/* Discard the string of an entry that is not being returned. */
g_free (p->current);
1769
/* reset() implementation for CamelTextIndexKeyCursor.
 *
 * @idc: the key cursor to reset.
 *
 * Releases the current key string held in the private state. */
text_index_key_cursor_reset (CamelIndexCursor *idc)
1771
CamelTextIndexKeyCursorPrivate *p = CAMEL_TEXT_INDEX_KEY_CURSOR (idc)->priv;
1776
g_free (p->current);
1781
/* Class initializer for CamelTextIndexKeyCursor: reserves the private
 * instance data and installs the dispose, finalize, next and reset
 * implementations defined in this file. */
camel_text_index_key_cursor_class_init (CamelTextIndexKeyCursorClass *class)
1783
GObjectClass *object_class;
1784
CamelIndexCursorClass *index_cursor_class;
1786
/* Reserve per-instance storage for CamelTextIndexKeyCursorPrivate. */
g_type_class_add_private (class, sizeof (CamelTextIndexKeyCursorPrivate));
1788
object_class = G_OBJECT_CLASS (class);
1789
/* dispose drops the key table reference; finalize frees plain memory. */
object_class->dispose = text_index_key_cursor_dispose;
1790
object_class->finalize = text_index_key_cursor_finalize;
1792
/* Override the CamelIndexCursor virtual methods. */
index_cursor_class = CAMEL_INDEX_CURSOR_CLASS (class);
1793
index_cursor_class->next = text_index_key_cursor_next;
1794
index_cursor_class->reset = text_index_key_cursor_reset;
1798
/* Instance initializer for CamelTextIndexKeyCursor: binds the private data
 * and puts the iteration state at its starting values (key id 0, no flags,
 * no data, no current string). */
camel_text_index_key_cursor_init (CamelTextIndexKeyCursor *text_index_key_cursor)
1800
text_index_key_cursor->priv = G_TYPE_INSTANCE_GET_PRIVATE (text_index_key_cursor, CAMEL_TYPE_TEXT_INDEX_KEY_CURSOR, CamelTextIndexKeyCursorPrivate);
1801
/* text_index_key_cursor_next() passes keyid to camel_key_table_next() as
 * the position to continue from. */
text_index_key_cursor->priv->keyid = 0;
1802
text_index_key_cursor->priv->flags = 0;
1803
text_index_key_cursor->priv->data = 0;
1804
text_index_key_cursor->priv->current = NULL;
1807
/* Create a CamelTextIndexKeyCursor that iterates over @table.
 *
 * @idx:   the text index the cursor belongs to; a reference is taken.
 * @table: the key table to walk; a reference is taken and released again
 *         in text_index_key_cursor_dispose(). */
CamelTextIndexKeyCursor *
1808
camel_text_index_key_cursor_new (CamelTextIndex *idx,
1809
CamelKeyTable *table)
1811
CamelTextIndexKeyCursor *idc = g_object_new (CAMEL_TYPE_TEXT_INDEX_KEY_CURSOR, NULL);
1812
CamelIndexCursor *cic = &idc->parent;
1813
CamelTextIndexKeyCursorPrivate *p = idc->priv;
1815
/* The cursor keeps both the index and the table alive while it exists. */
cic->index = g_object_ref (idx);
1816
p->table = g_object_ref (table);
1821
/* ********************************************************************** */
1827
/* Root block layout used by the test code below: the generic block-file
 * root followed by the block ids at which the word and name tables are
 * rooted.  The test code casts the block file's root to this type and
 * allocates word_root / word_hash_root when they are still 0. */
struct _CamelIndexRoot {
1828
struct _CamelBlockRoot root;
1830
camel_block_t word_root; /* a keyindex containing the keyid -> word mapping */
1831
camel_block_t word_hash_root; /* a partitionindex containing word -> keyid mapping */
1833
camel_block_t name_root; /* same, for names */
1834
camel_block_t name_hash_root;
1837
/* Sample text for the test code below, which passes it to
 * camel_index_name_add_buffer() for every generated name.  It mixes
 * repeated words, differing letter case, punctuation and numbers. */
gchar wordbuffer[] = "This is a buffer of multiple words. Some of the words are duplicates"
1838
" while other words are the same, some are in difFerenT Different different case cAsE casE,"
1839
" with,with:with;with-with'with\"'\"various punctuation as well. So much for those Words. and 10"
1840
" numbers in a row too 1,2,3,4,5,6,7,8,9,10! Yay!.";
1849
CamelPartitionTable *cpi;
1850
CamelBlock *keyroot, *partroot;
1851
struct _CamelIndexRoot *root;
1853
gchar line[256], *key;
1855
gint index = 0, flags, data;
1858
CamelIndexName *idn;
1859
CamelIndexCursor *idc;
1863
printf ("Camel text index tester!\n");
1865
g_thread_init (NULL);
1866
camel_init (NULL, 0);
1868
idx = (CamelIndex *)camel_text_index_new ("textindex", O_CREAT|O_RDWR|O_TRUNC);
1871
camel_index_compress (idx);
1876
for (i = 0; i < 100; i++) {
1879
sprintf (name, "%d", i);
1880
printf ("Adding words to name '%s'\n", name);
1881
idn = camel_index_add_name (idx, name);
1882
camel_index_name_add_buffer (idn, wordbuffer, sizeof (wordbuffer) - 1);
1883
camel_index_write_name (idx, idn);
1884
g_object_unref (idn);
1887
printf ("Looking up which names contain word 'word'\n");
1888
idc = camel_index_find (idx, "words");
1889
while ((word = camel_index_cursor_next (idc)) != NULL) {
1890
printf (" name is '%s'\n", word);
1892
g_object_unref (idc);
1895
printf ("Looking up which names contain word 'truncate'\n");
1896
idc = camel_index_find (idx, "truncate");
1897
while ((word = camel_index_cursor_next (idc)) != NULL) {
1898
printf (" name is '%s'\n", word);
1900
g_object_unref (idc);
1903
camel_index_sync (idx);
1904
g_object_unref (idx);
1907
bs = camel_block_file_new ("blocks", "TESTINDX", CAMEL_BLOCK_SIZE);
1909
root = (struct _CamelIndexRoot *) bs->root;
1910
if (root->word_root == 0) {
1911
keyroot = camel_block_file_new_block (bs);
1912
root->word_root = keyroot->id;
1913
camel_block_file_touch_block (bs, bs->root_block);
1915
if (root->word_hash_root == 0) {
1916
partroot = camel_block_file_new_block (bs);
1917
root->word_hash_root = partroot->id;
1918
camel_block_file_touch_block (bs, bs->root_block);
1921
ki = camel_key_table_new (bs, root->word_root);
1922
cpi = camel_partition_table_new (bs, root->word_hash_root);
1924
fp = fopen ("/usr/dict/words", "r");
1930
while (fgets (line, sizeof (line), fp) != NULL) {
1931
line[strlen (line) - 1] = 0;
1933
/* see if its already there */
1934
keyid = camel_partition_table_lookup (cpi, line);
1936
m (printf ("Adding word '%s' %d\n", line, index));
1938
keyid = camel_key_table_add (ki, line, index, 0);
1939
m (printf (" key = %08x\n", keyid));
1941
camel_partition_table_add (cpi, line, keyid);
1943
m (printf ("Lookup word '%s'\n", line));
1944
keyid = camel_partition_table_lookup (cpi, line);
1945
m (printf (" key = %08x\n", keyid));
1948
m (printf ("Lookup key %08x\n", keyid));
1950
camel_key_table_set_flags (ki, keyid, index, 1);
1952
data = camel_key_table_lookup (ki, keyid, &key, &flags);
1953
m (printf (" word = '%s' %d %04x\n", key, data, flags));
1955
g_assert (data == index && strcmp (key, line) == 0);
1962
printf ("Scanning again\n");
1963
fseek (fp, SEEK_SET, 0);
1965
while (fgets (line, sizeof (line), fp) != NULL) {
1966
line[strlen (line) - 1] = 0;
1967
m (printf ("Lookup word '%s' %d\n", line, index));
1968
keyid = camel_partition_table_lookup (cpi, line);
1969
m (printf (" key = %08d\n", keyid));
1971
m (printf ("Lookup key %08x\n", keyid));
1972
data = camel_key_table_lookup (ki, keyid, &key, &flags);
1973
m (printf (" word = '%s' %d\n", key, data));
1975
g_assert (data == index && strcmp (key, line) == 0);
1983
printf ("Freeing partition index\n");
1984
camel_partition_table_free (cpi);
1986
printf ("Syncing block file\n");
1987
camel_block_file_sync (bs);