2
* Copyright (c) 1997, 98, 2000, 01
5
* This program is free software; you can redistribute it and/or modify
6
* it under the terms of the GNU General Public License as published by
7
* the Free Software Foundation; either version 2, or (at your option)
10
* This program is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
* GNU General Public License for more details.
2
* Copyright (c) 1997-2006 Motoyuki Kasahara
4
* Redistribution and use in source and binary forms, with or without
5
* modification, are permitted provided that the following conditions
7
* 1. Redistributions of source code must retain the above copyright
8
* notice, this list of conditions and the following disclaimer.
9
* 2. Redistributions in binary form must reproduce the above copyright
10
* notice, this list of conditions and the following disclaimer in the
11
* documentation and/or other materials provided with the distribution.
12
* 3. Neither the name of the project nor the names of its contributors
13
* may be used to endorse or promote products derived from this software
14
* without specific prior written permission.
16
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
20
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
16
29
#include "build-pre.h"
59
72
* Unexported functions.
61
static EB_Error_Code eb_hit_list_word EB_P((EB_Book *, EB_Search_Context *,
62
int, EB_Hit *, int *));
63
static EB_Error_Code eb_hit_list_keyword EB_P((EB_Book *, EB_Search_Context *,
64
int, EB_Hit *, int *));
65
static EB_Error_Code eb_hit_list_multi EB_P((EB_Book *, EB_Search_Context *,
66
int, EB_Hit *, int *));
67
static void eb_and_hit_lists EB_P((EB_Hit [], int *, int, int,
68
EB_Hit [][EB_TMP_MAX_HITS], int []));
74
static EB_Error_Code eb_hit_list_word(EB_Book *book,
75
EB_Search_Context *context, int max_hit_count, EB_Hit *hit_list,
77
static EB_Error_Code eb_hit_list_keyword(EB_Book *book,
78
EB_Search_Context *context, int max_hit_count, EB_Hit *hit_list,
80
static EB_Error_Code eb_hit_list_multi(EB_Book *book,
81
EB_Search_Context *context, int max_hit_count, EB_Hit *hit_list,
83
static void eb_and_hit_lists(EB_Hit and_list[EB_TMP_MAX_HITS],
84
int *and_count, int max_and_count, int hit_list_count,
85
EB_Hit hit_lists[EB_NUMBER_OF_SEARCH_CONTEXTS][EB_TMP_MAX_HITS],
86
int hit_counts[EB_NUMBER_OF_SEARCH_CONTEXTS]);
71
90
* Initialize search contexts of `book'.
74
eb_initialize_search_contexts(book)
93
eb_initialize_search_contexts(EB_Book *book)
77
95
EB_Search_Context *context;
83
101
i < EB_NUMBER_OF_SEARCH_CONTEXTS; i++, context++) {
84
102
context->code = EB_SEARCH_NONE;
85
103
context->compare_pre = NULL;
86
context->compare_hit = NULL;
104
context->compare_single = NULL;
105
context->compare_group = NULL;
87
106
context->comparison_result = -1;
88
107
context->word[0] = '\0';
89
108
context->canonicalized_word[0] = '\0';
187
204
eb_initialize_search(&subbook->endword_asis);
188
205
eb_initialize_search(&subbook->endword_kana);
189
206
eb_initialize_search(&subbook->keyword);
207
eb_initialize_search(&subbook->cross);
190
208
eb_initialize_search(&subbook->menu);
191
209
eb_initialize_search(&subbook->copyright);
192
210
eb_initialize_search(&subbook->text);
193
211
eb_initialize_search(&subbook->sound);
213
for (i = 0, multi = subbook->multis; i < EB_MAX_MULTI_SEARCHES;
215
eb_initialize_search(&multi->search);
216
multi->title[0] = '\0';
217
multi->entry_count = 0;
218
for (j = 0, entry = multi->entries;
219
j < EB_MAX_MULTI_ENTRIES; j++, entry++) {
220
eb_initialize_search(entry);
195
224
LOG(("out: eb_initialize_searches(book=%d)", (int)book->code));
221
252
eb_finalize_search(&subbook->text);
222
253
eb_finalize_search(&subbook->sound);
255
for (i = 0, multi = subbook->multis; i < EB_MAX_KEYWORDS;
257
eb_finalize_search(&multi->search);
258
multi->entry_count = 0;
259
for (j = 0, entry = multi->entries;
260
j < multi->entry_count; j++, entry++) {
261
eb_finalize_search(entry);
224
265
LOG(("out: eb_finalize_searches()"));
232
273
* If succeeded, 0 is returned. Otherwise -1 is returned.
235
eb_presearch_word(book, context)
237
EB_Search_Context *context;
276
eb_presearch_word(EB_Book *book, EB_Search_Context *context)
239
278
EB_Error_Code error_code;
260
299
* Seek and read a page.
262
if (zio_lseek(&book->subbook_current->text_zio,
263
(off_t)(context->page - 1) * EB_SIZE_PAGE, SEEK_SET) < 0) {
301
if (zio_lseek(&book->subbook_current->text_zio,
302
(context->page - 1) * EB_SIZE_PAGE, SEEK_SET) < 0) {
264
303
cache_book_code = EB_BOOK_NONE;
265
304
error_code = EB_ERR_FAIL_SEEK_TEXT;
285
324
context->offset = 4;
286
325
cache_p = cache_buffer + 4;
288
LOG(("aux: eb_presearch_word(page_id=0x%02x, entry_length=%d, \
289
entry_arrangement=%d, entry_count=%d)",
290
context->page_id, context->entry_length,
327
LOG(("aux: eb_presearch_word(page=%d, page_id=0x%02x, \
328
entry_length=%d, entry_arrangement=%d, entry_count=%d)",
329
context->page, context->page_id, context->entry_length,
291
330
context->entry_arrangement, context->entry_count));
357
396
* Get hit entries of a submitted search request.
360
eb_hit_list(book, max_hit_count, hit_list, hit_count)
399
eb_hit_list(EB_Book *book, int max_hit_count, EB_Hit *hit_list, int *hit_count)
366
401
EB_Error_Code error_code;
367
402
EB_Search_Context temporary_context;
410
445
case EB_SEARCH_KEYWORD:
446
case EB_SEARCH_CROSS:
412
* In case of keyword search.
448
* In case of keyword or cross search.
415
451
int search_is_over = 0;
417
453
for (i = 0; i < EB_MAX_KEYWORDS; i++) {
418
if (book->search_contexts[i].code != EB_SEARCH_KEYWORD)
454
if (book->search_contexts[i].code != EB_SEARCH_KEYWORD
455
&& book->search_contexts[i].code != EB_SEARCH_CROSS)
420
457
memcpy(&temporary_context, book->search_contexts + i,
421
458
sizeof(EB_Search_Context));
437
474
temporary_hit_counts);
439
476
for (i = 0; i < EB_MAX_MULTI_ENTRIES; i++) {
440
if (book->search_contexts[i].code != EB_SEARCH_KEYWORD)
477
if (book->search_contexts[i].code != EB_SEARCH_KEYWORD
478
&& book->search_contexts[i].code != EB_SEARCH_CROSS)
442
480
error_code = eb_hit_list_keyword(book,
443
481
book->search_contexts + i, temporary_hit_counts[i],
507
545
* Unlock cache data and the book.
510
LOG(("out: eb_hit_list(hit_count=%d) = %s",
548
LOG(("out: eb_hit_list(hit_count=%d) = %s",
511
549
*hit_count, eb_error_string(EB_SUCCESS)));
512
550
eb_unlock(&book->lock);
513
551
pthread_mutex_unlock(&cache_mutex);
529
567
* Get hit entries of a submitted exactword/word/endword search request.
531
569
static EB_Error_Code
532
eb_hit_list_word(book, context, max_hit_count, hit_list, hit_count)
534
EB_Search_Context *context;
570
eb_hit_list_word(EB_Book *book, EB_Search_Context *context, int max_hit_count,
571
EB_Hit *hit_list, int *hit_count)
539
573
EB_Error_Code error_code;
544
LOG(("in: eb_hit_list_word(book=%d, max_hit_count=%d)", (int)book->code,
578
LOG(("in: eb_hit_list_word(book=%d, max_hit_count=%d)", (int)book->code,
562
596
* 1. The search process reaches the end of an index page,
563
597
* and tries to read the next page.
564
598
* 2. Someone else used the cache buffer.
566
600
* In case 1, the search process reads the page and updates
567
601
* the search context. In case 2, it reads the page but
568
602
* must not update the context!
570
604
if (cache_book_code != book->code || cache_page != context->page) {
571
605
if (zio_lseek(&book->subbook_current->text_zio,
572
(off_t)(context->page - 1) * EB_SIZE_PAGE, SEEK_SET) < 0) {
606
(context->page - 1) * EB_SIZE_PAGE, SEEK_SET) < 0) {
573
607
error_code = EB_ERR_FAIL_SEEK_TEXT;
601
635
cache_p = cache_buffer + context->offset;
603
LOG(("aux: eb_hit_list_word(page_id=0x%02x, entry_length=%d, \
604
entry_arrangement=%d, entry_count=%d)",
605
context->page_id, context->entry_length,
637
LOG(("aux: eb_hit_list_word(page=%d, page_id=0x%02x, \
638
entry_length=%d, entry_arrangement=%d, entry_count=%d)",
639
context->page, context->page_id, context->entry_length,
606
640
context->entry_arrangement, context->entry_count));
608
642
if (!PAGE_ID_IS_LEAF_LAYER(context->page_id)) {
631
665
* If matched, add it to a hit list.
633
667
context->comparison_result
634
= context->compare_hit(context->word, cache_p,
635
(size_t)context->entry_length);
668
= context->compare_single(context->word, cache_p,
669
context->entry_length);
636
670
if (context->comparison_result == 0) {
637
671
hit->heading.page
638
672
= eb_uint4(cache_p + context->entry_length + 6);
678
712
* If matched, add it to a hit list.
680
714
context->comparison_result
681
= context->compare_hit(context->word, cache_p + 1,
682
(size_t)context->entry_length);
715
= context->compare_single(context->word, cache_p + 1,
716
context->entry_length);
683
717
if (context->comparison_result == 0) {
684
718
hit->heading.page
685
719
= eb_uint4(cache_p + context->entry_length + 7);
729
763
* If matched, add it to a hit list.
731
765
context->comparison_result
732
= context->compare_hit(context->canonicalized_word,
733
cache_p + 2, (size_t)context->entry_length);
734
if (context->comparison_result == 0
735
&& context->compare_hit(context->word, cache_p + 2,
736
(size_t)context->entry_length) == 0) {
766
= context->compare_single(context->canonicalized_word,
767
cache_p + 2, context->entry_length);
768
if (context->comparison_result == 0) {
737
769
hit->heading.page
738
770
= eb_uint4(cache_p + context->entry_length + 8);
739
771
hit->heading.offset
762
794
context->comparison_result
763
= context->compare_hit(context->canonicalized_word,
764
cache_p + 4, (size_t)context->entry_length);
795
= context->compare_single(context->canonicalized_word,
796
cache_p + 4, context->entry_length);
765
797
context->in_group_entry = 1;
766
798
cache_p += context->entry_length + 4;
767
799
context->offset += context->entry_length + 4;
783
815
if (context->comparison_result == 0
784
816
&& context->in_group_entry
785
&& context->compare_hit(context->word, cache_p + 2,
786
(size_t)context->entry_length) == 0) {
817
&& context->compare_group(context->word, cache_p + 2,
818
context->entry_length) == 0) {
787
819
hit->heading.page
788
820
= eb_uint4(cache_p + context->entry_length + 8);
789
821
hit->heading.offset
846
878
* Get hit entries of a submitted keyword search request.
848
880
static EB_Error_Code
849
eb_hit_list_keyword(book, context, max_hit_count, hit_list, hit_count)
851
EB_Search_Context *context;
881
eb_hit_list_keyword(EB_Book *book, EB_Search_Context *context,
882
int max_hit_count, EB_Hit *hit_list, int *hit_count)
856
884
EB_Error_Code error_code;
857
885
EB_Text_Context text_context;
894
922
* 1. The search process reaches the end of an index page,
895
923
* and tries to read the next page.
896
924
* 2. Someone else used the cache buffer.
898
926
* In case 1, the search process reads the page and updates
899
927
* the search context. In case 2, it reads the page but
900
928
* must not update the context!
902
930
if (cache_book_code != book->code || cache_page != context->page) {
903
931
if (zio_lseek(&book->subbook_current->text_zio,
904
(off_t)(context->page - 1) * EB_SIZE_PAGE, SEEK_SET) < 0) {
932
(context->page - 1) * EB_SIZE_PAGE, SEEK_SET) < 0) {
905
933
error_code = EB_ERR_FAIL_SEEK_TEXT;
933
961
cache_p = cache_buffer + context->offset;
935
LOG(("aux: eb_hit_list_keyword(page_id=0x%02x, entry_length=%d, \
936
entry_arrangement=%d, entry_count=%d)",
937
context->page_id, context->entry_length,
963
LOG(("aux: eb_hit_list_keyword(page=%d, page_id=0x%02x, \
964
entry_length=%d, entry_arrangement=%d, entry_count=%d)",
965
context->page, context->page_id, context->entry_length,
938
966
context->entry_arrangement, context->entry_count));
940
968
if (!PAGE_ID_IS_LEAF_LAYER(context->page_id)) {
963
991
* If matched, add it to a hit list.
965
993
context->comparison_result
966
= context->compare_hit(context->word, cache_p,
967
(size_t)context->entry_length);
994
= context->compare_single(context->word, cache_p,
995
context->entry_length);
968
996
if (context->comparison_result == 0) {
969
997
hit->heading.page
970
998
= eb_uint4(cache_p + context->entry_length + 6);
1009
1037
* If matched, add it to a hit list.
1011
1039
context->comparison_result
1012
= context->compare_hit(context->word, cache_p + 1,
1013
(size_t)context->entry_length);
1040
= context->compare_single(context->word, cache_p + 1,
1041
context->entry_length);
1014
1042
if (context->comparison_result == 0) {
1015
1043
hit->heading.page
1016
1044
= eb_uint4(cache_p + context->entry_length + 7);
1060
1088
* If matched, add it to a hit list.
1062
1090
context->comparison_result
1063
= context->compare_hit(context->canonicalized_word,
1064
cache_p + 2, (size_t)context->entry_length);
1065
if (context->comparison_result == 0
1066
&& context->compare_hit(context->word, cache_p + 2,
1067
(size_t)context->entry_length) == 0) {
1091
= context->compare_single(context->canonicalized_word,
1092
cache_p + 2, context->entry_length);
1093
if (context->comparison_result == 0) {
1068
1094
hit->heading.page
1069
1095
= eb_uint4(cache_p + context->entry_length + 8);
1070
1096
hit->heading.offset
1093
1119
context->comparison_result
1094
= context->compare_hit(context->word, cache_p + 6,
1095
(size_t)context->entry_length);
1120
= context->compare_single(context->word, cache_p + 6,
1121
context->entry_length);
1096
1122
context->keyword_heading.page
1097
1123
= eb_uint4(cache_p + context->entry_length + 6);
1098
1124
context->keyword_heading.offset
1199
1225
* Get hit entries of a submitted multi search request.
1201
1227
static EB_Error_Code
1202
eb_hit_list_multi(book, context, max_hit_count, hit_list, hit_count)
1204
EB_Search_Context *context;
1228
eb_hit_list_multi(EB_Book *book, EB_Search_Context *context, int max_hit_count,
1229
EB_Hit *hit_list, int *hit_count)
1209
1231
EB_Error_Code error_code;
1214
LOG(("in: eb_hit_list_multi(book=%d, max_hit_count=%d)", (int)book->code,
1236
LOG(("in: eb_hit_list_multi(book=%d, max_hit_count=%d)", (int)book->code,
1215
1237
max_hit_count));
1217
1239
hit = hit_list;
1232
1254
* 1. The search process reaches the end of an index page,
1233
1255
* and tries to read the next page.
1234
1256
* 2. Someone else used the cache buffer.
1236
1258
* In case 1, the search process reads the page and updates
1237
1259
* the search context. In case 2, it reads the page but
1238
1260
* must not update the context!
1240
1262
if (cache_book_code != book->code || cache_page != context->page) {
1241
1263
if (zio_lseek(&book->subbook_current->text_zio,
1242
(off_t)(context->page - 1) * EB_SIZE_PAGE, SEEK_SET) < 0) {
1264
(context->page - 1) * EB_SIZE_PAGE, SEEK_SET) < 0) {
1243
1265
error_code = EB_ERR_FAIL_SEEK_TEXT;
1271
1293
cache_p = cache_buffer + context->offset;
1273
LOG(("aux: eb_hit_list_multi(page_id=0x%02x, entry_length=%d, \
1274
entry_arrangement=%d, entry_count=%d)",
1275
context->page_id, context->entry_length,
1295
LOG(("aux: eb_hit_list_multi(page=%d, page_id=0x%02x, \
1296
entry_length=%d, entry_arrangement=%d, entry_count=%d)",
1297
context->page, context->page_id, context->entry_length,
1276
1298
context->entry_arrangement, context->entry_count));
1278
1300
if (!PAGE_ID_IS_LEAF_LAYER(context->page_id)) {
1301
1323
* If matched, add it to a hit list.
1303
1325
context->comparison_result
1304
= context->compare_hit(context->word, cache_p,
1305
(size_t)context->entry_length);
1326
= context->compare_single(context->word, cache_p,
1327
context->entry_length);
1306
1328
if (context->comparison_result == 0) {
1307
1329
hit->heading.page
1308
1330
= eb_uint4(cache_p + context->entry_length + 6);
1347
1369
* If matched, add it to a hit list.
1349
1371
context->comparison_result
1350
= context->compare_hit(context->word, cache_p + 1,
1351
(size_t)context->entry_length);
1372
= context->compare_single(context->word, cache_p + 1,
1373
context->entry_length);
1352
1374
if (context->comparison_result == 0) {
1353
1375
hit->heading.page
1354
1376
= eb_uint4(cache_p + context->entry_length + 7);
1398
1420
* If matched, add it to a hit list.
1400
1422
context->comparison_result
1401
= context->compare_hit(context->canonicalized_word,
1402
cache_p + 2, (size_t)context->entry_length);
1403
if (context->comparison_result == 0
1404
&& context->compare_hit(context->word, cache_p + 2,
1405
(size_t)context->entry_length) == 0) {
1423
= context->compare_single(context->canonicalized_word,
1424
cache_p + 2, context->entry_length);
1425
if (context->comparison_result == 0) {
1406
1426
hit->heading.page
1407
1427
= eb_uint4(cache_p + context->entry_length + 8);
1408
1428
hit->heading.offset
1431
1451
context->comparison_result
1432
= context->compare_hit(context->word,
1433
cache_p + 6, (size_t)context->entry_length);
1452
= context->compare_single(context->word, cache_p + 6,
1453
context->entry_length);
1434
1454
context->in_group_entry = 1;
1435
1455
cache_p += context->entry_length + 6;
1436
1456
context->offset += context->entry_length + 6;
1509
1529
* and_list = hit_lists[0] AND hit_lists[1] AND ...
1512
eb_and_hit_lists(and_list, and_count, max_and_count, hit_list_count,
1513
hit_lists, hit_counts)
1514
EB_Hit and_list[EB_TMP_MAX_HITS];
1518
EB_Hit hit_lists[EB_NUMBER_OF_SEARCH_CONTEXTS][EB_TMP_MAX_HITS];
1519
int hit_counts[EB_NUMBER_OF_SEARCH_CONTEXTS];
1532
eb_and_hit_lists(EB_Hit and_list[EB_TMP_MAX_HITS], int *and_count,
1533
int max_and_count, int hit_list_count,
1534
EB_Hit hit_lists[EB_NUMBER_OF_SEARCH_CONTEXTS][EB_TMP_MAX_HITS],
1535
int hit_counts[EB_NUMBER_OF_SEARCH_CONTEXTS])
1521
1537
int hit_indexes[EB_NUMBER_OF_SEARCH_CONTEXTS];
1522
1538
int greatest_list;
1528
1544
int increment_count;
1531
LOG(("in: eb_and_hit_lists(max_and_count=%d, hit_list_count=%d)",
1547
LOG(("in: eb_and_hit_lists(max_and_count=%d, hit_list_count=%d)",
1532
1548
max_and_count, hit_list_count));