110
112
void est_doc_add_hidden_text(ESTDOC *doc, const char *text);
115
/* Attach keywords to a document object.
116
`doc' specifies a document object.
117
`kwords' specifies a map object of keywords. Keys of the map should be keywords of the
118
document and values should be their scores in decimal string. The map object is copied
120
void est_doc_set_keywords(ESTDOC *doc, CBMAP *kwords);
123
/* Set the substitute score of a document object.
124
`doc' specifies a document object.
125
`score' specifies the substitute score. If it is negative, the substitute score setting is
127
void est_doc_set_score(ESTDOC *doc, int score);
113
130
/* Get the ID number of a document object.
114
131
`doc' specifies a document object.
115
132
The return value is the ID number of the document object. If the object has not been
148
165
char *est_doc_cat_texts(ESTDOC *doc);
168
/* Get attached keywords of a document object.
169
`doc' specifies a document object.
170
The return value is a map object of keywords and their scores in decimal string. If no
171
keyword is attached, `NULL' is returned. The life duration of the returned object is
172
synchronous with the one of the document object. */
173
CBMAP *est_doc_keywords(ESTDOC *doc);
176
/* Get the substitute score of a document object.
177
`doc' specifies a document object.
178
The return value is the substitute score or -1 if it is not set. */
179
int est_doc_score(ESTDOC *doc);
151
182
/* Dump draft data of a document object.
152
183
`doc' specifies a document object.
153
184
The return value is draft data of the document object. Because the region of the return value
206
239
#define ESTOPNUMLE "NUMLE" /* number or date is less than or equal to */
207
240
#define ESTOPNUMBT "NUMBT" /* number or date is between two tokens of */
242
#define ESTORDIDA "[IDA]" /* ID numbers in ascending order */
243
#define ESTORDIDD "[IDD]" /* ID numbers in descending order */
244
#define ESTORDSCA "[SCA]" /* scores in ascending order */
245
#define ESTORDSCD "[SCD]" /* scores in descending order */
209
246
#define ESTORDSTRA "STRA" /* strings in ascending order */
210
247
#define ESTORDSTRD "STRD" /* strings in descending order */
211
248
#define ESTORDNUMA "NUMA" /* numbers in ascending order */
212
249
#define ESTORDNUMD "NUMD" /* numbers in descending order */
251
#define ESTECLSIMURL 10.0 /* eclipse considering similarity and URL */
252
#define ESTECLSERV 100.0 /* eclipse on server basis */
253
#define ESTECLDIR 101.0 /* eclipse on directory basis */
254
#define ESTECLFILE 102.0 /* eclipse on file basis */
214
256
typedef struct { /* type of structure for search conditions */
215
257
char *phrase; /* search phrase */
216
258
int gstep; /* step of N-gram */
217
259
int tfidf; /* whether with TF-IDF tuning */
218
int simple; /* whether with the simplified phrase */
260
int pmode; /* mode of phrase form */
261
void (*cbxpn)(const char *, CBLIST *); /* callback function for query expansion */
219
262
CBLIST *attrs; /* conditions with attributes */
220
263
char *order; /* sorting order */
221
264
int max; /* maximum number of retrieval */
265
int skip; /* number of documents to be skipped */
266
int auxmin; /* minimum hits to adopt the auxiliary index */
267
CBMAP *auxwords; /* words which the auxiliary index has been used */
222
268
int scfb; /* whether to feed back scores */
223
269
int *scores; /* array of scores */
224
270
int snum; /* number of elemnts of the score array */
225
271
int opts; /* options for preservation */
226
272
double ecllim; /* lower limit of similarity eclipse */
227
273
CBMAP *shadows; /* map of eclipsed documents */
274
char *distinct; /* distinct attribute */
275
int mask; /* mask for meta search */
230
278
enum { /* enumeration for options */
274
324
void est_cond_set_max(ESTCOND *cond, int max);
327
/* Set the number of skipped documents of a condition object.
328
`cond' specifies a condition object.
329
`skip' specifies the number of documents to be skipped in the search result. */
330
void est_cond_set_skip(ESTCOND *cond, int skip);
277
333
/* Set options of retrieval of a condition object.
278
334
`cond' specifies a condition object.
279
335
`options' specifies options: `ESTCONDSURE' specifies that it checks every N-gram key,
280
336
`ESTCONDUSUAL', which is the default, specifies that it checks N-gram keys with skipping one
281
337
key, `ESTCONDFAST' skips two keys, `ESTCONDAGITO' skips three keys, `ESTCONDNOIDF' specifies
282
not to perform TF-IDF tuning, `ESTCONDSIMPLE' specifies to use simplified phrase, `ESTCONDETCH'
283
specifies to attach the keyword vector (only for the node API), `ESTCONDSCFB' specifies to
284
feed back scores (only for debugging). Each option can be specified at the same time by
285
bitwise or. If keys are skipped, though search speed is improved, the relevance ratio grows
338
not to perform TF-IDF tuning, `ESTCONDSIMPLE' specifies to use simplified phrase,
339
`ESTCONDROUGH' specifies to use rough phrase, `ESTCONDUNION' specifies to use union phrase,
340
`ESTCONDISECT' specifies to use intersection phrase, `ESTCONDSCFB' specifies to feed back
341
scores (only for debugging). Each option can be specified at the same time by bitwise or. If
342
keys are skipped, though search speed is improved, the relevance ratio grows less. */
287
343
void est_cond_set_options(ESTCOND *cond, int options);
346
/* Set permission to adopt result of the auxiliary index.
347
`cond' specifies a condition object.
348
`min' specifies the minimum hits to adopt result of the auxiliary index. If it is not more
349
than 0, the auxiliary index is not used. By default, it is 32. */
350
void est_cond_set_auxiliary(ESTCOND *cond, int min);
290
353
/* Set the lower limit of similarity eclipse.
291
354
`cond' specifies a condition object.
292
355
`limit' specifies the lower limit of similarity for documents to be eclipsed. Similarity is
293
between 0.0 and 1.0. */
356
between 0.0 and 1.0. If `ESTECLSIMURL' is added to the limit, similarity is weighted by URL.
357
If the limit is `ESTECLSERV', similarity is ignored and documents in the same server are
358
eclipsed. If the limit is `ESTECLDIR', similarity is ignored and documents in the same
359
directory are eclipsed. If the limit is `ESTECLFILE', similarity is ignored and documents of
360
the same file are eclipsed. */
294
361
void est_cond_set_eclipse(ESTCOND *cond, double limit);
364
/* Set the attribute distinction filter.
365
`cond' specifies a condition object.
366
`name' specifies the name of an attribute to be distinct.
367
If this filter is set, candidates which have the same value of the attribute are omitted. */
368
void est_cond_set_distinct(ESTCOND *cond, const char *name);
371
/* Set the mask of targets of meta search.
372
`cond' specifies a condition object.
373
`mask' specifies a masking number. 1 means the first target, 2 means the second target, 4
374
means the third target, and power values of 2 and their summation compose the mask. */
375
void est_cond_set_mask(ESTCOND *cond, int mask);
298
379
/*************************************************************************************************
299
380
* API for database
314
396
typedef struct { /* type of structure for a database object */
315
397
char *name; /* name of the database */
398
int inode; /* inode of the database */
316
399
DEPOT *metadb; /* handle of the meta database */
317
400
ESTIDX *idxdb; /* handles of the inverted indexs */
318
401
VILLA *fwmdb; /* handle of the database for forward matching */
402
VILLA *auxdb; /* handle of the auxiliary index */
403
VILLA *xfmdb; /* handle of the database for aux forward matching */
319
404
CURIA *attrdb; /* handle of the database for attrutes */
320
405
CURIA *textdb; /* handle of the database for texts */
321
406
CURIA *kwddb; /* handle of the database for keywords */
322
407
VILLA *listdb; /* handle of the database for document list */
408
CBMAP *aidxs; /* map of attribute indexes */
409
CBLIST *pdocs; /* list of pseudo documents */
410
CBMAP *puris; /* map of URIs of pseudo documents */
323
411
int ecode; /* last happened error code */
324
412
int fatal; /* whether to have a fatal error */
325
413
int dseq; /* sequence for document IDs */
326
414
int dnum; /* number of the documents */
327
415
int amode; /* mode of text analyzer */
416
int zmode; /* mode of data compression */
417
int smode; /* mode of score type */
328
418
CBMAP *idxcc; /* cache for the inverted index */
419
CBMAP *auxcc; /* cache for the auxiliary index */
329
420
size_t icsiz; /* power of the cache */
330
421
size_t icmax; /* max size of the cache */
331
422
CBMAP *outcc; /* cache for deleted documents */
366
460
ESTDBTRUNC = 1 << 3, /* a writer truncating */
367
461
ESTDBNOLCK = 1 << 4, /* open without locking */
368
462
ESTDBLCKNB = 1 << 5, /* lock without blocking */
369
ESTDBPERFNG = 1 << 6 /* use perfect N-gram analyzer */
463
ESTDBPERFNG = 1 << 10, /* use perfect N-gram analyzer */
464
ESTDBCHRCAT = 1 << 11, /* use character category analyzer */
465
ESTDBSMALL = 1 << 20, /* small tuning */
466
ESTDBLARGE = 1 << 21, /* large tuning */
467
ESTDBHUGE = 1 << 22, /* huge tuning */
468
ESTDBHUGE2 = 1 << 23, /* huge tuning second */
469
ESTDBHUGE3 = 1 << 24, /* huge tuning third */
470
ESTDBSCVOID = 1 << 25, /* store scores as void */
471
ESTDBSCINT = 1 << 26, /* store scores as integer */
472
ESTDBSCASIS = 1 << 27 /* refrain from adjustment of scores */
475
enum { /* enumeration for data types of attribute index */
476
ESTIDXATTRSEQ, /* for multipurpose sequencial access method */
477
ESTIDXATTRSTR, /* for narrowing with attributes as strings */
478
ESTIDXATTRNUM /* for narrowing with attributes as numbers */
372
481
enum { /* enumeration for options of optimization */
403
518
bitwise or: `ESTDBNOLCK', which means it opens a database file without file locking, or
404
519
`ESTDBLCKNB', which means locking is performed without blocking. If `ESTDBNOLCK' is used,
405
520
the application is responsible for exclusion control. `ESTDBCREAT' can be added to by bitwise
406
or: `ESTDBPERFNG', which means N-gram analysis is performed against European text also.
521
or: `ESTDBPERFNG', which means N-gram analysis is performed against European text also,
522
`ESTDBCHRCAT', which means character category analysis is performed instead of N-gram analysis,
523
`ESTDBSMALL', which means the index is tuned to register less than 50000 documents,
524
`ESTDBLARGE', which means the index is tuned to register more than 300000 documents,
525
`ESTDBHUGE', which means the index is tuned to register more than 1000000 documents,
526
`ESTDBHUGE2', which means the index is tuned to register more than 5000000 documents,
527
`ESTDBHUGE3', which means the index is tuned to register more than 10000000 documents,
528
`ESTDBSCVOID', which means scores are stored as void, `ESTDBSCINT', which means scores are
529
stored as 32-bit integer, `ESTDBSCASIS', which means scores are stored as-is and marked not
530
to be tuned when searching.
407
531
`ecp' specifies the pointer to a variable to which the error code is assigned.
408
532
The return value is a database object of the database or `NULL' if failure. */
409
533
ESTDB *est_db_open(const char *name, int omode, int *ecp);
428
552
int est_db_fatal(ESTDB *db);
555
/* Add an index for narrowing or sorting with document attributes.
556
`db' specifies a database object connected as a writer.
557
`name' specifies the name of an attribute.
558
`type' specifies the data type of attribute index; `ESTIDXATTRSEQ' for multipurpose sequential
559
access method, `ESTIDXATTRSTR' for narrowing with attributes as strings, `ESTIDXATTRNUM' for
560
narrowing with attributes as numbers.
561
The return value is true if success, else it is false.
562
Note that this function should be called before the first document is registered. */
563
int est_db_add_attr_index(ESTDB *db, const char *name, int type);
431
566
/* Flush index words in the cache of a database.
432
567
`db' specifies a database object connected as a writer.
433
568
`max' specifies the maximum number of words to be flushed. If it is not more than zero, all
451
586
int est_db_optimize(ESTDB *db, int options);
589
/* Merge another database.
590
`db' specifies a database object connected as a writer.
591
`name' specifies the name of another database directory.
592
`options' specifies options: `ESTMGCLEAN' to clean up dispensable regions of the deleted
594
The return value is true if success, else it is false.
595
Creation options of the two databases should be same entirely. ID numbers of imported
596
documents are changed within the sequence of the destination database. If URIs of imported
597
documents conflict with those of existing documents, existing documents are removed. */
598
int est_db_merge(ESTDB *db, const char *name, int options);
454
601
/* Add a document to a database.
455
602
`db' specifies a database object connected as a writer.
456
603
`doc' specifies a document object. The document object should have the URI attribute.
457
604
`options' specifies options: `ESTPDCLEAN' to clean up dispensable regions of the overwritten
605
document, `ESTPDWEIGHT' to weight scores statically with score weighting attribute.
459
606
The return value is true if success, else it is false.
460
607
If the URI attribute is same with an existing document in the database, the existing one is
484
631
`db' specifies a database object.
485
632
`id' specifies the ID number of a registered document.
486
633
`options' specifies options: `ESTGDNOATTR' to ignore attributes, `ESTGDNOTEXT' to ignore
487
the body text. The two can be specified at the same time by bitwise or.
488
The return value is a document object. On error, `NULL' is returned. */
634
the body text, `ESTGDNOKWD' to ignore keywords. The three can be specified at the same time
636
The return value is a document object. It should be deleted with `est_doc_delete' if it is
637
no longer in use. On error, `NULL' is returned. */
489
638
ESTDOC *est_db_get_doc(ESTDB *db, int id, int options);
546
695
int *est_db_search(ESTDB *db, ESTCOND *cond, int *nump, CBMAP *hints);
698
/* Search plural databases for documents corresponding to a condition.
699
`dbs' specifies an array whose elements are database objects.
700
`dbnum' specifies the number of elements of the array.
701
`cond' specifies a condition object.
702
`nump' specifies the pointer to a variable to which the number of elements in the result is
704
`hints' specifies a map object into which the number of documents corresponding to each word
705
is stored. If a word is in a negative condition, the number is negative. The element whose
706
key is an empty string specifies the number of whole result. If it is `NULL', it is not used.
707
The return value is an array whose elements are indexes of container databases and ID numbers
708
of documents in each database alternately.
709
This function never fails. Even if no document corresponds or an error occurs, an empty
710
array is returned. Because the region of the return value is allocated with the `malloc'
711
call, it should be released with the `free' call if it is no longer in use. */
712
int *est_db_search_meta(ESTDB **dbs, int dbnum, ESTCOND *cond, int *nump, CBMAP *hints);
549
715
/* Check whether a document object matches the phrase of a search condition object definitely.
550
716
`db' specifies a database object.
551
717
`doc' specifies a document object.
558
724
/* Set the maximum size of the cache memory of a database.
559
725
`db' specifies a database object.
560
`size' specifies the maximum size of the index cache. By default, it is 64MB. If it is not
561
more than 0, the current size is not changed.
726
`size' specifies the maximum size of the index cache. By default, it is 64MB. If it is
727
negative, the current size is not changed.
562
728
`anum' specifies the maximum number of cached records for document attributes. By default, it
563
is 8192. If it is not more than 0, the current size is not changed.
729
is 8192. If it is negative, the current size is not changed.
564
730
`tnum' specifies the maximum number of cached records for document texts. By default, it is
565
1024. If it is not more than 0, the current size is not changed.
731
1024. If it is negative, the current size is not changed.
566
732
`rnum' specifies the maximum number of cached records for occurrence results. By default, it
567
is 256. If it is not more than 0, the current size is not changed. */
733
is 256. If it is negative, the current size is not changed. */
568
734
void est_db_set_cache_size(ESTDB *db, size_t size, int anum, int tnum, int rnum);
737
/* Add a pseudo index directory to a database.
738
`db' specifies a database object.
739
`path' specifies the path of a pseudo index directory.
740
The return value is true if success, else it is false. */
741
int est_db_add_pseudo_index(ESTDB *db, const char *path);
572
745
/*************************************************************************************************
573
746
* features for experts
574
747
*************************************************************************************************/
577
#define _EST_VERSION "1.0.6"
578
#define _EST_LIBVER 700
750
#define _EST_VERSION "1.4.9"
751
#define _EST_LIBVER 834
579
752
#define _EST_PROTVER "1.0"
581
754
#define _EST_PROJURL "http://hyperestraier.sourceforge.net/"
613
791
void est_break_text_perfng(const char *text, CBLIST *list, int norm, int tail);
794
/* Break a sentence of text and extract words, using character category analyzer.
795
`text' specifies a sentence of text.
796
`list' specifies a list object to which extracted words are added.
797
`norm' specifies whether to normalize the text. */
798
void est_break_text_chrcat(const char *text, CBLIST *list, int norm);
801
/* Make a snippet of an arbitrary string.
802
`words' specifies a list object of words to be highlighted.
803
`wwidth' specifies whole width of the result.
804
`hwidth' specifies width of strings picked up from the beginning of the text.
805
`awidth' specifies width of strings picked up around each highlighted word.
806
The return value is a snippet string of the string. Because the region of the return value is
807
allocated with the `malloc' call, it should be released with the `free' call if it is no
809
char *est_str_make_snippet(const char *str, const CBLIST *words,
810
int wwidth, int hwidth, int awidth);
616
813
/* Convert the character encoding of a string.
617
814
`ptr' specifies the pointer to a region.
618
815
`size' specifies the size of the region. If it is negative, the size is assigned with
673
870
`sp' specifies the pointer to a variable to which the size of the region of the return
674
871
value is assigned.
872
`mode' specifies detail behavior. 0 specifies using the standard deflate encoding, -1
873
specifies the raw deflate encoding, and 1 specifies the GZIP encoding.
675
874
If successful, the return value is the pointer to the result object, else, it is `NULL'.
676
875
Because the region of the return value is allocated with the `malloc' call, it should be
677
876
released with the `free' call if it is no longer in use. */
678
char *est_deflate(const char *ptr, int size, int *sp);
877
char *est_deflate(const char *ptr, int size, int *sp, int mode);
681
880
/* Decompress a serial object compressed with ZLIB.
683
882
`size' specifies the size of the region.
684
883
`sp' specifies the pointer to a variable to which the size of the region of the return
685
884
value is assigned. If it is `NULL', it is not used.
686
If successful, the return value is the pointer to the result object, else, it is `NULL'.
687
Because an additional zero code is appended at the end of the region of the return value,
688
the return value can be treated as a character string. Because the region of the return
689
value is allocated with the `malloc' call, it should be released with the `free' call if it
690
is no longer in use. */
691
char *est_inflate(const char *ptr, int size, int *sp);
885
`mode' specifies detail behavior. 0 specifies using the standard deflate encoding, -1
886
specifies the raw deflate encoding, and 1 specifies the GZIP encoding.
887
If successful, the return value is the pointer to the result object, else, it is `NULL'.
888
Because an additional zero code is appended at the end of the region of the return value,
889
the return value can be treated as a character string. Because the region of the return
890
value is allocated with the `malloc' call, it should be released with the `free' call if it
891
is no longer in use. */
892
char *est_inflate(const char *ptr, int size, int *sp, int mode);
895
/* Compress a serial object with LZO.
896
`ptr' specifies the pointer to a region.
897
`size' specifies the size of the region. If it is negative, the size is assigned with
899
`sp' specifies the pointer to a variable to which the size of the region of the return
901
If successful, the return value is the pointer to the result object, else, it is `NULL'.
902
Because the region of the return value is allocated with the `malloc' call, it should be
903
released with the `free' call if it is no longer in use. */
904
char *est_lzoencode(const char *ptr, int size, int *sp);
907
/* Decompress a serial object compressed with LZO.
908
`ptr' specifies the pointer to a region.
909
`size' specifies the size of the region.
910
`sp' specifies the pointer to a variable to which the size of the region of the return
911
value is assigned. If it is `NULL', it is not used.
912
If successful, the return value is the pointer to the result object, else, it is `NULL'.
913
Because an additional zero code is appended at the end of the region of the return value,
914
the return value can be treated as a character string. Because the region of the return
915
value is allocated with the `malloc' call, it should be released with the `free' call if it
916
is no longer in use. */
917
char *est_lzodecode(const char *ptr, int size, int *sp);
920
/* Compress a serial object with BZIP2.
921
`ptr' specifies the pointer to a region.
922
`size' specifies the size of the region. If it is negative, the size is assigned with
924
`sp' specifies the pointer to a variable to which the size of the region of the return
926
If successful, the return value is the pointer to the result object, else, it is `NULL'.
927
Because the region of the return value is allocated with the `malloc' call, it should be
928
released with the `free' call if it is no longer in use. */
929
char *est_bzencode(const char *ptr, int size, int *sp);
932
/* Decompress a serial object compressed with BZIP2.
933
`ptr' specifies the pointer to a region.
934
`size' specifies the size of the region.
935
`sp' specifies the pointer to a variable to which the size of the region of the return
936
value is assigned. If it is `NULL', it is not used.
937
If successful, the return value is the pointer to the result object, else, it is `NULL'.
938
Because an additional zero code is appended at the end of the region of the return value,
939
the return value can be treated as a character string. Because the region of the return
940
value is allocated with the `malloc' call, it should be released with the `free' call if it
941
is no longer in use. */
942
char *est_bzdecode(const char *ptr, int size, int *sp);
694
945
/* Get the border string for draft data of documents.
739
991
int est_regex_match(const void *regex, const char *str);
994
/* Check whether a regular expression matches a string.
995
`rstr' specifies a regular expression string.
996
`tstr' specifies a target string.
997
The return value is true if the regular expression string matches the target string. */
998
int est_regex_match_str(const char *rstr, const char *tstr);
742
1001
/* Replace each substring matching a regular expression string.
743
1002
`str' specifies a target string.
744
1003
`bef' specifies a string of regular expressions for substrings.
745
`aft' specifies a string with which each substring is replaced.
1004
`aft' specifies a string with which each substring is replaced. Each "&" in the string is
1005
replaced with the matched substring. Each "\" in the string escapes the following character.
1006
Special escapes "\1" through "\9" referring to the corresponding matching sub-expressions in
1007
the regular expression string are supported.
746
1008
The return value is a new converted string. Even if the regular expression is invalid, a copy
747
1009
of the original string is returned. Because the region of the return value is allocated with
748
1010
the `malloc' call, it should be released with the `free' call if it is no longer in use. */
749
1011
char *est_regex_replace(const char *str, const char *bef, const char *aft);
1014
/* Duplicate a document object.
1015
`doc' specifies a document object.
1016
The return value is a duplicated document object. */
1017
ESTDOC *est_doc_dup(ESTDOC *doc);
752
1020
/* Set the ID number of a document object.
753
1021
`doc' specifies a document object.
754
1022
`id' specifies the ID number to set. */
762
1030
const char *est_doc_hidden_texts(ESTDOC *doc);
1033
/* Reduce the texts to fit to the specified size.
1034
`doc' specifies a document object.
1035
`size' specifies the total size of the texts. */
1036
void est_doc_slim(ESTDOC *doc, int size);
765
1039
/* Check whether a document object is empty.
766
1040
`doc' specifies a document object.
767
1041
The return value is true if the document is empty, else it is false. */
768
1042
int est_doc_is_empty(ESTDOC *doc);
1045
/* Duplicate a condition object.
1046
`cond' specifies a condition object.
1047
The return value is a duplicated condition object. */
1048
ESTCOND *est_cond_dup(ESTCOND *cond);
771
1051
/* Get the phrase of a condition object.
772
1052
`cond' specifies a condition object.
773
1053
The return value is the phrase of the condition object or `NULL' if it is not specified. The
798
1078
int est_cond_max(ESTCOND *cond);
1081
/* Get the number of skipped documents of a condition object.
1082
`cond' specifies a condition object.
1083
The return value is the number of documents to be skipped in the search result. */
1084
int est_cond_skip(ESTCOND *cond);
801
1087
/* Get the options of a condition object.
802
1088
`cond' specifies a condition object.
803
1089
The return value is the options of the condition object. */
804
1090
int est_cond_options(ESTCOND *cond);
1093
/* Get permission to adopt result of the auxiliary index.
1094
`cond' specifies a condition object.
1095
The return value is permission to adopt result of the auxiliary index. */
1096
int est_cond_auxiliary(ESTCOND *cond);
1099
/* Get the attribute distinction filter.
1100
`cond' specifies a condition object.
1101
The return value is the name of the distinct attribute or `NULL' if it is not specified. The
1102
life duration of the returned string is synchronous with the one of the condition object. */
1103
const char *est_cond_distinct(ESTCOND *cond);
1106
/* Get the mask of targets of meta search.
1107
`cond' specifies a condition object.
1108
The return value is the mask of targets of meta search. */
1109
int est_cond_mask(ESTCOND *cond);
807
1112
/* Get the score of a document corresponding to a condition object.
808
1113
`cond' specifies a condition object.
809
1114
`index' specifies the index of an element of the result array of `est_db_search'.
811
1116
int est_cond_score(ESTCOND *cond, int index);
1119
/* Check whether a condition object has used the auxiliary index.
1120
`cond' specifies a condition object.
1121
`word' specifies a keyword to be checked. If it is an empty string, whether at least one
1122
keyword is used is checked.
1123
The return value is true if the condition object has used the auxiliary index, else it is
1125
int est_cond_auxiliary_word(ESTCOND *cond, const char *word);
814
1128
/* Get an array of ID numbers of eclipsed documents of a document in a condition object.
815
1129
`cond' specifies a condition object.
816
`id' specifies the ID number of the parent document.
1130
`id' specifies the ID number of a parent document.
817
1131
`np' specifies the pointer to a variable to which the number of elements of the return value
819
1133
The return value is an array whose elements express the ID numbers and their scores
821
1135
const int *est_cond_shadows(ESTCOND *cond, int id, int *np);
1138
/* Set the callback function for query expansion.
1139
`cond' specifies a condition object.
1140
`func' specifies the pointer to a function. The first argument of the callback specifies a
1141
word to be expanded. The second argument specifies a list object into which renewed words to
1143
void est_cond_set_expander(ESTCOND *cond, void (*func)(const char *, CBLIST *));
824
1146
/* Set the error code of a database.
825
1147
`db' specifies a database object.
826
1148
`ecode' specifies an error code to set. */
827
1149
void est_db_set_ecode(ESTDB *db, int ecode);
1152
/* Check whether an option is set.
1153
`db' specifies a database object.
1154
`option' specifies an option used when opening the database.
1155
The return value is 1 if the option is set, 0 if the option is not set, or -1 if it is
1157
int est_db_check_option(ESTDB *db, int option);
1160
/* Get the inode number of a database.
1161
`db' specifies a database object.
1162
The return value is the inode number of the database. */
1163
int est_db_inode(ESTDB *db);
830
1166
/* Set the entity data of a document in a database.
831
1167
`db' specifies a database object connected as a writer.
832
1168
`id' specifies the ID number of a registered document.
886
1228
`db' specifies a database object connected as a writer.
887
1229
`id' specifies the ID number of a document.
888
1230
`kwords' specifies a map object of keywords of the document.
889
The return value is true if success, else it is false. */
890
int est_db_put_keywords(ESTDB *db, int id, CBMAP *kwords);
1231
`weight' specifies weighting bias of scores.
1232
The return value is true if success, else it is false. */
1233
int est_db_put_keywords(ESTDB *db, int id, CBMAP *kwords, double weight);
1236
/* Remove keywords of a document.
1237
`db' specifies a database object connected as a writer.
1238
`id' specifies the ID number of a document.
1239
The return value is true if success, else it is false. */
1240
int est_db_out_keywords(ESTDB *db, int id);
893
1243
/* Retrieve a map object of keywords.
945
1295
int est_db_word_rec_size(ESTDB *db, const char *word);
1298
/* Get the number of unique keywords in a database.
1299
`db' specifies a database object.
1300
The return value is the number of unique keywords in the database. */
1301
int est_db_keyword_num(ESTDB *db);
1304
/* Initialize the keyword iterator of a database.
1305
`db' specifies a database object.
1306
The return value is true if success, else it is false. */
1307
int est_db_keyword_iter_init(ESTDB *db);
1310
/* Get the next keyword of the keyword iterator of a database.
1311
`db' specifies a database object.
1312
The return value is the next keyword. If there are no more keywords, `NULL' is returned. Because
1313
the region of the return value is allocated with the `malloc' call, it should be released
1314
with the `free' call if it is no longer in use. */
1315
char *est_db_keyword_iter_next(ESTDB *db);
1318
/* Get the size of the record of a keyword.
1319
`db' specifies a database object.
1320
`word' specifies a keyword.
1321
The return value is the size of the record of the keyword. If there is no corresponding
1322
record, 0 is returned. */
1323
int est_db_keyword_rec_size(ESTDB *db, const char *word);
1326
/* Search documents corresponding to a keyword for a database.
1327
`db' specifies a database object.
1328
`word' specifies a keyword.
1329
`nump' specifies the pointer to a variable to which the number of elements in the result is
1331
The return value is an array whose elements are ID numbers of corresponding documents.
1332
This function never fails. Even if no document corresponds or an error occurs, an empty
1333
array is returned. Because the region of the return value is allocated with the `malloc'
1334
call, it should be released with the `free' call if it is no longer in use. */
1335
int *est_db_keyword_search(ESTDB *db, const char *word, int *nump);
948
1338
/* Get the number of records in the cache memory of a database.
949
1339
`db' specifies a database object.
950
1340
The return value is the number of records in the cache memory of a database. */
1002
1393
CBLIST *est_db_list_rescc(ESTDB *db);
1396
/* Get the number of pseudo documents in a database.
1397
`db' specifies a database object.
1398
The return value is the number of pseudo documents in the database. */
1399
int est_db_pseudo_doc_num(ESTDB *db);
1402
/* Get a list of expressions of attribute indexes of a database.
1403
`db' specifies a database object.
1404
The return value is a new list object of expressions of attribute indexes. Because the object
1405
of the return value is opened with the function `cblistopen', it should be closed with the
1406
function `cblistclose' if it is no longer in use. */
1407
CBLIST *est_db_attr_index_exprs(ESTDB *db);
1005
1410
/* Interrupt long time processing.
1006
1411
`db' specifies a database object. */
1007
1412
void est_db_interrupt(ESTDB *db);
1415
/* Repair a broken database directory.
1416
`name' specifies the name of a database directory.
1417
`options' specifies options: `ESTRPSTRICT' to perform strict consistency check, `ESTRPSHODDY'
1418
to omit consistency check.
1419
`ecp' specifies the pointer to a variable to which the error code is assigned.
1420
The return value is true if success, else it is false. */
1421
int est_db_repair(const char *name, int options, int *ecp);
1010
1424
/* Extract words for snippet from hints of search.
1011
1425
`hints' specifies a map object whose records were set by `est_db_search'.
1012
1426
The return value is a new list object of words to be highlighted. Because the object of the
1030
1450
/* Get the canonicalized absolute pathname of a file.
1031
`path' specifies the path of a new directory.
1451
`path' specifies the path of a file.
1032
1452
The return value is the canonicalized absolute pathname of a file. Because the region of the
1033
1453
return value is allocated with the `malloc' call, it should be released with the `free' call
1034
1454
if it is no longer in use. */
1035
1455
char *est_realpath(const char *path);
1458
/* Get the inode number of a file.
1459
`path' specifies the path of a file.
1460
The return value is the inode number of a file or -1 on error. */
1461
int est_inode(const char *path);
1038
1464
/* Get the time of day in milliseconds.
1039
1465
The return value is the time of day in milliseconds. */
1040
1466
double est_gettimeofday(void);