1
/*------------------------------------------------------------------------------
2
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
4
* Distributable under the terms of either the Apache License (Version 2.0) or
5
* the GNU Lesser General Public License, as specified in the COPYING file.
6
------------------------------------------------------------------------------*/
11
#include "CLucene/search/MultiPhraseQuery.h"
14
// File-scope analyzers shared by the query tests below: aStd is passed to
// _TestSearchesRun by testSrchNumbers and aWS by testSrchEscapes.
StandardAnalyzer aStd;
15
WhitespaceAnalyzer aWS;
16
// Searcher shared by the testSrch* query tests. It starts out NULL, is
// assigned in testSrchOpenIndex, and each query test asserts it is non-NULL
// before using it.
IndexSearcher* s=NULL;
18
// Parses `qry` against the "contents" field using `analyzer`, runs the
// resulting query through `search`, and, when there are hits, asks the
// searcher to explain the first hit (and the second, if present), converting
// each explanation to a string that is freed immediately.
//
//   tc       - CuTest context; failures are reported through CuFail.
//   analyzer - analyzer handed to QueryParser::parse.
//   search   - searcher that executes and explains the query.
//   qry      - query string to parse.
//
// NOTE(review): this listing omits lines of the function (the declarations of
// q and h, the opening of the try block, the release of q/h and the closing
// braces are not visible) - confirm against the full file before editing.
void _TestSearchesRun(CuTest *tc, Analyzer* analyzer, Searcher* search, const TCHAR* qry){
22
q = QueryParser::parse(qry , _T("contents"), analyzer);
24
h = search->search( q );
26
if ( h->length() > 0 ){
27
//check for explanation memory leaks...
28
CL_NS(search)::Explanation expl1;
29
search->explain(q, h->id(0), &expl1);
30
// toString() hands back a character array that is released right below.
TCHAR* tmp = expl1.toString();
31
_CLDELETE_CARRAY(tmp);
32
if ( h->length() > 1 ){ //do a second one just in case
33
CL_NS(search)::Explanation expl2;
34
search->explain(q, h->id(1), &expl2);
35
tmp = expl2.toString();
36
_CLDELETE_CARRAY(tmp);
40
// A CLucene error fails the test with the error's message.
}catch(CLuceneError& err){
41
CuFail(tc,_T("Error: %s\n"), err.twhat());
43
// NOTE(review): presumably inside a catch-all handler that is not visible here.
CuFail(tc,_T("Error: unknown\n"));
49
// Builds the path "<clucene_data_location>/reuters-21578-index" in `loc`,
// asserts that the directory exists, and opens the shared searcher `s` on it.
//
// NOTE(review): the declaration of `loc` is not visible in this listing;
// strcpy/strcat do no bounds checking, so confirm the buffer is large enough
// for the data location plus the index suffix.
void testSrchOpenIndex(CuTest *tc ){
51
strcpy(loc, clucene_data_location);
52
strcat(loc, "/reuters-21578-index");
54
CuAssert(tc,_T("Index does not exist"), Misc::dir_Exists(loc));
55
// Stored in the file-scope pointer so the later testSrch* cases can use it.
s=_CLNEW IndexSearcher(loc);
57
// Registered as the last test of the suite in testsearch(). The CuTest
// parameter name is commented out in the signature.
// NOTE(review): the body is not visible in this listing - presumably it closes
// and frees the shared searcher `s` opened by testSrchOpenIndex; confirm.
void testSrchCloseIndex(CuTest* /*tc*/ ){
64
// Runs queries containing punctuation ("a&b", "a&&b", ".NET") through
// _TestSearchesRun against the shared searcher `s`.
// NOTE(review): the declaration of the analyzer `a` is not visible in this
// listing - presumably a local analyzer instance; confirm.
void testSrchPunctuation(CuTest *tc ){
65
// The shared searcher must already have been opened.
CuAssert(tc,_T("Searcher was not open"),s!=NULL);
68
_TestSearchesRun(tc, &a,s, _T("a&b") );
69
_TestSearchesRun(tc, &a,s, _T("a&&b") );
70
_TestSearchesRun(tc, &a,s, _T(".NET") );
73
// Runs quoted phrase queries carrying a "~2" suffix (optionally combined with
// a further term or a "^2" suffix) through _TestSearchesRun against the shared
// searcher `s`.
// NOTE(review): the declaration of the analyzer `a` is not visible in this
// listing; confirm its type.
void testSrchSlop(CuTest *tc ){
75
// NOTE(review): presumably guarded by a preprocessor conditional that is not
// visible here (compare NO_WILDCARD_QUERY in testSrchWildcard); confirm.
CuNotImpl(tc,_T("Fuzzy"));
77
CuAssert(tc,_T("Searcher was not open"),s!=NULL);
79
_TestSearchesRun(tc, &a,s, _T("\"term germ\"~2") );
80
_TestSearchesRun(tc, &a,s, _T("\"term germ\"~2 flork") );
81
_TestSearchesRun(tc, &a,s, _T("\"term\"~2") );
82
_TestSearchesRun(tc, &a,s, _T("\" \"~2 germ") );
83
_TestSearchesRun(tc, &a,s, _T("\"term germ\"~2^2") );
87
// Runs the same three queries containing numeric tokens twice through
// _TestSearchesRun: first with the analyzer `a`, then with the file-scope
// StandardAnalyzer aStd.
// NOTE(review): the declaration of `a` is not visible in this listing; the
// comment below suggests it is a SimpleAnalyzer - confirm.
void testSrchNumbers(CuTest *tc ){
88
CuAssert(tc,_T("Searcher was not open"),s!=NULL);
90
// The numbers go away because SimpleAnalyzer ignores them
91
_TestSearchesRun(tc, &a,s, _T("3") );
92
_TestSearchesRun(tc, &a,s, _T("term 1.0 1 2") );
93
_TestSearchesRun(tc, &a,s, _T("term term1 term2") );
95
// Same queries again, this time analyzed with aStd.
_TestSearchesRun(tc, &aStd,s, _T("3") );
96
_TestSearchesRun(tc, &aStd,s, _T("term 1.0 1 2") );
97
_TestSearchesRun(tc, &aStd,s, _T("term term1 term2") );
100
// Runs queries using "*", "~" and "^n" suffixes through _TestSearchesRun
// against the shared searcher `s`, then assembles a BooleanQuery from a
// RangeQuery and a PrefixQuery on the "contents" field.
// NOTE(review): this listing omits lines (the #else/#endif of the conditional,
// the declaration of `a`, whatever executes `bq`, and its cleanup) - confirm
// against the full file.
void testSrchWildcard(CuTest *tc ){
101
// When NO_WILDCARD_QUERY is defined the test is reported as not implemented.
#ifdef NO_WILDCARD_QUERY
102
CuNotImpl(tc,_T("Wildcard"));
104
CuAssert(tc,_T("Searcher was not open"),s!=NULL);
106
_TestSearchesRun(tc, &a,s, _T("term*") );
107
_TestSearchesRun(tc, &a,s, _T("term*^2") );
108
_TestSearchesRun(tc, &a,s, _T("term~") );
109
_TestSearchesRun(tc, &a,s, _T("term^2~") );
110
_TestSearchesRun(tc, &a,s, _T("term~^2") );
111
_TestSearchesRun(tc, &a,s, _T("term*germ") );
112
_TestSearchesRun(tc, &a,s, _T("term*germ^3") );
114
//test problem reported by Gary Mangum
115
BooleanQuery* bq = _CLNEW BooleanQuery();
116
// Both range bounds use the same term text, "0105".
Term* upper = _CLNEW Term(_T("contents"),_T("0105"));
117
Term* lower = _CLNEW Term(_T("contents"),_T("0105"));
118
// NOTE(review): the third argument is presumably the "inclusive" flag; confirm
// against the RangeQuery constructor.
RangeQuery* rq=_CLNEW RangeQuery(lower,upper,true);
119
// NOTE(review): flags are presumably (deleteQuery, required, prohibited);
// confirm against BooleanQuery::add.
bq->add(rq,true,true,false);
123
Term* prefix = _CLNEW Term(_T("contents"),_T("reuters21578"));
124
PrefixQuery* pq = _CLNEW PrefixQuery(prefix);
125
// This function's reference to `prefix` is dropped once the query is built.
_CLDECDELETE(prefix);
126
bq->add(pq,true,true,false);
138
// Runs queries containing backslash-escaped characters ("\[", "\\", "\+",
// "\(") through _TestSearchesRun, mostly with the file-scope
// WhitespaceAnalyzer aWS and once with the analyzer `a`.
// NOTE(review): the declaration of `a` is not visible in this listing; confirm
// its type.
void testSrchEscapes(CuTest *tc ){
139
CuAssert(tc,_T("Searcher was not open"),s!=NULL);
141
_TestSearchesRun(tc, &aWS,s, _T("\\[brackets") );
142
_TestSearchesRun(tc, &a,s, _T("\\[brackets") );
143
_TestSearchesRun(tc, &aWS,s, _T("\\\\") );
144
_TestSearchesRun(tc, &aWS,s, _T("\\+blah") );
145
_TestSearchesRun(tc, &aWS,s, _T("\\(blah") );
148
// Runs bracketed ("[ ... ]") and braced ("{ ... }") range expressions, alone
// and combined with boosts, boolean operators and grouping, through
// _TestSearchesRun against the shared searcher `s`.
// NOTE(review): the #else/#endif of the conditional and the declaration of
// the analyzer `a` are not visible in this listing; confirm.
void testSrchRange(CuTest *tc ){
149
// When NO_RANGE_QUERY is defined the test is reported as not implemented.
#ifdef NO_RANGE_QUERY
150
CuNotImpl(tc,_T("Range"));
152
CuAssert(tc,_T("Searcher was not open"),s!=NULL);
154
_TestSearchesRun(tc, &a,s, _T("[ j m]") );
155
_TestSearchesRun(tc, &a,s, _T("[ j m ]") );
156
_TestSearchesRun(tc, &a,s, _T("{ j m}") );
157
_TestSearchesRun(tc, &a,s, _T("{ j m }") );
158
_TestSearchesRun(tc, &a,s, _T("{a TO b}") );
159
_TestSearchesRun(tc, &a,s, _T("{ j m }^2.0") );
160
_TestSearchesRun(tc, &a,s, _T("[ j m] OR bar") );
161
_TestSearchesRun(tc, &a,s, _T("[ j m] AND bar") );
162
_TestSearchesRun(tc, &a,s, _T("( bar blar { j m}) ") );
163
_TestSearchesRun(tc, &a,s, _T("gack ( bar blar { j m}) ") );
167
// Runs a broad set of query strings through _TestSearchesRun against the
// shared searcher `s`: plain terms, non-ASCII terms, boolean operators in
// word and symbol form, field-qualified terms, phrases, boosts, grouping and
// punctuation followed by "*".
// NOTE(review): the declarations of the analyzer `a` and of the buffer `tmp1`
// are not visible in this listing; confirm their types and sizes.
void testSrchSimple(CuTest *tc ){
168
CuAssert(tc,_T("Searcher was not open"),s!=NULL);
170
_TestSearchesRun(tc, &a,s, _T("a AND b") );
172
_TestSearchesRun(tc, &a,s, _T("term term term") );
176
// The byte pair \xc3\xbc is the UTF-8 encoding of u-umlaut; the literal is
// converted into tmp1 before being used as the query text.
// NOTE(review): 100 is presumably the capacity of tmp1; confirm.
lucene_utf8towcs(tmp1,"t\xc3\xbcrm term term",100);
177
_TestSearchesRun(tc, &a,s, tmp1 );
179
lucene_utf8towcs(tmp1,"\xc3\xbcmlaut",100);
180
_TestSearchesRun(tc, &a,s, tmp1 );
183
// AND / NOT in word form and in symbol form (&&, !, -).
_TestSearchesRun(tc, &a,s, _T("(a AND b)") );
184
_TestSearchesRun(tc, &a,s, _T("c OR (a AND b)") );
185
_TestSearchesRun(tc, &a,s, _T("a AND NOT b") );
186
_TestSearchesRun(tc, &a,s, _T("a AND -b") );
187
_TestSearchesRun(tc, &a,s, _T("a AND !b") );
188
_TestSearchesRun(tc, &a,s, _T("a && b") );
189
_TestSearchesRun(tc, &a,s, _T("a && ! b") );
191
// OR in word form and in symbol form (||).
_TestSearchesRun(tc, &a,s, _T("a OR b") );
192
_TestSearchesRun(tc, &a,s, _T("a || b") );
193
_TestSearchesRun(tc, &a,s, _T("a OR !b") );
194
_TestSearchesRun(tc, &a,s, _T("a OR ! b") );
195
_TestSearchesRun(tc, &a,s, _T("a OR -b") );
197
// +/- prefixes, field-qualified terms and quoted phrases.
_TestSearchesRun(tc, &a,s, _T("+term -term term") );
198
_TestSearchesRun(tc, &a,s, _T("foo:term AND field:anotherTerm") );
199
_TestSearchesRun(tc, &a,s, _T("term AND \"phrase phrase\"") );
200
_TestSearchesRun(tc, &a,s, _T("search AND \"meaningful direction\"") );
201
_TestSearchesRun(tc, &a,s, _T("\"hello there\"") );
203
_TestSearchesRun(tc, &a,s, _T("a AND b") );
204
_TestSearchesRun(tc, &a,s, _T("hello") );
205
_TestSearchesRun(tc, &a,s, _T("\"hello there\"") );
207
// "^n" suffixes on terms and phrases.
_TestSearchesRun(tc, &a,s, _T("germ term^2.0") );
208
_TestSearchesRun(tc, &a,s, _T("term^2.0") );
209
_TestSearchesRun(tc, &a,s, _T("term^2") );
210
_TestSearchesRun(tc, &a,s, _T("term^2.3") );
211
_TestSearchesRun(tc, &a,s, _T("\"germ term\"^2.0") );
212
_TestSearchesRun(tc, &a,s, _T("\"term germ\"^2") );
214
// Nested grouping.
_TestSearchesRun(tc, &a,s, _T("(foo OR bar) AND (baz OR boo)") );
215
_TestSearchesRun(tc, &a,s, _T("((a OR b) AND NOT c) OR d") );
216
_TestSearchesRun(tc, &a,s, _T("+(apple \"steve jobs\") -(foo bar baz)") );
218
_TestSearchesRun(tc, &a,s, _T("+title:(dog OR cat) -author:\"bob dole\"") );
221
// A single punctuation character followed by "*".
_TestSearchesRun(tc, &a,s, _T(".*") );
222
_TestSearchesRun(tc, &a,s, _T("<*") );
223
_TestSearchesRun(tc, &a,s, _T("/*") );
224
_TestSearchesRun(tc, &a,s, _T(";*") );
227
// Indexes a small set of documents into the "contents" field, runs eight
// parsed queries and checks each hit count against `shouldbe`, then checks
// that a MultiPhraseQuery yields three hits, and finally reports the elapsed
// time in milliseconds.
//
//   tc   - CuTest context used for assertions and the timing message.
//   bram - true to index into a new RAMDirectory; false to use an FSDirectory
//          at "<cl_tempDir>/test.search".
//
// NOTE(review): this listing omits many lines (remaining docs/queries entries,
// the declaration of `hits`, writer/reader/searcher cleanup, the scopes that
// separate the two `query`/`qryInfo` declarations, and the code that populates
// the MultiPhraseQuery) - confirm against the full file before editing.
void SearchTest(CuTest *tc, bool bram) {
228
// Start time, used for the elapsed-time message at the end.
uint64_t start = Misc::currentTimeMillis();
230
SimpleAnalyzer analyzer;
232
char fsdir[CL_MAX_PATH];
233
_snprintf(fsdir,CL_MAX_PATH,"%s/%s",cl_tempDir, "test.search");
234
Directory* ram = (bram?(Directory*)_CLNEW RAMDirectory():(Directory*)FSDirectory::getDirectory(fsdir) );
236
// NOTE(review): the third argument is presumably "create a new index"; confirm
// against the IndexWriter constructor.
IndexWriter writer( ram, &analyzer, true);
237
writer.setUseCompoundFile(false);
238
writer.setMaxBufferedDocs(3);
240
// NOTE(review): only four entries are visible here while the loop below reads
// docs[0..6]; the remaining entries are presumably on omitted lines.
const TCHAR* docs[] = { _T("a b c d e asdf"),
241
_T("a b c d e a b c d e asdg"),
242
_T("a b c d e f g h i j"),
245
_T("a c e a c e asef"),
249
for (int j = 0; j < 7; j++) {
250
Document* d = _CLNEW Document();
251
//no need to delete fields... document takes ownership
252
d->add(*_CLNEW Field(_T("contents"),docs[j],Field::STORE_YES | Field::INDEX_TOKENIZED));
254
writer.addDocument(d);
262
// NOTE(review): presumably executed only for the on-disk case, inside a
// conditional that is not visible here; confirm.
ram = (Directory*)FSDirectory::getDirectory(fsdir);
265
IndexReader* reader = IndexReader::open(ram);
266
IndexSearcher searcher(reader);
268
const TCHAR* queries[] = {
278
// Expected hit count for each of the eight queries, in order.
int shouldbe[] = {3,3,4,4,4,7,3,3};
280
QueryParser parser(_T("contents"), &analyzer);
282
for (int k = 0; k < 8; k++) {
283
Query* query = parser.parse(queries[k]);
285
TCHAR* qryInfo = query->toString(_T("contents"));
286
hits = searcher.search(query);
287
CLUCENE_ASSERT( hits->length() == shouldbe[k] );
288
_CLDELETE_CARRAY(qryInfo);
293
//test MultiPhraseQuery...
295
MultiPhraseQuery* query = _CLNEW MultiPhraseQuery();
296
RefCountArray<Term*> terms(3);
297
Term* termE = _CLNEW Term(_T("contents"), _T("e"));
298
terms[0] = _CLNEW Term(_T("contents"), _T("asdf"));
299
terms[1] = _CLNEW Term(_T("contents"), _T("asdg"));
300
terms[2] = _CLNEW Term(_T("contents"), _T("asef"));
306
terms.deleteValues();
308
TCHAR* qryInfo = query->toString(_T("contents"));
309
hits = searcher.search(query);
310
CLUCENE_ASSERT( hits->length() == 3 );
311
_CLDELETE_CARRAY(qryInfo);
323
CuMessageA (tc,"took %d milliseconds\n", (int32_t)(Misc::currentTimeMillis()-start));
326
// Checks the default Similarity: queryNorm for a few inputs, that lengthNorm
// and queryNorm can be called with zero, and specific encodeNorm/decodeNorm
// values including one encode(decode(x)) round trip.
// NOTE(review): this listing omits lines (for example between the assignment
// of `f` and the assertion on it); confirm against the full file.
void testNormEncoding(CuTest *tc) {
327
//just a quick test of the default similarity
328
CLUCENE_ASSERT(CL_NS(search)::Similarity::getDefault()->queryNorm(1)==1.0);
330
float_t f = CL_NS(search)::Similarity::getDefault()->queryNorm(9);
334
// NOTE(review): `f` is presumably adjusted on lines not visible here before
// this comparison; confirm.
CLUCENE_ASSERT(f < 0.1);
336
//test that div by zero is handled
337
// The results are not asserted in the lines visible here; the calls only need
// to complete.
float_t tmp = CL_NS(search)::Similarity::getDefault()->lengthNorm(_T("test"),0);
338
tmp = CL_NS(search)::Similarity::getDefault()->queryNorm(0);
340
//test that norm encoding is working properly
341
CLUCENE_ASSERT( CL_NS(search)::Similarity::encodeNorm(-1)==0 );
342
CLUCENE_ASSERT( CL_NS(search)::Similarity::encodeNorm(0)==0 );
343
CLUCENE_ASSERT( CL_NS(search)::Similarity::encodeNorm(1)==124 );
344
// NOTE(review): repeats the assertion directly above.
CLUCENE_ASSERT( CL_NS(search)::Similarity::encodeNorm(1)==124 );
345
CLUCENE_ASSERT( CL_NS(search)::Similarity::encodeNorm(7516192768.0 )==255);
348
// Decoding the byte values asserted above gives back the original floats.
CLUCENE_ASSERT( CL_NS(search)::Similarity::decodeNorm(124)==1 );
349
CLUCENE_ASSERT( CL_NS(search)::Similarity::decodeNorm(255)==7516192768.0 );
352
CLUCENE_ASSERT( CL_NS(search)::Similarity::encodeNorm(0.5f) == 120 );
355
// Round trip: encoding a decoded byte yields the same byte.
CLUCENE_ASSERT( CL_NS(search)::Similarity::encodeNorm(CL_NS(search)::Similarity::decodeNorm(57)) == 57 );
358
// Adds 140 documents to an index over `ram`, searches it for the term "a" in
// the "contents" field, and iterates over every hit.
// NOTE(review): this listing omits lines (the declarations of `ram`, `x` and
// `query`, the remaining docs entries, the loop bodies and the cleanup of
// writer/term/hits); confirm against the full file. The CuTest parameter name
// is commented out in the signature.
void testSrchManyHits(CuTest* /*tc*/) {
359
SimpleAnalyzer analyzer;
361
IndexWriter writer( &ram, &analyzer, true);
363
const TCHAR* docs[] = { _T("a b c d e"),
364
_T("a b c d e a b c d e"),
365
_T("a b c d e f g h i j"),
372
for (int j = 0; j < 140; j++) {
373
Document* d = _CLNEW Document();
374
//no need to delete fields... document takes ownership
376
// NOTE(review): the index `x` is computed on a line not visible here -
// presumably derived from j so that it stays inside docs; confirm.
d->add(*_CLNEW Field(_T("contents"),docs[x],Field::STORE_YES | Field::INDEX_TOKENIZED));
378
writer.addDocument(d);
383
IndexSearcher searcher(&ram);
386
Term* t = _CLNEW Term(_T("contents"), _T("a"));
387
// NOTE(review): flags are presumably (deleteQuery, required, prohibited);
// confirm against BooleanQuery::add.
query.add(_CLNEW TermQuery(t),true,false, false);
389
Hits* hits = searcher.search(&query);
390
// Visits every hit; the loop body is not visible in this listing.
for ( size_t x=0;x<hits->length();x++ ){
397
// Builds two separate indexes (over `ram0` and `ram1`) with one document
// each, wraps a searcher for each in a MultiSearcher, rewrites a RangeQuery
// spanning terms "a" to "c" through it, and asserts the rewritten query
// produces exactly one hit.
// NOTE(review): this listing omits lines (the declarations of ram0/ram1, the
// contents of docs0/docs1, writer close calls, the hit-loop body and the
// release of terms/hits); confirm against the full file.
void testSrchMulti(CuTest *tc) {
398
SimpleAnalyzer analyzer;
400
IndexWriter writer0( &ram0, &analyzer, true);
402
const TCHAR* docs0[] = {
406
Document* d = _CLNEW Document();
407
//no need to delete fields... document takes ownership
408
d->add(*_CLNEW Field(_T("contents"),docs0[0],Field::STORE_YES | Field::INDEX_TOKENIZED));
410
writer0.addDocument(d);
415
IndexWriter writer1( &ram1, &analyzer, true);
417
const TCHAR* docs1[] = {
421
d = _CLNEW Document();
422
//no need to delete fields... document takes ownership
423
d->add(*_CLNEW Field(_T("contents"),docs1[0],Field::STORE_YES | Field::INDEX_TOKENIZED));
425
writer1.addDocument(d);
429
IndexSearcher searcher0(&ram0);
430
IndexSearcher searcher1(&ram1);
432
// NOTE(review): three slots but only two assignments are visible; since
// MultiSearcher receives just the array, the last slot is presumably set to
// NULL as a terminator on a line not shown here - confirm.
Searchable* searchers[3];
434
searchers[0] = &searcher0;
435
searchers[1] = &searcher1;
438
MultiSearcher searcher(searchers);
440
Term* termA = _CLNEW Term(_T("contents"), _T("a"));
441
Term* termC = _CLNEW Term(_T("contents"), _T("c"));
442
// NOTE(review): the third argument is presumably the "inclusive" flag; confirm
// against the RangeQuery constructor.
RangeQuery query(termA, termC, true);
446
Query* rewritten = searcher.rewrite(&query);
447
Hits* hits = searcher.search(rewritten);
448
// Visits every hit; the loop body is not visible in this listing.
for ( size_t x=0;x<hits->length();x++ ){
451
CLUCENE_ASSERT(hits->length() == 1);
452
// `query` lives on the stack, so the rewritten query is only deleted when
// rewrite() returned a different object.
if (&query != rewritten) {
453
_CLDELETE(rewritten);
459
// Suite entry point: runs SearchTest with bram set to true, i.e. against a
// newly created RAMDirectory.
void ramSearchTest(CuTest *tc)
{
    const bool useRamDirectory = true;
    SearchTest(tc, useRamDirectory);
}
460
// Suite entry point: runs SearchTest with bram set to false, i.e. against an
// FSDirectory obtained for the temporary "test.search" path.
void fsSearchTest(CuTest *tc)
{
    const bool useRamDirectory = false;
    SearchTest(tc, useRamDirectory);
}
462
// Creates the "CLucene Search Test" suite and registers the search tests in
// execution order. testSrchOpenIndex is registered before the query tests
// that assert the shared searcher is non-NULL, and testSrchCloseIndex is
// registered last.
// NOTE(review): the braces and the return statement are not visible in this
// listing - presumably the function returns `suite`; confirm.
CuSuite *testsearch(void)
464
CuSuite *suite = CuSuiteNew(_T("CLucene Search Test"));
465
// Self-contained tests that build their own indexes.
SUITE_ADD_TEST(suite, ramSearchTest);
466
SUITE_ADD_TEST(suite, fsSearchTest);
468
SUITE_ADD_TEST(suite, testNormEncoding);
469
SUITE_ADD_TEST(suite, testSrchManyHits);
470
SUITE_ADD_TEST(suite, testSrchMulti);
471
// Tests that run against the shared searcher opened by testSrchOpenIndex.
SUITE_ADD_TEST(suite, testSrchOpenIndex);
472
SUITE_ADD_TEST(suite, testSrchPunctuation);
473
SUITE_ADD_TEST(suite, testSrchSlop);
474
SUITE_ADD_TEST(suite, testSrchNumbers);
475
SUITE_ADD_TEST(suite, testSrchWildcard);
476
SUITE_ADD_TEST(suite, testSrchEscapes);
477
SUITE_ADD_TEST(suite, testSrchRange);
478
SUITE_ADD_TEST(suite, testSrchSimple);
479
SUITE_ADD_TEST(suite, testSrchCloseIndex);