1
/*------------------------------------------------------------------------------
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
* Distributable under the terms of either the Apache License (Version 2.0) or
* the GNU Lesser General Public License, as specified in the COPYING file.
------------------------------------------------------------------------------*/
9
// Logs a summary of a search result to the CuTest message stream: first the
// total hit count, then the stored "id" field of a sample of the hits
// (indices 0..9 and 95..104).
//   tc   - test case that receives the messages
//   hits - search result to report on
// NOTE(review): this excerpt is fragmentary; the closing braces of the
// function and its loops are not visible here.
static void print_tHits( CuTest *tc, Hits* hits ) {
10
// NOTE(review): "%d" expects an int; the loop below compares hits->length()
// against a size_t, so confirm the return type matches the format.
CuMessageA(tc,"%d total results\n\n", hits->length());
11
for (size_t i = 0 ; i < hits->length(); i++) {
12
// Print only a sample: the first ten hits and the ten hits around index 100.
if ( i < 10 || (i > 94 && i < 105) ) {
13
const Document& d = hits->doc(i);
14
// NOTE(review): i is a size_t passed for "%d" - possible format/argument
// width mismatch on 64-bit targets; confirm against CuMessage's contract.
CuMessage(tc, _T("%d %s\n"), i, d.get(_T("id")) );
18
// Indexes MAX_DOCS documents whose "body" cycles through the ten strBody
// strings, searches "body" for "test", asserts the hit count is MAX_DOCS/10,
// and then fails the test if the same "id" value is seen twice in the result.
//   tc - test case used for assertions
// NOTE(review): this excerpt is fragmentary; the declarations of ram, doc,
// strDb, id and tmp, the closing braces, and any cleanup are not visible here.
void testSearchTestForDuplicatesRaw(CuTest *tc){
19
const int MAX_DOCS=1500;
20
// Ten body values; document i receives strBody[i%10].
const char *strBody[10] = {"test", "value", "why not", "computer", "clucene",
21
"sun", "program", "main", "database", "code"};
25
WhitespaceAnalyzer an;
26
// Third argument is `true` - presumably "create a new index"; confirm against
// the IndexWriter constructor.
IndexWriter* writer = _CLNEW IndexWriter(&ram, &an, true);
31
//printf("Indexing, please wait...\n");
32
for (int32_t i = 0; i < MAX_DOCS; i++) {
34
//printf("%d/%d=%s\n", i, MAX_DOCS,strBody[i%10]);
35
doc = _CLNEW Document();
38
// "id" holds the decimal loop index; flagged STORE_YES | INDEX_UNTOKENIZED.
_sntprintf(strDb, 1024, _T("%d"), i);
39
doc->add( *_CLNEW Field(_T("id"), strDb,Field::STORE_YES | Field::INDEX_UNTOKENIZED) );
41
// strDb is reused for the body text (presumably a narrow-to-TCHAR copy;
// verify STRCPY_AtoT). Flagged STORE_NO | INDEX_TOKENIZED.
STRCPY_AtoT(strDb, strBody[i%10], 1022);
43
doc->add(*_CLNEW Field(_T("body"), strDb,Field::STORE_NO | Field::INDEX_TOKENIZED) );
45
writer->addDocument(doc);
49
//printf("\nDone.\n");
57
IndexSearcher searcher(&ram);
60
// Search the "body" field for the term "test" (strBody[0]).
Query* query = QueryParser::parse(_T("test"), _T("body"), &an);
61
Hits* result = searcher.search(query);
63
// One in every ten documents carries "test", so MAX_DOCS/10 hits are expected.
CLUCENE_ASSERT(result->length()==((int)MAX_DOCS/10));
65
//printf("Building result map...\n");
66
// Ids already seen in the result; only key presence is checked below.
std::map<int32_t, int32_t> resMap;
68
for (size_t j = 0; j < result->length(); j++) {
69
doc = &result->doc(j);
71
id = _ttoi(doc->get(_T("id")));
72
// First sighting of this id: record it.
if ( resMap.find(id) ==resMap.end() ) {
73
resMap.insert( std::pair<int32_t,int32_t>(id, 1));
74
//printf("Inserted $d\n",id);
77
// NOTE(review): presumably the `else` branch of the check above (the `else`
// line is not visible here): build a message and fail the test.
_sntprintf(tmp,2048,_T("Duplicated result found - Id: %d\n"), id);
78
CuAssert(tc,tmp,false);
82
//printf("Total duplicated found: %d\n", dupl);
91
// Indexes MAX_DOCS documents that all carry priority="high" plus an "id"
// field, then runs two parsed queries on the "priority" field ("high" and
// "high OR medium"), logging each query string and a sample of its hits.
//   tc - test case that receives the messages
// NOTE(review): this excerpt is fragmentary; the declarations of buf and hits,
// the code that fills buf, the closing braces, and all cleanup are not
// visible here.
void testSearchTestForDuplicates(CuTest *tc) {
92
RAMDirectory directory;
93
SimpleAnalyzer analyzer;
94
IndexWriter* writer = _CLNEW IndexWriter(&directory, &analyzer, true);
95
const int32_t MAX_DOCS = 255;
97
for (int32_t j = 0; j < MAX_DOCS; j++) {
98
Document* d = _CLNEW Document();
99
// Every document gets the same priority value.
d->add(*_CLNEW Field(_T("priority"), _T("high"),Field::STORE_YES | Field::INDEX_TOKENIZED));
103
// buf is populated on a line not visible here (presumably from j; confirm).
d->add(*_CLNEW Field(_T("id"), buf,Field::STORE_YES | Field::INDEX_TOKENIZED));
104
writer->addDocument(d);
111
// try a search without OR
112
Searcher* searcher = _CLNEW IndexSearcher( &directory );
113
QueryParser* parser = _CLNEW QueryParser(_T("priority"), &analyzer);
116
Query* query = parser->parse(_T("high"));
117
// toString's result is released below with _CLDELETE_CARRAY after logging.
TCHAR* tmp = query->toString(_T("priority"));
118
CuMessage(tc, _T("Query: %s\n"), tmp );
119
_CLDELETE_CARRAY(tmp);
121
hits = searcher->search(query);
122
print_tHits(tc, hits);
132
// try a new search with OR
// NOTE(review): searcher and parser are reassigned to fresh allocations here;
// the previous instances must be released first - that cleanup is not visible
// in this excerpt, so confirm it exists.
133
searcher = _CLNEW IndexSearcher( &directory );
134
parser = _CLNEW QueryParser(_T("priority"), &analyzer);
137
query = parser->parse(_T("high OR medium"));
138
tmp = query->toString(_T("priority"));
139
CuMessage(tc, _T("Query: %s\n"), tmp );
140
_CLDELETE_CARRAY(tmp);
142
hits = searcher->search(query);
143
print_tHits(tc, hits);
155
// Builds the CuTest suite named "CLucene Duplicates Test" and registers the
// two duplicate-detection tests with it.
// NOTE(review): this excerpt is fragmentary; the opening brace and the
// statement that returns the suite are not visible here.
CuSuite *testduplicates(void)
157
CuSuite *suite = CuSuiteNew(_T("CLucene Duplicates Test"));
159
// Tests are added in this order: parser-instance search first, raw search second.
SUITE_ADD_TEST(suite, testSearchTestForDuplicates);
160
SUITE_ADD_TEST(suite, testSearchTestForDuplicatesRaw);