1
/*------------------------------------------------------------------------------
2
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
4
* Distributable under the terms of either the Apache License (Version 2.0) or
5
* the GNU Lesser General Public License, as specified in the COPYING file.
6
------------------------------------------------------------------------------*/
7
#include "CLucene/_ApiHeader.h"
8
#include "IndexSearcher.h"
10
#include "SearchHeader.h"
12
#include "_HitQueue.h"
15
#include "_FieldDocSortedHitQueue.h"
16
#include "CLucene/store/Directory.h"
17
#include "CLucene/document/Document.h"
18
#include "CLucene/index/IndexReader.h"
19
#include "CLucene/index/Term.h"
20
#include "CLucene/util/BitSet.h"
21
#include "FieldSortedHitQueue.h"
22
#include "Explanation.h"
30
// HitCollector that feeds accepted hits into a HitQueue.
// collect() accepts a document only when its score is positive and, if a
// BitSet was supplied, the document's bit is set in it.
class SimpleTopDocsCollector:public HitCollector{
33
// Optional document filter; collect() treats NULL as "every document is eligible".
const CL_NS(util)::BitSet* bits;
38
// Parameters (the member wiring is done in the initializer list, which is not
// shown on this line -- the mapping below is presumed from the names; confirm):
//   bs        - optional filter bits, may be NULL
//   hitQueue  - queue that receives accepted hits (used as `hq` in collect())
//   totalhits - caller-provided hit counter
//   ndocs     - number of hits wanted (compared against hq->size() in collect())
//   ms        - starting minimum score; the default -1.0f is the value collect()
//               treats as "no score threshold"
SimpleTopDocsCollector(const CL_NS(util)::BitSet* bs, HitQueue* hitQueue, int32_t* totalhits, size_t ndocs, const float_t ms=-1.0f):
46
~SimpleTopDocsCollector(){}
47
// Offers one scored document to the hit queue.
//   doc   - document number
//   score - score computed for that document
void collect(const int32_t doc, const float_t score){
48
if (score > 0.0f && // ignore zeroed buckets
49
(bits==NULL || bits->get(doc))) { // skip docs not in bits
51
// Insert when the queue holds fewer than nDocs entries, when no threshold is
// active (minScore == -1.0f), or when the score reaches the current threshold.
if (hq->size() < nDocs || (minScore==-1.0f || score >= minScore)) {
52
ScoreDoc sd = {doc, score};
53
hq->insert(sd); // update hit queue
54
// The threshold is only refreshed when one was enabled; -1.0f stays -1.0f.
if ( minScore != -1.0f )
55
minScore = hq->top().score; // maintain minScore
61
// HitCollector that feeds accepted hits, wrapped as FieldDoc objects, into a
// FieldSortedHitQueue. Acceptance rule in collect(): positive score and, if a
// BitSet was supplied, the document's bit is set.
class SortedTopDocsCollector:public HitCollector{
63
// Optional document filter; collect() treats NULL as "every document is eligible".
const CL_NS(util)::BitSet* bits;
64
// Destination queue for accepted hits.
FieldSortedHitQueue* hq;
68
// Parameters (member wiring happens in the initializer list, not shown on this
// line -- presumed from the names; confirm):
//   bs        - optional filter bits, may be NULL
//   hitQueue  - queue that receives accepted hits
//   totalhits - caller-provided hit counter
//   _nDocs    - number of hits wanted
SortedTopDocsCollector(const CL_NS(util)::BitSet* bs, FieldSortedHitQueue* hitQueue, int32_t* totalhits, size_t _nDocs):
75
~SortedTopDocsCollector(){
77
// Offers one scored document to the sorted hit queue.
//   doc   - document number
//   score - score computed for that document
void collect(const int32_t doc, const float_t score){
78
if (score > 0.0f && // ignore zeroed buckets
79
(bits==NULL || bits->get(doc))) { // skip docs not in bits
81
// A FieldDoc is heap-allocated for every accepted hit before the queue decides
// whether to keep it.
FieldDoc* fd = _CLNEW FieldDoc(doc, score); //todo: see jlucene way... with fields def???
82
// insert() returning false is handled by the statement that follows this line.
// NOTE(review): presumably that statement frees `fd` -- confirm, otherwise a
// rejected FieldDoc leaks.
if ( !hq->insert(fd) ) // update hit queue
88
// HitCollector that forwards a hit to another collector only when the
// document's bit is set in a BitSet.
class SimpleFilteredCollector: public HitCollector{
90
// Filter bits. collect() dereferences this without a NULL check, so it must be
// non-NULL whenever collect() is called.
CL_NS(util)::BitSet* bits;
91
// Collector that receives the hits passing the filter.
HitCollector* results;
93
//   bs        - filter bits
//   collector - downstream collector for hits that pass the filter
SimpleFilteredCollector(CL_NS(util)::BitSet* bs, HitCollector* collector):
98
~SimpleFilteredCollector(){
101
// Passes (doc, score) on to `results` when bits->get(doc) is true; otherwise
// the hit is dropped.
void collect(const int32_t doc, const float_t score){
102
if (bits->get(doc)) { // skip docs not in bits
103
results->collect(doc, score);
109
// Constructs a searcher by opening an IndexReader on the index at `path`.
//   path - location of the index; must not be NULL (checked by CND_PRECONDITION)
IndexSearcher::IndexSearcher(const char* path){
111
// Creates a searcher searching the index in the named directory.
113
//Post - The instance has been created
115
CND_PRECONDITION(path != NULL, "path is NULL");
117
// The reader is created here by IndexReader::open.
reader = IndexReader::open(path);
121
// Constructs a searcher by opening an IndexReader on `directory`.
//   directory - index directory; must not be NULL (checked by CND_PRECONDITION)
IndexSearcher::IndexSearcher(CL_NS(store)::Directory* directory){
123
// Creates a searcher searching the index in the specified directory.
125
//Post - The instance has been created
127
CND_PRECONDITION(directory != NULL, "directory is NULL");
129
// The reader is created here by IndexReader::open.
reader = IndexReader::open(directory);
133
// Constructs a searcher on top of an IndexReader supplied by the caller.
//   r - reader to search with
// NOTE(review): unlike the other constructors, no IndexReader::open call is
// visible here, so ownership of `r` presumably stays with the caller -- confirm
// against how readerOwner is set.
IndexSearcher::IndexSearcher(IndexReader* r){
135
// Creates a searcher searching the index with the provided IndexReader
137
//Post - The instance has been created
143
// Destructor.
// NOTE(review): the body is not visible on these lines; presumably it releases
// the reader via close() -- confirm.
IndexSearcher::~IndexSearcher(){
146
//Post - The instance has been destroyed
151
// Releases what this searcher holds on to.
void IndexSearcher::close(){
152
//Func - Frees resources associated with this Searcher.
154
//Post - The resources associated have been freed
155
// Cleanup is guarded: it only runs when this searcher owns the reader
// (readerOwner) and the reader pointer is non-NULL.
if (readerOwner && reader){
162
// Returns the document frequency of `term` as reported by the underlying
// reader (straight delegation to reader->docFreq).
//   term - term to look up
int32_t IndexSearcher::docFreq(const Term* term) const{
164
//Pre - reader != NULL
167
CND_PRECONDITION(reader != NULL, "reader is NULL");
169
return reader->docFreq(term);
172
// Deprecated overload (the _CL_DEPRECATED marker points to doc(i, document)).
// Allocates a new Document with _CLNEW for document number `i`.
// NOTE(review): the rest of the body is not visible here; presumably the caller
// owns the returned Document -- confirm.
_CL_DEPRECATED( doc(i, document) ) CL_NS(document)::Document* IndexSearcher::doc(int32_t i){
173
CL_NS(document)::Document* ret = _CLNEW CL_NS(document)::Document;
180
// Loads document number `i` into `d` by delegating to reader->document.
//   i - document number
//   d - document object that receives the result
// Returns whatever reader->document(i, d) returns.
bool IndexSearcher::doc(int32_t i, CL_NS(document)::Document& d) {
181
//Func - Retrieves i-th document found
182
// For use by HitCollector implementations.
183
//Pre - reader != NULL
184
//Post - The i-th document has been returned
186
CND_PRECONDITION(reader != NULL, "reader is NULL");
188
return reader->document(i,d);
190
// Pointer flavour of doc(i, Document&): loads document number `i` into `*d`.
//   i - document number
//   d - document object that receives the result; dereferenced without a NULL
//       check, so it must be non-NULL
// Returns whatever reader->document(i, *d) returns.
bool IndexSearcher::doc(int32_t i, CL_NS(document)::Document* d) {
191
//Func - Retrieves i-th document found
192
// For use by HitCollector implementations.
193
//Pre - reader != NULL
194
//Post - The i-th document has been returned
196
CND_PRECONDITION(reader != NULL, "reader is NULL");
198
return reader->document(i,*d);
202
// Returns reader->maxDoc() (straight delegation to the underlying reader).
int32_t IndexSearcher::maxDoc() const {
203
//Func - Return total number of documents including the ones marked deleted
204
//Pre - reader != NULL
205
//Post - The total number of documents including the ones marked deleted
208
CND_PRECONDITION(reader != NULL, "reader is NULL");
210
return reader->maxDoc();
213
//todo: find out why we are passing Query* and not Weight*, as Weight is being extracted anyway from Query*
214
// Runs `query` against the reader and returns the collected hits as a TopDocs.
//   query  - query to execute; must not be NULL
//   filter - optional filter; when non-NULL its bits restrict eligible documents
//   nDocs  - capacity of the hit queue / number of hits wanted
// Returns a TopDocs allocated with _CLNEW (empty when the weight yields no scorer).
TopDocs* IndexSearcher::_search(Query* query, Filter* filter, const int32_t nDocs){
216
//Pre - reader != NULL
219
CND_PRECONDITION(reader != NULL, "reader is NULL");
220
CND_PRECONDITION(query != NULL, "query is NULL");
222
Weight* weight = query->weight(this);
223
Scorer* scorer = weight->scorer(reader);
224
// No scorer: nothing can match, so return an empty result.
if (scorer == NULL) {
225
Query* wq = weight->getQuery();
229
return _CLNEW TopDocs(0, NULL, 0);
232
// bits stays NULL when no filter was given; the collector treats NULL as "no filtering".
BitSet* bits = filter != NULL ? filter->bits(reader) : NULL;
233
HitQueue* hq = _CLNEW HitQueue(nDocs);
235
//Check hq has been allocated properly
236
CND_CONDITION(hq != NULL, "Could not allocate memory for HitQueue hq");
238
// Single-element array handed to the collector as its hit counter.
int32_t* totalHits = _CL_NEWARRAY(int32_t,1);
241
// The trailing 0.0f is the collector's starting minimum score.
SimpleTopDocsCollector hitCol(bits,hq,totalHits,nDocs,0.0f);
242
scorer->score( &hitCol );
245
int32_t scoreDocsLength = hq->size();
247
// NOTE(review): allocated with plain new[] while other arrays here use
// _CL_NEWARRAY -- confirm this matches how TopDocs releases the array.
ScoreDoc* scoreDocs = new ScoreDoc[scoreDocsLength];
249
// The array is filled from the last slot down to slot 0, one pop() per slot.
// Presumably pop() yields the lowest-ranked hit first, leaving the best hit at
// index 0 -- confirm against HitQueue.
for (int32_t i = scoreDocsLength-1; i >= 0; --i) // put docs in array
250
scoreDocs[i] = hq->pop();
252
// Copy the count out before the counter array is released below.
int32_t totalHitsInt = totalHits[0];
255
// The filter decides whether the bit set it handed out is to be released here.
if ( bits != NULL && filter->shouldDeleteBitSet(bits) )
257
_CLDELETE_ARRAY(totalHits);
258
// A weight whose query differs from the one passed in means the query was rewritten.
Query* wq = weight->getQuery();
259
if ( query != wq ) //query was re-written
263
return _CLNEW TopDocs(totalHitsInt, scoreDocs, scoreDocsLength);
267
// Sorted variant of _search: runs `query` and returns the hits as a
// TopFieldDocs, ordered through a FieldSortedHitQueue built from sort->getSort().
//   query  - query to execute; must not be NULL
//   filter - optional filter; when non-NULL its bits restrict eligible documents
//   nDocs  - capacity of the hit queue / number of hits wanted
// Returns a TopFieldDocs allocated with _CLNEW (empty when there is no scorer).
TopFieldDocs* IndexSearcher::_search(Query* query, Filter* filter, const int32_t nDocs,
270
CND_PRECONDITION(reader != NULL, "reader is NULL");
271
CND_PRECONDITION(query != NULL, "query is NULL");
273
Weight* weight = query->weight(this);
274
Scorer* scorer = weight->scorer(reader);
276
// NOTE(review): no release of `weight` is visible before this early return,
// whereas the TopDocs overload fetches weight->getQuery() on the same path --
// check whether `weight` leaks here.
return _CLNEW TopFieldDocs(0, NULL, 0, NULL );
279
// bits stays NULL when no filter was given; the collector treats NULL as "no filtering".
BitSet* bits = filter != NULL ? filter->bits(reader) : NULL;
280
// The queue is an automatic (stack) object here, unlike the heap-allocated
// HitQueue in the TopDocs overload.
FieldSortedHitQueue hq(reader, sort->getSort(), nDocs);
281
// Single-element array handed to the collector as its hit counter.
int32_t* totalHits = _CL_NEWARRAY(int32_t,1);
284
SortedTopDocsCollector hitCol(bits,&hq,totalHits,nDocs);
285
scorer->score(&hitCol);
288
int32_t hqLen = hq.size();
289
FieldDoc** fieldDocs = _CL_NEWARRAY(FieldDoc*,hqLen);
290
// Filled from the last slot down to slot 0; each popped entry goes through
// hq.fillFields() before being stored.
for (int32_t i = hqLen-1; i >= 0; --i){ // put docs in array
291
fieldDocs[i] = hq.fillFields (hq.pop());
294
// A weight whose query differs from the one passed in means the query was rewritten.
Query* wq = weight->getQuery();
295
if ( query != wq ) //query was re-written
299
// Take the sort fields out of the queue and clear the queue's pointer so the
// returned TopFieldDocs becomes their owner.
SortField** hqFields = hq.getFields();
300
hq.setFields(NULL); //move ownership of memory over to TopFieldDocs
301
// Copy the count out before the counter array is released below.
int32_t totalHits0 = totalHits[0];
302
// The filter decides whether the bit set it handed out is to be released here.
if ( bits != NULL && filter->shouldDeleteBitSet(bits) )
304
// NOTE(review): the TopDocs overload releases the same kind of array with
// _CLDELETE_ARRAY; confirm _CLDELETE_LARRAY is intended here.
_CLDELETE_LARRAY(totalHits);
305
return _CLNEW TopFieldDocs(totalHits0, fieldDocs, hqLen, hqFields );
308
// Low-level search: scores `query` and reports hits to a caller-supplied
// HitCollector, either directly or through a SimpleFilteredCollector built on
// the filter's bits.
//   query   - query to execute; must not be NULL
//   filter  - optional filter
//   results - collector that receives the hits
void IndexSearcher::_search(Query* query, Filter* filter, HitCollector* results){
309
//Func - _search an index and fetch the results
310
// Applications should only use this if they need all of the
311
// matching documents. The high-level search API (search(Query)) is usually more efficient,
312
// as it skips non-high-scoring hits.
313
//Pre - query is a valid reference to a query
314
// filter may or may not be NULL
315
// results is a valid reference to a HitCollector and used to store the results
316
//Post - filter if non-NULL, a bitset used to eliminate some documents
318
CND_PRECONDITION(reader != NULL, "reader is NULL");
319
CND_PRECONDITION(query != NULL, "query is NULL");
322
// Stays NULL unless a filtering wrapper is created below.
SimpleFilteredCollector* fc = NULL;
325
// Filtered path: wrap `results` so only documents whose bit is set reach it.
// (Presumably guarded by a filter != NULL check on a line not shown -- confirm.)
bits = filter->bits(reader);
326
fc = _CLNEW SimpleFilteredCollector(bits, results);
329
Weight* weight = query->weight(this);
330
Scorer* scorer = weight->scorer(reader);
331
// A NULL scorer means there is nothing to score.
if (scorer != NULL) {
333
// Two scoring calls appear here: one straight into `results`, one through the
// filtering wrapper. Presumably they are the two arms of a branch on whether
// `fc` was created -- confirm.
scorer->score(results);
335
scorer->score((HitCollector*)fc);
341
// A weight whose query differs from the one passed in means the query was rewritten.
Query* wq = weight->getQuery();
342
if (wq != query) // query was rewritten
345
// The filter decides whether the bit set it handed out is to be released here.
if ( bits != NULL && filter->shouldDeleteBitSet(bits) )
349
// Rewrites `original` repeatedly until a rewrite() call returns the very query
// it was invoked on.
//   original - query to rewrite
Query* IndexSearcher::rewrite(Query* original) {
350
// `query` is the current candidate.
Query* query = original;
351
// `last` starts out as the original; its updates are not visible on these lines.
Query* last = original;
352
// Loop until rewrite() hands back the same pointer it was called on.
for (Query* rewrittenQuery = query->rewrite(reader);
353
rewrittenQuery != query;
354
rewrittenQuery = query->rewrite(reader)) {
355
query = rewrittenQuery;
356
// This test singles out a `last` that is neither the current query nor the
// caller's original. NOTE(review): presumably such an intermediate query is
// freed in the body -- confirm.
if ( query != last && last != original ){
364
// Builds the weight for `query` and appends its explanation for document `doc`
// as a detail of `ret`.
//   query - query to explain
//   doc   - document number to explain the score for
//   ret   - explanation object that receives the result via addDetail()
void IndexSearcher::explain(Query* query, int32_t doc, Explanation* ret){
365
Weight* weight = query->weight(this);
366
ret->addDetail(weight->explain(reader, doc)); // TODO: A hack until this function will return Explanation* as well
368
// A weight whose query differs from the one passed in means the query was rewritten.
Query* wq = weight->getQuery();
369
if ( query != wq ) //query was re-written
374
// Accessor for the IndexReader this searcher works on.
// NOTE(review): body not visible on this line; presumably returns `reader` -- confirm.
CL_NS(index)::IndexReader* IndexSearcher::getReader(){
378
// Returns the class name as the string literal "IndexSearcher".
const char* IndexSearcher::getClassName(){
379
return "IndexSearcher";
381
// Returns the same string as IndexSearcher::getClassName().
const char* IndexSearcher::getObjectName() const{
382
return IndexSearcher::getClassName();