1
/*------------------------------------------------------------------------------
2
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
4
* Distributable under the terms of either the Apache License (Version 2.0) or
5
* the GNU Lesser General Public License, as specified in the COPYING file.
6
------------------------------------------------------------------------------*/
7
#include "CLucene/_ApiHeader.h"
8
#include "CLucene/index/Term.h"
9
#include "CLucene/index/Terms.h"
10
#include "CLucene/index/IndexReader.h"
11
#include "Similarity.h"
12
#include "PrefixQuery.h"
13
#include "BooleanClause.h"
14
#include "BooleanQuery.h"
15
#include "TermQuery.h"
16
#include "CLucene/util/BitSet.h"
17
#include "CLucene/util/StringBuffer.h"
23
PrefixQuery::PrefixQuery(Term* Prefix){
//Func - Constructor.
25
// Constructs a query for terms starting with prefix
26
//Pre - Prefix != NULL
27
//Post - The instance has been created and the member prefix holds the value
//       produced by _CL_POINTER(Prefix)
29
//Get a pointer to Prefix
//NOTE(review): _CL_POINTER is defined outside this file; presumably it takes a
//counted reference on the Term - confirm against the macro definition.
30
prefix = _CL_POINTER(Prefix);
//NOTE(review): the closing brace of this body is not visible in this view.
33
PrefixQuery::PrefixQuery(const PrefixQuery& clone):Query(clone){
//Func - Copy constructor.
//Post - The Query base has been copy-constructed from clone and the member
//       prefix holds the value produced by _CL_POINTER(clone.prefix)
//NOTE(review): presumably _CL_POINTER makes both instances share one counted
//Term rather than copying it - confirm against the macro definition.
34
prefix = _CL_POINTER(clone.prefix);
36
Query* PrefixQuery::clone() const{
//Func - Creates a copy of this query
//Post - A new PrefixQuery, copy-constructed from *this and allocated with
//       _CLNEW, has been returned
37
return _CLNEW PrefixQuery(*this);
40
Term* PrefixQuery::getPrefix(bool pointer){
//Func - Returns the prefix term of this query
//NOTE(review): the pointer argument is not referenced by any statement visible
//in this view; only the _CL_POINTER(prefix) return below is shown. Confirm how
//pointer is meant to select between return paths.
42
return _CL_POINTER(prefix);
47
PrefixQuery::~PrefixQuery(){
//Func - Destructor
50
//Post - The instance has been destroyed.
52
//Delete prefix by finalizing it
//NOTE(review): the statement that releases prefix is not visible in this view.
57
/** Returns a hash code value for this object: the boost, encoded through
 *  Similarity::floatToByte, XOR-ed with the hash code of the prefix term. */
58
size_t PrefixQuery::hashCode() const {
59
return Similarity::floatToByte(getBoost()) ^ prefix->hashCode();
62
const char* PrefixQuery::getObjectName()const{
63
//Func - Returns the name "PrefixQuery"
//       (obtained by delegating to getClassName())
65
//Post - The string "PrefixQuery" has been returned
67
return getClassName();
69
const char* PrefixQuery::getClassName(){
70
//Func - Returns the name "PrefixQuery"
72
//Post - The string "PrefixQuery" has been returned
//NOTE(review): the return statement itself is not visible in this view.
77
bool PrefixQuery::equals(Query * other) const{
//Func - Compares this query with other
//Pre  - other != NULL (it is dereferenced without a check)
//Post - ret is true only when both boosts compare equal with == and the
//       prefix terms are equal according to Term::equals
78
//Type guard: other must report itself as a PrefixQuery before it is cast
//NOTE(review): the statement controlled by this if is not visible in this view.
if (!(other->instanceOf(PrefixQuery::getClassName())))
81
PrefixQuery* rq = (PrefixQuery*)other;
82
bool ret = (this->getBoost() == rq->getBoost())
83
&& (this->prefix->equals(rq->prefix));
//NOTE(review): the statement returning ret is not visible in this view.
88
Query* PrefixQuery::rewrite(IndexReader* reader){
//Func - Expands this prefix query into a BooleanQuery that receives one
//       TermQuery for every enumerated term whose text starts with the
//       prefix text
//Pre  - reader != NULL (it is dereferenced without a check)
//NOTE(review): several lines of this body are not visible in this view (the
//loop opener, the declarations of tmp, i and c, and the return statements),
//so the notes below describe only the statements that are shown.
89
BooleanQuery* query = _CLNEW BooleanQuery( true );
90
//Term enumeration obtained from the reader for the prefix term
//(presumably positioned at the first term at or after prefix - confirm)
TermEnum* enumerator = reader->terms(prefix);
91
Term* lastTerm = NULL;
93
//Cache the prefix text, field and length once, outside the term loop
const TCHAR* prefixText = prefix->text();
94
const TCHAR* prefixField = prefix->field();
97
size_t prefixLen = prefix->textLength();
99
lastTerm = enumerator->term();
100
//The field test below compares pointers, not characters
if (lastTerm != NULL &&
101
lastTerm->field() == prefixField ) // interned comparison
104
//now see if term->text() starts with prefixText
105
size_t termLen = lastTerm->textLength();
106
if ( prefixLen>termLen )
107
break; //the prefix is longer than the term, can't be matched
109
tmp = lastTerm->text();
111
//check for prefix match in reverse, since most change will be at the end
//The index runs from prefixLen-1 down to 0 and the loop stops once i equals -1.
//NOTE(review): the declaration of i is not visible here; if it is unsigned the
//termination test relies on -1 converting to the maximum value of that type.
112
for ( i=prefixLen-1;i!=-1;--i ){
113
if ( tmp[i] != prefixText[i] ){
114
tmp=NULL;//signals inequality
121
TermQuery* tq = _CLNEW TermQuery(lastTerm); // found a match
122
tq->setBoost(getBoost()); // set the boost
123
//NOTE(review): the meaning of the three boolean arguments is defined by
//BooleanQuery::add, which is not shown in this file.
query->add(tq,true,false, false); // add to query
126
//Release the term fetched for this iteration before advancing
_CLDECDELETE(lastTerm);
127
} while (enumerator->next());
130
_CLDELETE(enumerator);
//NOTE(review): two consecutive _CLDECDELETE(lastTerm) statements are visible
//below. Presumably they sit in different scopes (a cleanup handler and the
//normal path) and the macro resets the pointer - confirm that this cannot
//release the term twice.
131
_CLDECDELETE(lastTerm);
133
_CLDECDELETE(lastTerm);
136
//if we only added one clause and the clause is not prohibited then
137
//we can just return the query
138
if (query->getClauseCount() == 1) { // optimize 1-clause queries
140
query->getClauses(&c);
142
if (!c->prohibited) { // just return clause
143
//Presumably stops the clause from deleting the query that is handed out below
c->deleteQuery=false;
144
Query* ret = c->getQuery();
154
Query* PrefixQuery::combine(CL_NS(util)::ArrayBase<Query*>* queries) {
//Func - Combines the given queries by delegating to Query::mergeBooleanQueries
//Post - The result of Query::mergeBooleanQueries(queries) has been returned
155
return Query::mergeBooleanQueries(queries);
158
TCHAR* PrefixQuery::toString(const TCHAR* field) const{
159
//Func - Creates a user-readable version of this query and returns it as a string.
//       The text is built as: the prefix field followed by ":" (only inside the
//       field test below), the prefix text, "*", and then "^" plus the boost
//       when the boost differs from 1.0f
160
//Pre - field != NULL
161
//Post - a user-readable version of this query has been returned as a string
163
//Instantiate a StringBuffer to store the readable version temporarily
164
CL_NS(util)::StringBuffer buffer;
165
//Check whether field differs from the field of prefix
//NOTE(review): only the tail of this condition is visible in this view.
167
_tcscmp(prefix->field(),field) != 0 ) {
168
//Append the field of prefix to the buffer
169
buffer.append(prefix->field());
171
buffer.append(_T(":") );
173
//Append the text of the prefix
174
buffer.append(prefix->text());
175
//Append a wildcard character
176
buffer.append(_T("*"));
177
//If the boost factor is not equal to 1
178
if (getBoost() != 1.0f) {
180
buffer.append(_T("^"));
181
//Append the boost factor
182
buffer.appendFloat( getBoost(),1);
184
//Convert StringBuffer buffer to TCHAR block and return it
185
return buffer.toString();
192
//todo: this needs to be exposed, but java is still a bit confused about how...
193
class PrefixFilter::PrefixGenerator{
//Abstract helper that walks the terms matching a prefix and reports every
//document containing such a term through the pure virtual handleDoc().
//NOTE(review): several lines of this class are not visible in this view (the
//member declaration of prefix, access specifiers, the loop opener, the
//declarations of term, tmp and i, and the closing braces).
196
PrefixGenerator(const Term* prefix){
197
//The term pointer is stored as given; no _CL_POINTER call is made here
this->prefix = prefix;
199
virtual ~PrefixGenerator(){
202
//Callback invoked by generate() once per matching document number
virtual void handleDoc(int doc) = 0;
204
//Enumerates terms from the reader and calls handleDoc() for each document of
//each term whose field is the prefix field and whose text starts with the
//prefix text
void generate(IndexReader* reader) {
205
TermEnum* enumerator = reader->terms(prefix);
206
TermDocs* termDocs = reader->termDocs();
207
const TCHAR* prefixText = prefix->text();
208
const TCHAR* prefixField = prefix->field();
211
size_t prefixLen = prefix->textLength();
216
//NOTE(review): presumably the false argument asks for the term without taking
//a new reference, since no release of term is visible in this view - confirm.
term = enumerator->term(false);
218
term->field() == prefixField // interned comparison
220
//now see if term->text() starts with prefixText
221
size_t termLen = term->textLength();
222
if ( prefixLen>termLen )
223
break; //the prefix is longer than the term, can't be matched
227
//check for prefix match in reverse, since most change will be at the end
//NOTE(review): the declaration of i is not visible here; if it is unsigned the
//termination test relies on -1 converting to the maximum value of that type.
228
for ( i=prefixLen-1;i!=-1;--i ){
229
if ( tmp[i] != prefixText[i] ){
230
tmp=NULL;//signals inequality
237
//Position termDocs on the current term and report each of its documents
termDocs->seek(enumerator);
238
while (termDocs->next()) {
239
handleDoc(termDocs->doc());
242
}while(enumerator->next());
247
//NOTE(review): no release of termDocs is visible in this view - confirm it is
//cleaned up alongside enumerator.
_CLDELETE(enumerator);
252
class DefaultPrefixGenerator: public PrefixFilter::PrefixGenerator{
//Concrete PrefixGenerator constructed from a BitSet and a prefix term.
//NOTE(review): the member that keeps bts and the bodies of the constructor,
//destructor and handleDoc are not visible in this view; presumably handleDoc
//marks doc in the BitSet - confirm.
255
//Forwards prefix to the PrefixGenerator base
DefaultPrefixGenerator(BitSet* bts, const Term* prefix):
256
PrefixGenerator(prefix)
260
virtual ~DefaultPrefixGenerator(){
262
//Implements the pure virtual callback declared by PrefixGenerator
void handleDoc(int doc) {
267
PrefixFilter::PrefixFilter( Term* prefix )
//Func - Constructor.
//Post - The member prefix holds the value produced by _CL_POINTER(prefix)
//NOTE(review): _CL_POINTER is defined outside this file; presumably it takes a
//counted reference that the destructor releases - confirm.
269
this->prefix = _CL_POINTER(prefix);
272
PrefixFilter::~PrefixFilter()
//Func - Destructor
//Post - The prefix term has been handed to _CLDECDELETE
//NOTE(review): presumably this drops the reference taken in the constructors
//and deletes the term once it is unreferenced - confirm against the macro.
274
_CLDECDELETE(prefix);
277
PrefixFilter::PrefixFilter( const PrefixFilter& copy ) :
//Func - Copy constructor.
//Post - The member prefix has been initialised with _CL_POINTER(copy.prefix)
//NOTE(review): other initialisers or a body may exist outside this view.
279
prefix( _CL_POINTER(copy.prefix) )
283
Filter* PrefixFilter::clone() const {
//Func - Creates a copy of this filter
//Post - A new PrefixFilter, copy-constructed from *this and allocated with
//       _CLNEW, has been returned
284
return _CLNEW PrefixFilter(*this );
287
TCHAR* PrefixFilter::toString()
//Func - Creates a readable description of this filter
//Post - A string made of "PrefixFilter(", the field of prefix and ")" has been
//       returned
//NOTE(review): only the field name is appended between the parentheses in the
//statements visible here; the prefix text is not included - confirm this is
//intended.
289
//Instantiate a StringBuffer to store the readable version temporarily
290
CL_NS(util)::StringBuffer buffer;
291
buffer.append(_T("PrefixFilter("));
292
buffer.append(prefix->field());
293
buffer.append(_T(")"));
295
//Convert StringBuffer buffer to TCHAR block and return it
296
return buffer.toString();
299
/** Returns a BitSet with true for documents which should be permitted in
300
search results, and false for those that should not. */
301
BitSet* PrefixFilter::bits( IndexReader* reader )
//Func - Builds the document BitSet for this filter
//Pre  - reader != NULL (it is dereferenced without a check)
//NOTE(review): the return statement is not visible in this view; presumably
//bts is returned - confirm.
303
//One bit per document number below reader->maxDoc()
BitSet* bts = _CLNEW BitSet( reader->maxDoc() );
304
//The generator receives the BitSet and the prefix term, then walks the reader
DefaultPrefixGenerator gen(bts, prefix);
305
gen.generate(reader);
309
/** Returns the prefix term held by this filter; the stored pointer is handed back as-is. */
CL_NS(index)::Term* PrefixFilter::getPrefix() const
{
	return this->prefix;
}