1
/*------------------------------------------------------------------------------
2
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
4
* Distributable under the terms of either the Apache License (Version 2.0) or
5
* the GNU Lesser General Public License, as specified in the COPYING file.
6
------------------------------------------------------------------------------*/
7
#include "CLucene/_ApiHeader.h"
8
#include "QueryParser.h"
10
#include "CLucene/analysis/AnalysisHeader.h"
11
#include "CLucene/util/CLStreams.h"
12
#include "CLucene/search/SearchHeader.h"
13
#include "CLucene/search/BooleanClause.h"
14
#include "CLucene/search/Query.h"
15
#include "CLucene/index/Term.h"
16
#include "QueryToken.h"
18
#include "_TokenList.h"
26
CL_NS_DEF2(queryParser,legacy)
28
QueryParser::QueryParser(const TCHAR* _field, Analyzer* _analyzer) : QueryParserBase(_analyzer){
30
//Func - Constructor.
//       Instantiates a QueryParser for the named field _field;
//       _analyzer is passed on to the QueryParserBase constructor
31
//Pre  - _field != NULL
32
//Post - An instance has been created
35
//Store the result of STRDUP_TtoT(_field) in the member field (presumably a
//private copy of the name - confirm); the destructor releases it with
//_CLDELETE_CARRAY
field = STRDUP_TtoT(_field);
39
lowercaseExpandedTerms = true;
42
QueryParser::~QueryParser() {
45
//Func - Destructor
//Post - The instance has been destroyed
47
//Release the field name that the constructor stored in the member field
_CLDELETE_CARRAY(field);
51
Query* QueryParser::parse(const TCHAR* query, const TCHAR* field, Analyzer* analyzer){
52
//Func - Returns a new instance of the Query class for the specified query, field and
//       analyzer. A temporary QueryParser is built from field and analyzer and
//       the work is delegated to its parse(query)
54
//Pre  - query != NULL and holds the query to parse
55
//       field != NULL and holds the default field for query terms
56
//       analyzer holds a valid reference to an Analyzer and is used to
57
//       find terms in the query text
58
//Post - query has been parsed and an instance of Query has been returned
60
CND_PRECONDITION(query != NULL, "query is NULL");
61
CND_PRECONDITION(field != NULL, "field is NULL");
63
//Local parser object; it only exists for the duration of this call
QueryParser parser(field, analyzer);
64
return parser.parse(query);
67
Query* QueryParser::parse(const TCHAR* query){
68
//Func - Returns a parsed Query instance for the query string query
69
//Pre  - query != NULL and contains the query value to be parsed
70
//Post - Returns a parsed Query instance
72
CND_PRECONDITION(query != NULL, "query is NULL");
74
//Instantiate a StringReader that can read the query string
75
BufferedReader* r = _CLNEW StringReader(query);
77
//Check to see if r has been created properly
78
CND_CONDITION(r != NULL, "Could not allocate memory for StringReader r");
80
//Pointer for the return value
84
//Parse the query managed by the StringReader r and return a parsed Query instance
94
Query* QueryParser::parse(BufferedReader* reader){
95
//Func - Returns a parsed Query instance
96
//Pre  - reader contains a valid reference to a Reader and manages the query string
97
//Post - A parsed Query instance has been returned or
//       a CL_ERR_Parse exception ("No query given.") has been thrown because
//       the first token of the query is already EOF
99
//instantiate the TokenList tokens
//(the member pointer tokens is made to refer to _tokens)
101
this->tokens = &_tokens;
103
//Instantiate a lexer; it is handed this parser and the reader
104
Lexer lexer(this, reader);
106
//tokens = lexer.Lex();
110
//Peek at the first token and check whether it is an EOF
111
if (tokens->peek()->Type == QueryToken::EOF_){
112
// The query string failed to yield any tokens. We discard the
113
// TokenList tokens and raise an exception.
//Take the EOF token out of the list before throwing
114
QueryToken* token = this->tokens->extract();
116
_CLTHROWA(CL_ERR_Parse, "No query given.");
119
//Return the parsed Query instance
//(matching starts at the QUERY rule with the member field as default field)
120
Query* ret = MatchQuery(field);
125
int32_t QueryParser::MatchConjunction(){
126
//Func - matches for CONJUNCTION
127
//       CONJUNCTION ::= <AND> | <OR>
128
//Pre  - tokens != NULL
129
//Post - if the first token is an AND or an OR then
130
//       the token is extracted and deleted and CONJ_AND or CONJ_OR is returned
131
//       otherwise CONJ_NONE is returned
133
CND_PRECONDITION(tokens != NULL, "tokens is NULL");
135
//Decide on the type of the first token; it is only removed from the list
//inside a matching case
switch(tokens->peek()->Type){
136
case QueryToken::AND_ :
137
//Delete the first token of tokenlist
138
ExtractAndDeleteToken();
140
case QueryToken::OR :
141
//Delete the first token of tokenlist
142
ExtractAndDeleteToken();
149
int32_t QueryParser::MatchModifier(){
150
//Func - matches for MODIFIER
151
//       MODIFIER ::= <PLUS> | <MINUS> | <NOT>
152
//Pre  - tokens != NULL
153
//Post - if the first token is a PLUS the token is extracted and deleted and MOD_REQ is returned
154
//       if the first token is a MINUS or NOT the token is extracted and deleted and MOD_NOT is returned
155
//       otherwise MOD_NONE is returned
156
CND_PRECONDITION(tokens != NULL, "tokens is NULL");
158
//Decide on the type of the first token; it is only removed from the list
//inside a matching case
switch(tokens->peek()->Type){
159
case QueryToken::PLUS :
160
//Delete the first token of tokenlist
161
ExtractAndDeleteToken();
163
//MINUS and NOT share one case body
case QueryToken::MINUS :
164
case QueryToken::NOT :
165
//Delete the first token of tokenlist
166
ExtractAndDeleteToken();
173
Query* QueryParser::MatchQuery(const TCHAR* field){
174
//Func - matches for QUERY
175
//       QUERY ::= [MODIFIER] CLAUSE (<CONJUNCTION> [MODIFIER] CLAUSE)*
//       Every matched clause is collected through AddClause into a local list
//       of BooleanClause pointers, which is handed to GetBooleanQuery
176
//Pre  - field != NULL
179
CND_PRECONDITION(tokens != NULL, "tokens is NULL");
181
//Clauses gathered while matching
vector<BooleanClause*> clauses;
185
int32_t mods = MOD_NONE;
186
int32_t conj = CONJ_NONE;
189
//match for the optional modifier of the first clause
mods = MatchModifier();
192
//match for the first clause
q = MatchClause(field);
193
//The first clause has no preceding conjunction, so CONJ_NONE is passed
AddClause(clauses, CONJ_NONE, mods, q);
197
//Look at the next token to decide how to continue
QueryToken* p = tokens->peek();
198
if(p->Type == QueryToken::EOF_){
199
//Take the EOF token out of the token list
QueryToken* qt = MatchQueryToken(QueryToken::EOF_);
204
if(p->Type == QueryToken::RPAREN){
205
//MatchQueryToken(QueryToken::RPAREN);
209
//match for a conjunction (AND | OR)
210
conj = MatchConjunction();
211
//match for a modifier
212
mods = MatchModifier();
214
//match for the next clause and add it with its conjunction and modifier
q = MatchClause(field);
216
AddClause(clauses, conj, mods, q);
220
if(clauses.size() == 1){ //bvk: removed this && firstQuery != NULL
221
BooleanClause* c = clauses[0];
222
Query* q = c->getQuery();
224
//Condition check to be sure clauses[0] is valid
//NOTE(review): c has already been dereferenced by c->getQuery() above, so
//this check comes too late to guard that call
225
CND_CONDITION(c != NULL, "c is NULL");
227
//Tell the boolean clause not to delete its query
228
c->deleteQuery=false;
229
//Clear the clauses list
235
return GetBooleanQuery(clauses);
239
Query* QueryParser::MatchClause(const TCHAR* field){
240
//Func - matches for CLAUSE
241
//       CLAUSE ::= [TERM <COLON>] ( TERM | (<LPAREN> QUERY <RPAREN>))
242
//Pre  - field != NULL
246
//sfield is the field name passed on to MatchQuery/MatchTerm below; it
//starts out as the field given by the caller
const TCHAR* sfield = field;
249
QueryToken *DelToken = NULL;
251
//match for [TERM <COLON>]
252
//The first token is taken out of the list before its type is examined
QueryToken* term = tokens->extract();
253
if(term->Type == QueryToken::TERM && tokens->peek()->Type == QueryToken::COLON){
254
DelToken = MatchQueryToken(QueryToken::COLON);
256
CND_CONDITION(DelToken != NULL,"DelToken is NULL");
259
//Copy the text of the term and run discardEscapeChar on the copy
//(presumably strips escape characters in place - confirm)
tmp = STRDUP_TtoT(term->Value);
260
discardEscapeChar(tmp);
269
// TERM | (<LPAREN> QUERY <RPAREN>)
270
if(tokens->peek()->Type == QueryToken::LPAREN){
271
DelToken = MatchQueryToken(QueryToken::LPAREN);
273
CND_CONDITION(DelToken != NULL,"DelToken is NULL");
276
//Match the parenthesised sub query
q = MatchQuery(sfield);
278
//If exception is thrown while trying to match trailing parenthesis,
279
//need to prevent q from leaking.
282
DelToken = MatchQueryToken(QueryToken::RPAREN);
284
CND_CONDITION(DelToken != NULL,"DelToken is NULL");
292
//No parenthesis: match a single TERM
q = MatchTerm(sfield);
295
//Release the copy of the term text made with STRDUP_TtoT
_CLDELETE_CARRAY(tmp);
300
Query* QueryParser::MatchTerm(const TCHAR* field){
301
//Func - matches for TERM
302
//       TERM ::= TERM | PREFIXTERM | WILDTERM | NUMBER
303
//                [ <FUZZY> ] [ <CARAT> <NUMBER> [<FUZZY>]]
304
//                | (<RANGEIN> | <RANGEEX>) [<CARAT> <NUMBER>]
305
//                | <QUOTED> [SLOP] [<CARAT> <NUMBER>]
//       Depending on the token type the query is built through
//       GetWildcardQuery, GetPrefixQuery, GetFuzzyQuery, GetFieldQuery or
//       ParseRangeQuery
306
//Pre  - field != NULL
309
//term  - the token this call is matching
//slop  - optional SLOP token following a QUOTED token
//boost - optional NUMBER token following a CARAT token
QueryToken* term = NULL;
310
QueryToken* slop = NULL;
311
QueryToken* boost = NULL;
314
bool wildcard = false;
316
bool rangein = false;
319
term = tokens->extract();
320
QueryToken* DelToken = NULL; //Token that is about to be deleted
323
//Plain terms, numbers, prefix terms and wildcard terms share one case body
case QueryToken::TERM:
324
case QueryToken::NUMBER:
325
case QueryToken::PREFIXTERM:
326
case QueryToken::WILDTERM:
328
//Check if type of QueryToken term is a prefix term
329
if(term->Type == QueryToken::PREFIXTERM){
332
//Check if type of QueryToken term is a wildcard term
333
if(term->Type == QueryToken::WILDTERM){
336
//Peek to see if the type of the next token is fuzzy term
337
if(tokens->peek()->Type == QueryToken::FUZZY){
338
DelToken = MatchQueryToken(QueryToken::FUZZY);
340
CND_CONDITION(DelToken !=NULL, "DelToken is NULL");
345
//Optional boost: a CARAT token followed by a NUMBER token kept in boost
if(tokens->peek()->Type == QueryToken::CARAT){
346
DelToken = MatchQueryToken(QueryToken::CARAT);
348
CND_CONDITION(DelToken !=NULL, "DelToken is NULL");
351
boost = MatchQueryToken(QueryToken::NUMBER);
353
//A FUZZY token is also accepted after the boost
if(tokens->peek()->Type == QueryToken::FUZZY){
354
DelToken = MatchQueryToken(QueryToken::FUZZY);
356
CND_CONDITION(DelToken !=NULL, "DelToken is NULL");
361
} //end if type==CARAT
363
discardEscapeChar(term->Value); //clean up
365
//Create a WildcardQuery
q = GetWildcardQuery(field,term->Value);
368
//Create a PrefixQuery
//NOTE(review): _tcslen(...)-1 indexes out of bounds if Value is empty -
//confirm the lexer never yields an empty PREFIXTERM
369
term->Value[_tcslen(term->Value)-1] = 0; //discard the *
370
q = GetPrefixQuery(field,term->Value);
373
//Create a FuzzyQuery
375
//Check if the last char is a ~
376
if(term->Value[_tcslen(term->Value)-1] == '~'){
378
//Overwrite the trailing ~ with a terminator
term->Value[_tcslen(term->Value)-1] = '\0';
381
q = GetFuzzyQuery(field,term->Value);
384
//Create a query for the term text through GetFieldQuery
q = GetFieldQuery(field, term->Value);
390
//Both range token types share one case body
case QueryToken::RANGEIN:
391
case QueryToken::RANGEEX:{
392
if(term->Type == QueryToken::RANGEIN){
396
//Optional boost: a CARAT token followed by a NUMBER token kept in boost
if(tokens->peek()->Type == QueryToken::CARAT){
397
DelToken = MatchQueryToken(QueryToken::CARAT);
399
CND_CONDITION(DelToken !=NULL, "DelToken is NULL");
402
boost = MatchQueryToken(QueryToken::NUMBER);
405
//Skip the first character of the token text and overwrite the last one
//with a terminator (the range brackets, going by the variable name)
TCHAR* noBrackets = term->Value + 1;
406
noBrackets[_tcslen(noBrackets)-1] = 0;
407
q = ParseRangeQuery(field, noBrackets, rangein);
412
case QueryToken::QUOTED:{
413
//Optional SLOP token, kept in slop
if(tokens->peek()->Type == QueryToken::SLOP){
414
slop = MatchQueryToken(QueryToken::SLOP);
417
//Optional boost: a CARAT token followed by a NUMBER token kept in boost
if(tokens->peek()->Type == QueryToken::CARAT){
418
DelToken = MatchQueryToken(QueryToken::CARAT);
420
CND_CONDITION(DelToken !=NULL, "DelToken is NULL");
423
boost = MatchQueryToken(QueryToken::NUMBER);
427
//Skip the first character of the token text and overwrite the last one
//with a terminator (the quotes, going by the variable name)
TCHAR* quotedValue = term->Value+1;
428
quotedValue[_tcslen(quotedValue)-1] = '\0';
430
//islop starts from the member phraseSlop
int32_t islop = phraseSlop;
433
//islop is re-assigned from the text of the SLOP token, skipping its
//first character
islop = _ttoi(slop->Value+1);
439
q = GetFieldQuery(field, quotedValue, islop);
450
//Only when a query was built and a boost token was matched
if( q!=NULL && boost != NULL ){
453
//Convert the text of the boost token to a floating point value
f = _tcstod(boost->Value, NULL);
465
QueryToken* QueryParser::MatchQueryToken(QueryToken::Types expectedType){
466
//Func - matches for QueryToken of the specified type and returns it
467
//       otherwise an exception is thrown through throwParserException
468
//Pre  - tokens != NULL
471
CND_PRECONDITION(tokens != NULL,"tokens is NULL");
473
//No tokens left to match against
if(tokens->count() == 0){
474
throwParserException(_T("Error: Unexpected end of program"),' ',0,0);
477
//Extract a token from the TokenList tokens
478
QueryToken* t = tokens->extract();
479
//Check if the type of the token t matches the expectedType
480
if (expectedType != t->Type){
482
//Format a message with the found and the expected type into buf
//(at most 200 characters are written)
_sntprintf(buf,200,_T("Error: Unexpected QueryToken: %d, expected: %d"),t->Type,expectedType);
484
throwParserException(buf,' ',0,0);
487
//Return the matched token
491
void QueryParser::ExtractAndDeleteToken(void){
492
//Func - Extracts the first token from the Tokenlist tokenlist
495
//Post - The first token has been extracted and destroyed
497
CND_PRECONDITION(tokens != NULL, "tokens is NULL");
499
//Extract the token from the TokenList tokens
500
QueryToken* t = tokens->extract();
501
//Condition Check Token may not be NULL
502
CND_CONDITION(t != NULL, "Token is NULL");