~mixxxdevelopers/mixxx/features_search

« back to all changes in this revision

Viewing changes to mixxx/lib/clucene-2.3.3.4/src/core/CLucene/queryParser/legacy/QueryParser.cpp

  • Committer: RJ Ryan
  • Date: 2011-05-27 23:54:08 UTC
  • Revision ID: rryan@mit.edu-20110527235408-kepzw0zdxdhngsqr
Add clucene-2.3.3.4 to lib/

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*------------------------------------------------------------------------------
 
2
* Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
 
3
*
 
4
* Distributable under the terms of either the Apache License (Version 2.0) or
 
5
* the GNU Lesser General Public License, as specified in the COPYING file.
 
6
------------------------------------------------------------------------------*/
 
7
#include "CLucene/_ApiHeader.h"
 
8
#include "QueryParser.h"
 
9
 
 
10
#include "CLucene/analysis/AnalysisHeader.h"
 
11
#include "CLucene/util/CLStreams.h"
 
12
#include "CLucene/search/SearchHeader.h"
 
13
#include "CLucene/search/BooleanClause.h"
 
14
#include "CLucene/search/Query.h"
 
15
#include "CLucene/index/Term.h"
 
16
#include "QueryToken.h"
 
17
 
 
18
#include "_TokenList.h"
 
19
#include "_Lexer.h"
 
20
 
 
21
CL_NS_USE(util)
 
22
CL_NS_USE(index)
 
23
CL_NS_USE(analysis)
 
24
CL_NS_USE(search)
 
25
 
 
26
CL_NS_DEF2(queryParser,legacy)
 
27
 
 
28
    QueryParser::QueryParser(const TCHAR* _field, Analyzer* _analyzer) : QueryParserBase(_analyzer){
 
29
    //Func - Constructor.
 
30
        //       Instantiates a QueryParser for the named field _field
 
31
        //Pre  - _field != NULL
 
32
        //Post - An instance has been created
 
33
 
 
34
                if ( _field )
 
35
                        field = STRDUP_TtoT(_field);
 
36
                else
 
37
                        field = NULL;
 
38
                tokens = NULL;
 
39
                lowercaseExpandedTerms = true;
 
40
        }
 
41
 
 
42
        QueryParser::~QueryParser() {
 
43
        //Func - Destructor
 
44
        //Pre  - true
 
45
        //Post - The instance has been destroyed
 
46
 
 
47
        _CLDELETE_CARRAY(field);
 
48
        }
 
49
 
 
50
    //static
 
51
    Query* QueryParser::parse(const TCHAR* query, const TCHAR* field, Analyzer* analyzer){
 
52
    //Func - Returns a new instance of the Query class with a specified query, field and
 
53
    //       analyzer values.
 
54
    //Pre  - query != NULL and holds the query to parse
 
55
        //       field != NULL and holds the default field for query terms
 
56
        //       analyzer holds a valid reference to an Analyzer and is used to
 
57
        //       find terms in the query text
 
58
        //Post - query has been parsed and an instance of Query has been returned
 
59
 
 
60
                CND_PRECONDITION(query != NULL, "query is NULL");
 
61
        CND_PRECONDITION(field != NULL, "field is NULL");
 
62
 
 
63
                QueryParser parser(field, analyzer);
 
64
                return parser.parse(query);
 
65
        }
 
66
 
 
67
    Query* QueryParser::parse(const TCHAR* query){
 
68
        //Func - Returns a parsed Query instance
 
69
        //Pre  - query != NULL and contains the query value to be parsed
 
70
        //Post - Returns a parsed Query Instance
 
71
 
 
72
        CND_PRECONDITION(query != NULL, "query is NULL");
 
73
 
 
74
                //Instantie a Stringer that can read the query string
 
75
        BufferedReader* r = _CLNEW StringReader(query);
 
76
 
 
77
                //Check to see if r has been created properly
 
78
                CND_CONDITION(r != NULL, "Could not allocate memory for StringReader r");
 
79
 
 
80
                //Pointer for the return value
 
81
                Query* ret = NULL;
 
82
 
 
83
                try{
 
84
                        //Parse the query managed by the StringReader R and return a parsed Query instance
 
85
                        //into ret
 
86
                        ret = parse(r);
 
87
                }_CLFINALLY (
 
88
                        _CLDELETE(r);
 
89
                );
 
90
 
 
91
                return ret;
 
92
        }
 
93
 
 
94
        Query* QueryParser::parse(BufferedReader* reader){
 
95
        //Func - Returns a parsed Query instance
 
96
        //Pre  - reader contains a valid reference to a Reader and manages the query string
 
97
        //Post - A parsed Query instance has been returned or
 
98
 
 
99
                //instantiate the TokenList tokens
 
100
                TokenList _tokens;
 
101
                this->tokens = &_tokens;
 
102
 
 
103
                //Instantiate a lexer
 
104
                Lexer lexer(this, reader);
 
105
 
 
106
                //tokens = lexer.Lex();
 
107
                //Lex the tokens
 
108
                lexer.Lex(tokens);
 
109
 
 
110
                //Peek to the first token and check if is an EOF
 
111
                if (tokens->peek()->Type == QueryToken::EOF_){
 
112
                        // The query string failed to yield any tokens.  We discard the
 
113
                        // TokenList tokens and raise an exceptioin.
 
114
                        QueryToken* token = this->tokens->extract();
 
115
                        _CLDELETE(token);
 
116
                   _CLTHROWA(CL_ERR_Parse, "No query given.");
 
117
                }
 
118
 
 
119
                //Return the parsed Query instance
 
120
                Query* ret = MatchQuery(field);
 
121
                this->tokens = NULL;
 
122
                return ret;
 
123
        }
 
124
 
 
125
        int32_t QueryParser::MatchConjunction(){
 
126
        //Func - matches for CONJUNCTION
 
127
        //       CONJUNCTION ::= <AND> | <OR>
 
128
        //Pre  - tokens != NULL
 
129
        //Post - if the first token is an AND or an OR then
 
130
        //       the token is extracted and deleted and CONJ_AND or CONJ_OR is returned
 
131
        //       otherwise CONJ_NONE is returned
 
132
 
 
133
        CND_PRECONDITION(tokens != NULL, "tokens is NULL");
 
134
 
 
135
                switch(tokens->peek()->Type){
 
136
                        case QueryToken::AND_ :
 
137
                                //Delete the first token of tokenlist
 
138
                                ExtractAndDeleteToken();
 
139
                                return CONJ_AND;
 
140
                        case QueryToken::OR   :
 
141
                                //Delete the first token of tokenlist
 
142
                                ExtractAndDeleteToken();
 
143
                                return CONJ_OR;
 
144
                        default :
 
145
                                return CONJ_NONE;
 
146
                }
 
147
        }
 
148
 
 
149
        int32_t QueryParser::MatchModifier(){
 
150
        //Func - matches for MODIFIER
 
151
        //       MODIFIER ::= <PLUS> | <MINUS> | <NOT>
 
152
        //Pre  - tokens != NULL
 
153
        //Post - if the first token is a PLUS the token is extracted and deleted and MOD_REQ is returned
 
154
        //       if the first token is a MINUS or NOT the token is extracted and deleted and MOD_NOT is returned
 
155
        //       otherwise MOD_NONE is returned
 
156
                CND_PRECONDITION(tokens != NULL, "tokens is NULL");
 
157
 
 
158
                switch(tokens->peek()->Type){
 
159
                        case QueryToken::PLUS :
 
160
                                //Delete the first token of tokenlist
 
161
                                ExtractAndDeleteToken();
 
162
                                return MOD_REQ;
 
163
                        case QueryToken::MINUS :
 
164
                        case QueryToken::NOT   :
 
165
                                //Delete the first token of tokenlist
 
166
                                ExtractAndDeleteToken();
 
167
                                return MOD_NOT;
 
168
                        default :
 
169
                                return MOD_NONE;
 
170
                }
 
171
        }
 
172
 
 
173
        Query* QueryParser::MatchQuery(const TCHAR* field){
 
174
        //Func - matches for QUERY
 
175
        //       QUERY ::= [MODIFIER] QueryParser::CLAUSE (<CONJUNCTION> [MODIFIER] CLAUSE)*
 
176
        //Pre  - field != NULL
 
177
        //Post -
 
178
 
 
179
                CND_PRECONDITION(tokens != NULL, "tokens is NULL");
 
180
 
 
181
                vector<BooleanClause*> clauses;
 
182
 
 
183
                Query* q = NULL;
 
184
 
 
185
                int32_t mods = MOD_NONE;
 
186
                int32_t conj = CONJ_NONE;
 
187
 
 
188
                //match for MODIFIER
 
189
                mods = MatchModifier();
 
190
 
 
191
                //match for CLAUSE
 
192
                q = MatchClause(field);
 
193
                AddClause(clauses, CONJ_NONE, mods, q);
 
194
 
 
195
                // match for CLAUSE*
 
196
                while(true){
 
197
                        QueryToken* p = tokens->peek();
 
198
                        if(p->Type == QueryToken::EOF_){
 
199
                                QueryToken* qt = MatchQueryToken(QueryToken::EOF_);
 
200
                                _CLDELETE(qt);
 
201
                                break;
 
202
                        }
 
203
 
 
204
                        if(p->Type == QueryToken::RPAREN){
 
205
                                //MatchQueryToken(QueryToken::RPAREN);
 
206
                                break;
 
207
                        }
 
208
 
 
209
                        //match for a conjuction (AND OR NOT)
 
210
                        conj = MatchConjunction();
 
211
                        //match for a modifier
 
212
                        mods = MatchModifier();
 
213
 
 
214
                        q = MatchClause(field);
 
215
                        if ( q != NULL )
 
216
                                AddClause(clauses, conj, mods, q);
 
217
                }
 
218
 
 
219
                // finalize query
 
220
                if(clauses.size() == 1){ //bvk: removed this && firstQuery != NULL
 
221
                        BooleanClause* c = clauses[0];
 
222
                        Query* q = c->getQuery();
 
223
 
 
224
                        //Condition check to be sure clauses[0] is valid
 
225
                        CND_CONDITION(c != NULL, "c is NULL");
 
226
 
 
227
                        //Tell the boolean clause not to delete its query
 
228
                        c->deleteQuery=false;
 
229
                        //Clear the clauses list
 
230
                        clauses.clear();
 
231
                        _CLDELETE(c);
 
232
 
 
233
                        return q;
 
234
                }else{
 
235
                        return GetBooleanQuery(clauses);
 
236
                }
 
237
        }
 
238
 
 
239
        Query* QueryParser::MatchClause(const TCHAR* field){
 
240
        //Func - matches for CLAUSE
 
241
        //       CLAUSE ::= [TERM <COLONQueryParser::>] ( TERM | (<LPAREN> QUERY <RPAREN>))
 
242
        //Pre  - field != NULL
 
243
        //Post -
 
244
 
 
245
                Query* q = NULL;
 
246
                const TCHAR* sfield = field;
 
247
                TCHAR* tmp = NULL;
 
248
 
 
249
                QueryToken *DelToken = NULL;
 
250
 
 
251
                //match for [TERM <COLON>]
 
252
                QueryToken* term = tokens->extract();
 
253
                if(term->Type == QueryToken::TERM && tokens->peek()->Type == QueryToken::COLON){
 
254
                        DelToken = MatchQueryToken(QueryToken::COLON);
 
255
 
 
256
                        CND_CONDITION(DelToken != NULL,"DelToken is NULL");
 
257
                        _CLDELETE(DelToken);
 
258
 
 
259
                        tmp = STRDUP_TtoT(term->Value);
 
260
                        discardEscapeChar(tmp);
 
261
                        sfield = tmp;
 
262
                        _CLDELETE(term);
 
263
                }else{
 
264
                        tokens->push(term);
 
265
                        term = NULL;
 
266
                }
 
267
 
 
268
                // match for
 
269
                // TERM | (<LPAREN> QUERY <RPAREN>)
 
270
                if(tokens->peek()->Type == QueryToken::LPAREN){
 
271
                        DelToken = MatchQueryToken(QueryToken::LPAREN);
 
272
 
 
273
                        CND_CONDITION(DelToken != NULL,"DelToken is NULL");
 
274
                        _CLDELETE(DelToken);
 
275
 
 
276
                        q = MatchQuery(sfield);
 
277
                        //DSR:2004.11.01:
 
278
                        //If exception is thrown while trying to match trailing parenthesis,
 
279
                        //need to prevent q from leaking.
 
280
 
 
281
                        try{
 
282
                           DelToken = MatchQueryToken(QueryToken::RPAREN);
 
283
 
 
284
                           CND_CONDITION(DelToken != NULL,"DelToken is NULL");
 
285
                           _CLDELETE(DelToken);
 
286
 
 
287
                        }catch(...) {
 
288
                                _CLDELETE(q);
 
289
                                throw;
 
290
                        }
 
291
                }else{
 
292
                        q = MatchTerm(sfield);
 
293
                }
 
294
 
 
295
          _CLDELETE_CARRAY(tmp);
 
296
          return q;
 
297
        }
 
298
 
 
299
 
 
300
        Query* QueryParser::MatchTerm(const TCHAR* field){
 
301
        //Func - matches for TERM
 
302
        //       TERM ::= TERM | PREFIXTERM | WILDTERM | NUMBER
 
303
        //                [ <FUZZY> ] [ <CARAT> <NUMBER> [<FUZZY>]]
 
304
        //                            | (<RANGEIN> | <RANGEEX>) [<CARAT> <NUMBER>]
 
305
        //                            | <QUOTED> [SLOP] [<CARAT> <NUMBER>]
 
306
        //Pre  - field != NULL
 
307
        //Post -
 
308
 
 
309
                QueryToken* term = NULL;
 
310
                QueryToken* slop = NULL;
 
311
                QueryToken* boost = NULL;
 
312
 
 
313
                bool prefix = false;
 
314
                bool wildcard = false;
 
315
                bool fuzzy = false;
 
316
                bool rangein = false;
 
317
                Query* q = NULL;
 
318
 
 
319
                term = tokens->extract();
 
320
                QueryToken* DelToken = NULL; //Token that is about to be deleted
 
321
 
 
322
                switch(term->Type){
 
323
                        case QueryToken::TERM:
 
324
                        case QueryToken::NUMBER:
 
325
                        case QueryToken::PREFIXTERM:
 
326
                        case QueryToken::WILDTERM:
 
327
                        { //start case
 
328
                                //Check if type of QueryToken term is a prefix term
 
329
                                if(term->Type == QueryToken::PREFIXTERM){
 
330
                                        prefix = true;
 
331
                                }
 
332
                                //Check if type of QueryToken term is a wildcard term
 
333
                                if(term->Type == QueryToken::WILDTERM){
 
334
                                        wildcard = true;
 
335
                                }
 
336
                                //Peek to see if the type of the next token is fuzzy term
 
337
                                if(tokens->peek()->Type == QueryToken::FUZZY){
 
338
                                        DelToken = MatchQueryToken(QueryToken::FUZZY);
 
339
 
 
340
                                        CND_CONDITION(DelToken !=NULL, "DelToken is NULL");
 
341
                                        _CLDELETE(DelToken);
 
342
 
 
343
                                        fuzzy = true;
 
344
                                }
 
345
                                if(tokens->peek()->Type == QueryToken::CARAT){
 
346
                                        DelToken = MatchQueryToken(QueryToken::CARAT);
 
347
 
 
348
                                        CND_CONDITION(DelToken !=NULL, "DelToken is NULL");
 
349
                                        _CLDELETE(DelToken);
 
350
 
 
351
                                        boost = MatchQueryToken(QueryToken::NUMBER);
 
352
 
 
353
                                        if(tokens->peek()->Type == QueryToken::FUZZY){
 
354
                                           DelToken = MatchQueryToken(QueryToken::FUZZY);
 
355
 
 
356
                                           CND_CONDITION(DelToken !=NULL, "DelToken is NULL");
 
357
                                           _CLDELETE(DelToken);
 
358
 
 
359
                                           fuzzy = true;
 
360
                                   }
 
361
                                } //end if type==CARAT
 
362
 
 
363
                                discardEscapeChar(term->Value); //clean up
 
364
                                if(wildcard){
 
365
                                        q = GetWildcardQuery(field,term->Value);
 
366
                                        break;
 
367
                                }else if(prefix){
 
368
                                        //Create a PrefixQuery
 
369
                                        term->Value[_tcslen(term->Value)-1] = 0; //discard the *
 
370
                                        q = GetPrefixQuery(field,term->Value);
 
371
                                        break;
 
372
                                }else if(fuzzy){
 
373
                                        //Create a FuzzyQuery
 
374
 
 
375
                                        //Check if the last char is a ~
 
376
                                        if(term->Value[_tcslen(term->Value)-1] == '~'){
 
377
                                                //remove the ~
 
378
                                                term->Value[_tcslen(term->Value)-1] = '\0';
 
379
                                        }
 
380
 
 
381
                                        q = GetFuzzyQuery(field,term->Value);
 
382
                                        break;
 
383
                                }else{
 
384
                                        q = GetFieldQuery(field, term->Value);
 
385
                                        break;
 
386
                                }
 
387
                        }
 
388
 
 
389
 
 
390
                        case QueryToken::RANGEIN:
 
391
                        case QueryToken::RANGEEX:{
 
392
                                if(term->Type == QueryToken::RANGEIN){
 
393
                                        rangein = true;
 
394
                                }
 
395
 
 
396
                                if(tokens->peek()->Type == QueryToken::CARAT){
 
397
                                        DelToken = MatchQueryToken(QueryToken::CARAT);
 
398
 
 
399
                                        CND_CONDITION(DelToken !=NULL, "DelToken is NULL");
 
400
                                        _CLDELETE(DelToken);
 
401
 
 
402
                                        boost = MatchQueryToken(QueryToken::NUMBER);
 
403
                                }
 
404
 
 
405
                                TCHAR* noBrackets = term->Value + 1;
 
406
                                noBrackets[_tcslen(noBrackets)-1] = 0;
 
407
                                q = ParseRangeQuery(field, noBrackets, rangein);
 
408
                                break;
 
409
                        }
 
410
 
 
411
 
 
412
                        case QueryToken::QUOTED:{
 
413
                                if(tokens->peek()->Type == QueryToken::SLOP){
 
414
                                        slop = MatchQueryToken(QueryToken::SLOP);
 
415
                                }
 
416
 
 
417
                                if(tokens->peek()->Type == QueryToken::CARAT){
 
418
                                        DelToken = MatchQueryToken(QueryToken::CARAT);
 
419
 
 
420
                                        CND_CONDITION(DelToken !=NULL, "DelToken is NULL");
 
421
                                        _CLDELETE(DelToken);
 
422
 
 
423
                                        boost = MatchQueryToken(QueryToken::NUMBER);
 
424
                                }
 
425
 
 
426
                                //remove the quotes
 
427
                                TCHAR* quotedValue = term->Value+1;
 
428
                                quotedValue[_tcslen(quotedValue)-1] = '\0';
 
429
 
 
430
                                int32_t islop = phraseSlop;
 
431
                                if(slop != NULL ){
 
432
                                   try {
 
433
             islop = _ttoi(slop->Value+1);
 
434
                                   }catch(...){
 
435
                                           //ignored
 
436
                                   }
 
437
                                }
 
438
 
 
439
                                q = GetFieldQuery(field, quotedValue, islop);
 
440
                                _CLDELETE(slop);
 
441
                        }
 
442
                        
 
443
                        default:
 
444
                          break;
 
445
                } // end of switch
 
446
 
 
447
                _CLDELETE(term);
 
448
 
 
449
 
 
450
                if( q!=NULL && boost != NULL ){
 
451
                        float_t f = 1.0F;
 
452
                        try {
 
453
                                f = _tcstod(boost->Value, NULL);
 
454
                        }catch(...){
 
455
                                //ignored
 
456
                        }
 
457
                        _CLDELETE(boost);
 
458
 
 
459
                        q->setBoost( f);
 
460
                }
 
461
 
 
462
                return q;
 
463
        }
 
464
 
 
465
        QueryToken* QueryParser::MatchQueryToken(QueryToken::Types expectedType){
 
466
        //Func - matches for QueryToken of the specified type and returns it
 
467
        //       otherwise Exception throws
 
468
        //Pre  - tokens != NULL
 
469
        //Post -
 
470
 
 
471
                CND_PRECONDITION(tokens != NULL,"tokens is NULL");
 
472
 
 
473
                if(tokens->count() == 0){
 
474
                        throwParserException(_T("Error: Unexpected end of program"),' ',0,0);
 
475
                }
 
476
 
 
477
          //Extract a token form the TokenList tokens
 
478
          QueryToken* t = tokens->extract();
 
479
          //Check if the type of the token t matches the expectedType
 
480
          if (expectedType != t->Type){
 
481
                  TCHAR buf[200];
 
482
                  _sntprintf(buf,200,_T("Error: Unexpected QueryToken: %d, expected: %d"),t->Type,expectedType);
 
483
                  _CLDELETE(t);
 
484
                  throwParserException(buf,' ',0,0);
 
485
                }
 
486
 
 
487
          //Return the matched token
 
488
          return t;
 
489
        }
 
490
 
 
491
        void QueryParser::ExtractAndDeleteToken(void){
 
492
        //Func - Extracts the first token from the Tokenlist tokenlist
 
493
        //       and destroys it
 
494
        //Pre  - true
 
495
        //Post - The first token has been extracted and destroyed
 
496
 
 
497
                CND_PRECONDITION(tokens != NULL, "tokens is NULL");
 
498
 
 
499
                //Extract the token from the TokenList tokens
 
500
                QueryToken* t = tokens->extract();
 
501
                //Condition Check Token may not be NULL
 
502
                CND_CONDITION(t != NULL, "Token is NULL");
 
503
                //Delete Token
 
504
                _CLDELETE(t);
 
505
        }
 
506
 
 
507
CL_NS_END2