9
#include "searchdata.h"
10
#include "wasaparserdriver.h"
11
#include "wasaparse.tab.h"
15
int yylex(yy::parser::semantic_type *, yy::parser::location_type *,
17
void yyerror(char const *);
18
static void qualify(Rcl::SearchDataClauseDist *, const string &);
20
// Wrap the sub-query sq in a RefCntr and append it to sd as a
// SearchDataClauseSub clause.
// The parser driver d is not referenced by the statement below.
// NOTE(review): sd is dereferenced and sq is wrapped without a null check
// here - confirm that callers never pass null pointers.
static void addSubQuery(WasaParserDriver *d,
21
Rcl::SearchData *sd, Rcl::SearchData *sq)
23
sd->addClause(new Rcl::SearchDataClauseSub(RefCntr<Rcl::SearchData>(sq)));
33
%parse-param {WasaParserDriver* d}
34
%lex-param {WasaParserDriver* d}
38
Rcl::SearchDataClauseSimple *cl;
41
%destructor {delete $$;} <str>
47
%type <str> complexfieldname
49
/* Non-operator tokens need a precedence because of the possibility of
50
concatenation, which needs to have lower precedence than OR */
53
%left <str> QUALIFIERS
54
%left AND UCONCAT '(' '-'
57
%token EQUALS CONTAINS SMALLEREQ SMALLER GREATEREQ GREATER
67
/* Two adjacent queries with no operator in between: resolved with the
   precedence of UCONCAT and combined under a new SCLT_AND SearchData. */
query query %prec UCONCAT
69
//cerr << "q: query query" << endl;
70
// New AND container, built with the driver's stemming language.
Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang);
71
addSubQuery(d, sd, $1);
72
addSubQuery(d, sd, $2);
77
//cerr << "q: query AND query" << endl;
78
// Explicit AND: same construction as above, but the right operand is $3
// because the operator token occupies $2.
Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang);
79
addSubQuery(d, sd, $1);
80
addSubQuery(d, sd, $3);
85
//cerr << "q: query OR query" << endl;
86
// OR: both operands go into an SCLT_OR SearchData (sd), which is then
// itself attached as the single sub-query of an SCLT_AND SearchData (top).
Rcl::SearchData *top = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang);
87
Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_OR, d->m_stemlang);
88
addSubQuery(d, sd, $1);
89
addSubQuery(d, sd, $3);
90
addSubQuery(d, top, sd);
95
//cerr << "q: ( query )" << endl;
99
/* A lone field expression is a query of its own; it also takes the
   precedence of UCONCAT. */
fieldexpr %prec UCONCAT
101
//cerr << "q: fieldexpr" << endl;
102
// Wrap the single clause in a fresh SCLT_AND SearchData; the clause is added
// through the driver's addClause().
Rcl::SearchData *sd = new Rcl::SearchData(Rcl::SCLT_AND, d->m_stemlang);
103
d->addClause(sd, $1);
110
// cerr << "fe: simple fieldexpr: " << $1->gettext() << endl;
113
/* Relational alternatives: each one tags the term clause ($3) with the
   REL_* relation that matches the operator token between the field name
   and the term. */
| complexfieldname EQUALS term
115
// cerr << "fe: " << *$1 << " = " << $3->gettext() << endl;
117
$3->setrel(Rcl::SearchDataClause::REL_EQUALS);
121
| complexfieldname CONTAINS term
123
// cerr << "fe: " << *$1 << " : " << $3->gettext() << endl;
125
$3->setrel(Rcl::SearchDataClause::REL_CONTAINS);
129
| complexfieldname SMALLER term
131
// cerr << "fe: " << *$1 << " < " << $3->gettext() << endl;
133
$3->setrel(Rcl::SearchDataClause::REL_LT);
137
| complexfieldname SMALLEREQ term
139
// cerr << "fe: " << *$1 << " <= " << $3->gettext() << endl;
141
$3->setrel(Rcl::SearchDataClause::REL_LTE);
145
| complexfieldname GREATER term
147
// cerr << "fe: " << *$1 << " > " << $3->gettext() << endl;
149
$3->setrel(Rcl::SearchDataClause::REL_GT);
153
| complexfieldname GREATEREQ term
155
// cerr << "fe: " << *$1 << " >= " << $3->gettext() << endl;
157
$3->setrel(Rcl::SearchDataClause::REL_GTE);
163
// cerr << "fe: - fieldexpr[" << $2->gettext() << "]" << endl;
164
// Negated expression (a '-' prefix, per the debug trace above): flag the
// inner clause as an exclusion.
$2->setexclude(true);
169
/* Deal with field names like dc:title */
173
// cerr << "cfn: WORD" << endl;
177
/* Continuation of a multi-part field name: the CONTAINS token is the ':'
   separator, and the following WORD is appended to the name built so far. */
complexfieldname CONTAINS WORD
179
// cerr << "cfn: complexfieldname ':' WORD" << endl;
180
// Rebuild the full name by joining both parts with a literal ':'.
$$ = new string(*$1 + string(":") + *$3);
188
//cerr << "term[" << *$1 << "]" << endl;
189
// A bare word becomes a simple clause of type SCLT_AND holding the word text.
$$ = new Rcl::SearchDataClauseSimple(Rcl::SCLT_AND, *$1);
200
// cerr << "QUOTED[" << *$1 << "]" << endl;
201
// A quoted string becomes a phrase clause; the third constructor argument is
// 0 (presumably the slack - TODO confirm against SearchDataClauseDist).
$$ = new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, *$1, 0);
206
// cerr << "QUOTED[" << *$1 << "] QUALIFIERS[" << *$2 << "]" << endl;
207
// Quoted string followed by qualifiers: start from the same phrase clause;
// the qualifier text in $2 is presumably applied with qualify() afterwards
// - confirm.
Rcl::SearchDataClauseDist *cl =
208
new Rcl::SearchDataClauseDist(Rcl::SCLT_PHRASE, *$1, 0);
220
// Look for an integer right after index cur in q. If one is found, skip over
// it and return the new index; the parsed value is reported through pval.
221
// q:    qualifier string being scanned.
// cur:  index of the qualifier character that may be followed by a number.
// pval: output location for the parsed integer (presumably written once a
//       number has been found - confirm).
static unsigned int qualGetInt(const string& q, unsigned int cur, int *pval)
223
// Candidate new index; stays equal to cur unless digits are consumed below.
unsigned int ncur = cur;
224
// Only look further when at least one character follows cur.
// NOTE(review): q.size() - 1 is an unsigned expression and wraps around for
// an empty q - confirm that callers never pass an empty string.
if (cur < q.size() - 1) {
226
// Parse a base-10 integer starting right after cur.
int val = strtol(&q[cur + 1], &endptr, 10);
227
// strtol leaves endptr at the start position when nothing was converted.
if (endptr != &q[cur + 1]) {
228
// Advance by the number of characters strtol consumed.
ncur += endptr - &q[cur + 1];
235
// Apply the qualifier characters in quals to the clause cl, one character at
// a time. Depending on the character this adds modifiers (case sensitivity,
// diacritics sensitivity, no stemming), reads a slack value, switches the
// clause type to SCLT_NEAR, or sets a weight.
static void qualify(Rcl::SearchDataClauseDist *cl, const string& quals)
237
// cerr << "qualify(" << cl << ", " << quals << ")" << endl;
238
// i can be moved forward inside the loop body when a number follows the
// qualifier character (see the qualGetInt() call below).
for (unsigned int i = 0; i < quals.length(); i++) {
239
//fprintf(stderr, "qual char %c\n", quals[i]);
246
cl->addModifier(Rcl::SearchDataClause::SDCM_CASESENS);
250
cl->addModifier(Rcl::SearchDataClause::SDCM_DIACSENS);
253
// The next three modifiers are applied together: case-sensitive,
// diacritics-sensitive and no stemming.
cl->addModifier(Rcl::SearchDataClause::SDCM_CASESENS);
254
cl->addModifier(Rcl::SearchDataClause::SDCM_DIACSENS);
255
cl->addModifier(Rcl::SearchDataClause::SDCM_NOSTEMMING);
258
cl->addModifier(Rcl::SearchDataClause::SDCM_NOSTEMMING);
264
// An integer may follow the qualifier character; i is set to the index
// returned by qualGetInt() and the number is received in slack.
i = qualGetInt(quals, i, &slack);
266
//cerr << "set slack " << cl->getslack() << " done" << endl;
270
// Change the clause type to SCLT_NEAR.
cl->setTp(Rcl::SCLT_NEAR);
271
// Special handling when the clause slack is still 0.
if (cl->getslack() == 0) {
273
//cerr << "set slack " << cl->getslack() << " done" << endl;
276
// A digit or '.' starts a numeric weight.
case '.':case '0':case '1':case '2':case '3':case '4':
277
case '5':case '6':case '7':case '8':case '9':
281
// %f reads the number into factor; %n stores in n how many characters have
// been consumed so far (%n does not count toward sscanf's return value).
if (sscanf(&(quals[i]), "%f %n", &factor, &n)) {
283
// Use the parsed number as the clause weight.
cl->setWeight(factor);
296
// specialstartchars are special only at the beginning of a token
297
// (e.g. doctor-who is a term, not 2 terms separated by '-')
298
static const string specialstartchars("-");
299
// specialinchars are special everywhere except inside a quoted string
300
static const string specialinchars(":=<>()");
302
// Called with the opening double quote already read
303
// Lex a quoted string. Characters are read from the driver with GETCHAR();
// once the end of the string is reached, the non-space characters that
// follow are collected into d->qualifiers(). Returns the QUOTED token.
static int parseString(WasaParserDriver *d, yy::parser::semantic_type *yylval)
305
// Heap-allocated buffer for the string contents (presumably handed to the
// parser through yylval - confirm).
string* value = new string();
306
// Drop any qualifiers left over from a previous quoted string.
d->qualifiers().clear();
308
// Read until GETCHAR() returns 0.
while ((c = d->GETCHAR())) {
311
/* Escape: get next char */
320
/* End of string. Look for qualifiers */
321
// Everything up to the next whitespace (or a 0 from GETCHAR()) is taken as
// qualifier text.
while ((c = d->GETCHAR()) && !isspace(c))
322
d->qualifiers().push_back(c);
329
//cerr << "GOT QUOTED ["<<value<<"] quals [" << d->qualifiers() << "]" << endl;
331
return yy::parser::token::QUOTED;
335
// Lexer entry point. Returns the next token code; for the QUALIFIERS token
// the text is stored in yylval->str.
int yylex(yy::parser::semantic_type *yylval, yy::parser::location_type *,
338
// Qualifiers collected by parseString() after a quoted string are delivered
// first, as a QUALIFIERS token of their own.
if (!d->qualifiers().empty()) {
339
yylval->str = new string();
340
// swap() moves the text into the token value and leaves the driver's
// qualifier buffer empty, so the token is produced only once.
yylval->str->swap(d->qualifiers());
341
return yy::parser::token::QUALIFIERS;
346
/* Skip white space. */
347
while ((c = d->GETCHAR()) && isspace(c))
353
// Characters listed in specialstartchars get special treatment when they
// begin a token.
if (specialstartchars.find_first_of(c) != string::npos) {
354
//cerr << "yylex: return " << c << endl;
358
// field-term relations
360
case '=': return yy::parser::token::EQUALS;
361
case ':': return yy::parser::token::CONTAINS;
363
// Read one more character to choose between SMALLEREQ and SMALLER.
int c1 = d->GETCHAR();
365
return yy::parser::token::SMALLEREQ;
368
return yy::parser::token::SMALLER;
372
// Read one more character to choose between GREATEREQ and GREATER.
int c1 = d->GETCHAR();
374
return yy::parser::token::GREATEREQ;
377
return yy::parser::token::GREATER;
385
// Quoted strings are lexed by parseString().
return parseString(d, yylval);
389
// Other chars start a term or field name or reserved word
390
// NOTE(review): word is allocated with new - confirm that it is released on
// the paths below that return AND or OR rather than WORD.
string* word = new string();
391
// Accumulate characters until whitespace, a character from specialinchars,
// or a 0 from GETCHAR() ends the word.
while ((c = d->GETCHAR())) {
393
//cerr << "Word broken by whitespace" << endl;
395
} else if (specialinchars.find_first_of(c) != string::npos) {
396
//cerr << "Word broken by special char" << endl;
400
//cerr << "Word broken by EOF" << endl;
407
// compare() returns 0 on equality, so these tests match the reserved words
// exactly: "AND"/"&&" and "OR"/"||" yield operator tokens instead of WORD.
if (!word->compare("AND") || !word->compare("&&")) {
409
return yy::parser::token::AND;
410
} else if (!word->compare("OR") || !word->compare("||")) {
412
return yy::parser::token::OR;
415
// cerr << "Got word [" << word << "]" << endl;
417
return yy::parser::token::WORD;