2
* IO definitions for tsquery and mtsquery. These types
3
* are identical, but for parsing mtsquery the text parser
4
* and also morphology is used.
6
* query tree, then string with original value.
7
* Query tree with plain view. It means that in the array of nodes
8
* right child is always next and left position = item+item->left
9
* Teodor Sigaev <teodor@sigaev.ru>
16
#include "access/gist.h"
17
#include "access/itup.h"
18
#include "access/rtree.h"
19
#include "utils/array.h"
20
#include "utils/builtins.h"
21
#include "storage/bufpage.h"
31
PG_FUNCTION_INFO_V1(tsquery_in);
32
Datum tsquery_in(PG_FUNCTION_ARGS);
34
PG_FUNCTION_INFO_V1(tsquery_out);
35
Datum tsquery_out(PG_FUNCTION_ARGS);
37
PG_FUNCTION_INFO_V1(exectsq);
38
Datum exectsq(PG_FUNCTION_ARGS);
40
PG_FUNCTION_INFO_V1(rexectsq);
41
Datum rexectsq(PG_FUNCTION_ARGS);
43
PG_FUNCTION_INFO_V1(tsquerytree);
44
Datum tsquerytree(PG_FUNCTION_ARGS);
46
PG_FUNCTION_INFO_V1(to_tsquery);
47
Datum to_tsquery(PG_FUNCTION_ARGS);
49
PG_FUNCTION_INFO_V1(to_tsquery_name);
50
Datum to_tsquery_name(PG_FUNCTION_ARGS);
52
PG_FUNCTION_INFO_V1(to_tsquery_current);
53
Datum to_tsquery_current(PG_FUNCTION_ARGS);
57
#define WAITOPERATOR 2
60
* node of query tree, also used
61
* for storing polish notation in parser
78
/* reverse polish notation in list (for temporary usage) */
83
/* user-friendly operand */
89
/* state for value's parser */
97
get_weight(char *buf, int2 *weight)
107
switch (tolower(*buf))
131
* get token from query string
134
gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
138
switch (state->state)
141
if (*(state->buf) == '!')
147
else if (*(state->buf) == '(')
153
else if (*(state->buf) == ':')
156
(errcode(ERRCODE_SYNTAX_ERROR),
157
errmsg("error at start of operand")));
159
else if (*(state->buf) != ' ')
161
state->valstate.prsbuf = state->buf;
162
state->state = WAITOPERATOR;
163
if (gettoken_tsvector(&(state->valstate)))
165
*strval = state->valstate.word;
166
*lenval = state->valstate.curpos - state->valstate.word;
167
state->buf = get_weight(state->valstate.prsbuf, weight);
172
(errcode(ERRCODE_SYNTAX_ERROR),
173
errmsg("no operand")));
177
if (*(state->buf) == '&' || *(state->buf) == '|')
179
state->state = WAITOPERAND;
180
*val = (int4) *(state->buf);
184
else if (*(state->buf) == ')')
188
return (state->count < 0) ? ERR : CLOSE;
190
else if (*(state->buf) == '\0')
191
return (state->count) ? ERR : END;
192
else if (*(state->buf) != ' ')
205
* push new one in polish notation reverse view
208
pushquery(QPRS_STATE * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
210
NODE *tmp = (NODE *) palloc(sizeof(NODE));
212
tmp->weight = weight;
215
if (distance >= MAXSTRPOS)
217
(errcode(ERRCODE_SYNTAX_ERROR),
218
errmsg("value is too big")));
219
if (lenval >= MAXSTRLEN)
221
(errcode(ERRCODE_SYNTAX_ERROR),
222
errmsg("operand is too long")));
223
tmp->distance = distance;
224
tmp->length = lenval;
225
tmp->next = state->str;
231
* This function is used for tsquery parsing
234
pushval_asis(QPRS_STATE * state, int type, char *strval, int lenval, int2 weight)
236
if (lenval >= MAXSTRLEN)
238
(errcode(ERRCODE_SYNTAX_ERROR),
239
errmsg("word is too long")));
241
pushquery(state, type, crc32_sz((uint8 *) strval, lenval),
242
state->curop - state->op, lenval, weight);
244
while (state->curop - state->op + lenval + 1 >= state->lenop)
246
int4 tmp = state->curop - state->op;
249
state->op = (char *) repalloc((void *) state->op, state->lenop);
250
state->curop = state->op + tmp;
252
memcpy((void *) state->curop, (void *) strval, lenval);
253
state->curop += lenval;
254
*(state->curop) = '\0';
256
state->sumlen += lenval + 1;
261
* This function is used for morph parsing
264
pushval_morph(QPRS_STATE * state, int typeval, char *strval, int lenval, int2 weight)
272
prs.words = (TSWORD *) palloc(sizeof(TSWORD) * prs.lenwords);
274
parsetext_v2(findcfg(state->cfg_id), &prs, strval, lenval);
276
for (count = 0; count < prs.curwords; count++)
278
pushval_asis(state, VAL, prs.words[count].word, prs.words[count].len, weight);
279
pfree(prs.words[count].word);
281
pushquery(state, OPR, (int4) '&', 0, 0, 0);
286
if (prs.curwords == 0)
287
pushval_asis(state, VALSTOP, NULL, 0, 0);
290
#define STACKDEPTH 32
292
* make polish notation of query
295
makepol(QPRS_STATE * state, void (*pushval) (QPRS_STATE *, int, char *, int, int2))
301
int4 stack[STACKDEPTH];
305
while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
310
(*pushval) (state, VAL, strval, lenval, weight);
311
while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
312
stack[lenstack - 1] == (int4) '!'))
315
pushquery(state, OPR, stack[lenstack], 0, 0, 0);
319
if (lenstack && val == (int4) '|')
320
pushquery(state, OPR, val, 0, 0, 0);
323
if (lenstack == STACKDEPTH)
325
elog(ERROR, "stack too short");
326
stack[lenstack] = val;
331
if (makepol(state, pushval) == ERR)
333
if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
334
stack[lenstack - 1] == (int4) '!'))
337
pushquery(state, OPR, stack[lenstack], 0, 0, 0);
344
pushquery(state, OPR, stack[lenstack], 0, 0, 0);
351
(errcode(ERRCODE_SYNTAX_ERROR),
352
errmsg("syntax error")));
360
pushquery(state, OPR, stack[lenstack], 0, 0, 0);
374
* compare 2 string values
377
ValCompare(CHKVAL * chkval, WordEntry * ptr, ITEM * item)
379
if (ptr->len == item->length)
381
&(chkval->values[ptr->pos]),
382
&(chkval->operand[item->distance]),
385
return (ptr->len > item->length) ? 1 : -1;
392
checkclass_str(CHKVAL * chkval, WordEntry * val, ITEM * item)
394
WordEntryPos *ptr = (WordEntryPos *) (chkval->values + val->pos + SHORTALIGN(val->len) + sizeof(uint16));
395
uint16 len = *((uint16 *) (chkval->values + val->pos + SHORTALIGN(val->len)));
399
if (item->weight & (1 << ptr->weight))
407
* is there value 'val' in array or not ?
410
checkcondition_str(void *checkval, ITEM * val)
412
WordEntry *StopLow = ((CHKVAL *) checkval)->arrb;
413
WordEntry *StopHigh = ((CHKVAL *) checkval)->arre;
414
WordEntry *StopMiddle;
417
/* Loop invariant: StopLow <= val < StopHigh */
419
while (StopLow < StopHigh)
421
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
422
difference = ValCompare((CHKVAL *) checkval, StopMiddle, val);
424
return (val->weight && StopMiddle->haspos) ?
425
checkclass_str((CHKVAL *) checkval, StopMiddle, val) : true;
426
else if (difference < 0)
427
StopLow = StopMiddle + 1;
429
StopHigh = StopMiddle;
436
* check for boolean condition
439
TS_execute(ITEM * curitem, void *checkval, bool calcnot, bool (*chkcond) (void *checkval, ITEM * val))
441
if (curitem->type == VAL)
442
return (*chkcond) (checkval, curitem);
443
else if (curitem->val == (int4) '!')
446
((TS_execute(curitem + 1, checkval, calcnot, chkcond)) ? false : true)
449
else if (curitem->val == (int4) '&')
451
if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
452
return TS_execute(curitem + 1, checkval, calcnot, chkcond);
458
if (TS_execute(curitem + curitem->left, checkval, calcnot, chkcond))
461
return TS_execute(curitem + 1, checkval, calcnot, chkcond);
470
rexectsq(PG_FUNCTION_ARGS)
473
return DirectFunctionCall2(
481
exectsq(PG_FUNCTION_ARGS)
483
tsvector *val = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
484
QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1)));
489
if (!val->size || !query->size)
491
PG_FREE_IF_COPY(val, 0);
492
PG_FREE_IF_COPY(query, 1);
493
PG_RETURN_BOOL(false);
496
chkval.arrb = ARRPTR(val);
497
chkval.arre = chkval.arrb + val->size;
498
chkval.values = STRPTR(val);
499
chkval.operand = GETOPERAND(query);
507
PG_FREE_IF_COPY(val, 0);
508
PG_FREE_IF_COPY(query, 1);
509
PG_RETURN_BOOL(result);
513
* find left operand in polish notation view
516
findoprnd(ITEM * ptr, int4 *pos)
519
elog(DEBUG3, (ptr[*pos].type == OPR) ?
520
"%d %c" : "%d %d", *pos, ptr[*pos].val);
522
if (ptr[*pos].type == VAL || ptr[*pos].type == VALSTOP)
527
else if (ptr[*pos].val == (int4) '!')
535
ITEM *curitem = &ptr[*pos];
540
curitem->left = *pos - tmp;
550
queryin(char *buf, void (*pushval) (QPRS_STATE *, int, char *, int, int2), int cfg_id)
567
state.state = WAITOPERAND;
571
state.cfg_id = cfg_id;
573
/* init value parser's state */
574
state.valstate.oprisdelim = true;
575
state.valstate.len = 32;
576
state.valstate.word = (char *) palloc(state.valstate.len);
578
/* init list of operand */
581
state.curop = state.op = (char *) palloc(state.lenop);
582
*(state.curop) = '\0';
584
/* parse query & make polish notation (postfix, but in reverse order) */
585
makepol(&state, pushval);
586
pfree(state.valstate.word);
589
(errcode(ERRCODE_SYNTAX_ERROR),
590
errmsg("empty query")));
592
/* build the final struct */
593
commonlen = COMPUTESIZE(state.num, state.sumlen);
594
query = (QUERYTYPE *) palloc(commonlen);
595
query->len = commonlen;
596
query->size = state.num;
597
ptr = GETQUERY(query);
599
/* set item in polish notation */
600
for (i = 0; i < state.num; i++)
602
ptr[i].weight = state.str->weight;
603
ptr[i].type = state.str->type;
604
ptr[i].val = state.str->val;
605
ptr[i].distance = state.str->distance;
606
ptr[i].length = state.str->length;
607
tmp = state.str->next;
612
/* set user-friendly operand view */
613
memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
616
/* set left operand's position for every operator */
618
findoprnd(ptr, &pos);
623
for (i = 0; i < query->size; i++)
625
if (ptr[i].type == OPR)
626
sprintf(cur, "%c(%d) ", ptr[i].val, ptr[i].left);
628
sprintf(cur, "%d(%s) ", ptr[i].val, GETOPERAND(query) + ptr[i].distance);
629
cur = strchr(cur, '\0');
631
elog(DEBUG3, "POR: %s", pbuf);
638
* in without morphology
641
tsquery_in(PG_FUNCTION_ARGS)
644
PG_RETURN_POINTER(queryin((char *) PG_GETARG_POINTER(0), pushval_asis, 0));
659
#define RESIZEBUF(inf,addsize) \
660
while( ( inf->cur - inf->buf ) + addsize + 1 >= inf->buflen ) \
662
int4 len = inf->cur - inf->buf; \
664
inf->buf = (char*) repalloc( (void*)inf->buf, inf->buflen ); \
665
inf->cur = inf->buf + len; \
669
* recursive walk on tree and print it in
670
* infix (human-readable) view
673
infix(INFIX * in, bool first)
675
if (in->curpol->type == VAL)
677
char *op = in->op + in->curpol->distance;
679
RESIZEBUF(in, in->curpol->length * 2 + 2 + 5);
695
if (in->curpol->weight)
699
if (in->curpol->weight & (1 << 3))
704
if (in->curpol->weight & (1 << 2))
709
if (in->curpol->weight & (1 << 1))
714
if (in->curpol->weight & 1)
723
else if (in->curpol->val == (int4) '!')
732
if (in->curpol->type == OPR)
736
sprintf(in->cur, "( ");
737
in->cur = strchr(in->cur, '\0');
743
sprintf(in->cur, " )");
744
in->cur = strchr(in->cur, '\0');
749
int4 op = in->curpol->val;
753
if (op == (int4) '|' && !first)
756
sprintf(in->cur, "( ");
757
in->cur = strchr(in->cur, '\0');
760
nrm.curpol = in->curpol;
763
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
765
/* get right operand */
768
/* get & print left operand */
769
in->curpol = nrm.curpol;
772
/* print operator & right operand */
773
RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
774
sprintf(in->cur, " %c %s", op, nrm.buf);
775
in->cur = strchr(in->cur, '\0');
778
if (op == (int4) '|' && !first)
781
sprintf(in->cur, " )");
782
in->cur = strchr(in->cur, '\0');
789
tsquery_out(PG_FUNCTION_ARGS)
791
QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
794
if (query->size == 0)
799
PG_RETURN_POINTER(b);
801
nrm.curpol = GETQUERY(query);
803
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
805
nrm.op = GETOPERAND(query);
808
PG_FREE_IF_COPY(query, 0);
809
PG_RETURN_POINTER(nrm.buf);
813
* debug function, used only to view the query
814
* which will be executed in non-leaf pages in index
817
tsquerytree(PG_FUNCTION_ARGS)
819
QUERYTYPE *query = (QUERYTYPE *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0)));
826
if (query->size == 0)
828
res = (text *) palloc(VARHDRSZ);
829
VARATT_SIZEP(res) = VARHDRSZ;
830
PG_RETURN_POINTER(res);
833
q = clean_NOT_v2(GETQUERY(query), &len);
837
res = (text *) palloc(1 + VARHDRSZ);
838
VARATT_SIZEP(res) = 1 + VARHDRSZ;
839
*((char *) VARDATA(res)) = 'T';
845
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
847
nrm.op = GETOPERAND(query);
850
res = (text *) palloc(nrm.cur - nrm.buf + VARHDRSZ);
851
VARATT_SIZEP(res) = nrm.cur - nrm.buf + VARHDRSZ;
852
strncpy(VARDATA(res), nrm.buf, nrm.cur - nrm.buf);
856
PG_FREE_IF_COPY(query, 0);
858
PG_RETURN_POINTER(res);
862
to_tsquery(PG_FUNCTION_ARGS)
864
text *in = PG_GETARG_TEXT_P(1);
873
PG_FREE_IF_COPY(in, 1);
875
query = queryin(str, pushval_morph, PG_GETARG_INT32(0));
876
res = clean_fakeval_v2(GETQUERY(query), &len);
879
query->len = HDRSIZEQT;
881
PG_RETURN_POINTER(query);
883
memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(ITEM));
885
PG_RETURN_POINTER(query);
889
to_tsquery_name(PG_FUNCTION_ARGS)
891
text *name = PG_GETARG_TEXT_P(0);
895
res = DirectFunctionCall2(to_tsquery,
896
Int32GetDatum(name2id_cfg(name)),
899
PG_FREE_IF_COPY(name, 0);
900
PG_RETURN_DATUM(res);
904
to_tsquery_current(PG_FUNCTION_ARGS)
907
PG_RETURN_DATUM(DirectFunctionCall2(to_tsquery,
908
Int32GetDatum(get_currcfg()),
909
PG_GETARG_DATUM(0)));