1
/*-------------------------------------------------------------------------
4
* I/O functions for tsquery
6
* Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
10
* src/backend/utils/adt/tsquery.c
12
*-------------------------------------------------------------------------
17
#include "libpq/pqformat.h"
18
#include "miscadmin.h"
19
#include "tsearch/ts_locale.h"
20
#include "tsearch/ts_type.h"
21
#include "tsearch/ts_utils.h"
22
#include "utils/builtins.h"
23
#include "utils/memutils.h"
24
#include "utils/pg_crc.h"
27
struct TSQueryParserStateData
29
/* State for gettoken_query */
30
char *buffer; /* entire string we are scanning */
31
char *buf; /* current scan point */
33
int count; /* nesting count, incremented by (,
36
/* polish (prefix) notation in list, filled in by push* functions */
40
* Strings from operands are collected in op. curop is a pointer to the
41
* end of used space of op.
45
int lenop; /* allocated size of op */
46
int sumlen; /* used size of op */
48
/* state for value's parser */
49
TSVectorParseState valstate;
54
#define WAITOPERATOR 2
55
#define WAITFIRSTOPERAND 3
56
#define WAITSINGLEOPERAND 4
59
* subroutine to parse the modifiers (weight and prefix flag currently)
60
* part, like ':1AB' of a query.
63
get_modifiers(char *buf, int16 *weight, bool *prefix)
68
if (!t_iseq(buf, ':'))
72
while (*buf && pg_mblen(buf) == 1)
105
* token types for parsing
118
* get token from query string
120
* *operator is filled in with OP_* when return values is PT_OPR
121
* *strval, *lenval and *weight are filled in when return value is PT_VAL
124
gettoken_query(TSQueryParserState state,
126
int *lenval, char **strval, int16 *weight, bool *prefix)
133
switch (state->state)
135
case WAITFIRSTOPERAND:
137
if (t_iseq(state->buf, '!'))
139
(state->buf)++; /* can safely ++, t_iseq guarantee
140
* that pg_mblen()==1 */
142
state->state = WAITOPERAND;
145
else if (t_iseq(state->buf, '('))
149
state->state = WAITOPERAND;
152
else if (t_iseq(state->buf, ':'))
155
(errcode(ERRCODE_SYNTAX_ERROR),
156
errmsg("syntax error in tsquery: \"%s\"",
159
else if (!t_isspace(state->buf))
162
* We rely on the tsvector parser to parse the value for
165
reset_tsvector_parser(state->valstate, state->buf);
166
if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf))
168
state->buf = get_modifiers(state->buf, weight, prefix);
169
state->state = WAITOPERATOR;
172
else if (state->state == WAITFIRSTOPERAND)
176
(errcode(ERRCODE_SYNTAX_ERROR),
177
errmsg("no operand in tsquery: \"%s\"",
182
if (t_iseq(state->buf, '&'))
184
state->state = WAITOPERAND;
189
if (t_iseq(state->buf, '|'))
191
state->state = WAITOPERAND;
196
else if (t_iseq(state->buf, ')'))
200
return (state->count < 0) ? PT_ERR : PT_CLOSE;
202
else if (*(state->buf) == '\0')
203
return (state->count) ? PT_ERR : PT_END;
204
else if (!t_isspace(state->buf))
207
case WAITSINGLEOPERAND:
208
if (*(state->buf) == '\0')
210
*strval = state->buf;
211
*lenval = strlen(state->buf);
212
state->buf += strlen(state->buf);
219
state->buf += pg_mblen(state->buf);
225
* Push an operator to state->polstr
228
pushOperator(TSQueryParserState state, int8 oper)
232
Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR);
234
tmp = (QueryOperator *) palloc0(sizeof(QueryOperator));
237
/* left is filled in later with findoprnd */
239
state->polstr = lcons(tmp, state->polstr);
243
pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight, bool prefix)
247
if (distance >= MAXSTRPOS)
249
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
250
errmsg("value is too big in tsquery: \"%s\"",
252
if (lenval >= MAXSTRLEN)
254
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
255
errmsg("operand is too long in tsquery: \"%s\"",
258
tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
260
tmp->weight = weight;
261
tmp->prefix = prefix;
262
tmp->valcrc = (int32) valcrc;
263
tmp->length = lenval;
264
tmp->distance = distance;
266
state->polstr = lcons(tmp, state->polstr);
270
* Push an operand to state->polstr.
272
* strval must point to a string equal to state->curop. lenval is the length
276
pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight, bool prefix)
280
if (lenval >= MAXSTRLEN)
282
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
283
errmsg("word is too long in tsquery: \"%s\"",
287
COMP_CRC32(valcrc, strval, lenval);
289
pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight, prefix);
291
/* append the value string to state.op, enlarging buffer if needed first */
292
while (state->curop - state->op + lenval + 1 >= state->lenop)
294
int used = state->curop - state->op;
297
state->op = (char *) repalloc((void *) state->op, state->lenop);
298
state->curop = state->op + used;
300
memcpy((void *) state->curop, (void *) strval, lenval);
301
state->curop += lenval;
302
*(state->curop) = '\0';
304
state->sumlen += lenval + 1 /* \0 */ ;
309
* Push a stopword placeholder to state->polstr
312
pushStop(TSQueryParserState state)
316
tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
317
tmp->type = QI_VALSTOP;
319
state->polstr = lcons(tmp, state->polstr);
323
#define STACKDEPTH 32
326
* Make polish (prefix) notation of query.
328
* See parse_tsquery for explanation of pushval.
331
makepol(TSQueryParserState state,
332
PushFunction pushval,
339
int8 opstack[STACKDEPTH];
344
/* since this function recurses, it could be driven to stack overflow */
347
while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight, &prefix)) != PT_END)
352
pushval(opaque, state, strval, lenval, weight, prefix);
353
while (lenstack && (opstack[lenstack - 1] == OP_AND ||
354
opstack[lenstack - 1] == OP_NOT))
357
pushOperator(state, opstack[lenstack]);
361
if (lenstack && operator == OP_OR)
362
pushOperator(state, OP_OR);
365
if (lenstack == STACKDEPTH) /* internal error */
366
elog(ERROR, "tsquery stack too small");
367
opstack[lenstack] = operator;
372
makepol(state, pushval, opaque);
374
while (lenstack && (opstack[lenstack - 1] == OP_AND ||
375
opstack[lenstack - 1] == OP_NOT))
378
pushOperator(state, opstack[lenstack]);
385
pushOperator(state, opstack[lenstack]);
391
(errcode(ERRCODE_SYNTAX_ERROR),
392
errmsg("syntax error in tsquery: \"%s\"",
399
pushOperator(state, opstack[lenstack]);
404
findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes)
406
/* since this function recurses, it could be driven to stack overflow. */
410
elog(ERROR, "malformed tsquery: operand not found");
412
if (ptr[*pos].type == QI_VAL ||
413
ptr[*pos].type == QI_VALSTOP) /* need to handle VALSTOP here, they
414
* haven't been cleaned away yet. */
420
Assert(ptr[*pos].type == QI_OPR);
422
if (ptr[*pos].qoperator.oper == OP_NOT)
424
ptr[*pos].qoperator.left = 1;
426
findoprnd_recurse(ptr, pos, nnodes);
430
QueryOperator *curitem = &ptr[*pos].qoperator;
433
Assert(curitem->oper == OP_AND || curitem->oper == OP_OR);
436
findoprnd_recurse(ptr, pos, nnodes);
437
curitem->left = *pos - tmp;
438
findoprnd_recurse(ptr, pos, nnodes);
445
* Fills in the left-fields previously left unfilled. The input
446
* QueryItems must be in polish (prefix) notation.
449
findoprnd(QueryItem *ptr, int size)
454
findoprnd_recurse(ptr, &pos, size);
457
elog(ERROR, "malformed tsquery: extra nodes");
462
* Each value (operand) in the query is be passed to pushval. pushval can
463
* transform the simple value to an arbitrarily complex expression using
464
* pushValue and pushOperator. It must push a single value with pushValue,
465
* a complete expression with all operands, or a a stopword placeholder
466
* with pushStop, otherwise the prefix notation representation will be broken,
467
* having an operator with no operand.
469
* opaque is passed on to pushval as is, pushval can use it to store its
472
* The returned query might contain QI_STOPVAL nodes. The caller is responsible
473
* for cleaning them up (with clean_fakeval)
476
parse_tsquery(char *buf,
477
PushFunction pushval,
481
struct TSQueryParserStateData state;
491
state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND;
495
/* init value parser's state */
496
state.valstate = init_tsvector_parser(state.buffer, true, true);
498
/* init list of operand */
501
state.curop = state.op = (char *) palloc(state.lenop);
502
*(state.curop) = '\0';
504
/* parse query & make polish notation (postfix, but in reverse order) */
505
makepol(&state, pushval, opaque);
507
close_tsvector_parser(state.valstate);
509
if (list_length(state.polstr) == 0)
512
(errmsg("text-search query doesn't contain lexemes: \"%s\"",
514
query = (TSQuery) palloc(HDRSIZETQ);
515
SET_VARSIZE(query, HDRSIZETQ);
520
/* Pack the QueryItems in the final TSQuery struct to return to caller */
521
commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);
522
query = (TSQuery) palloc0(commonlen);
523
SET_VARSIZE(query, commonlen);
524
query->size = list_length(state.polstr);
525
ptr = GETQUERY(query);
527
/* Copy QueryItems to TSQuery */
529
foreach(cell, state.polstr)
531
QueryItem *item = (QueryItem *) lfirst(cell);
536
memcpy(&ptr[i], item, sizeof(QueryOperand));
539
ptr[i].type = QI_VALSTOP;
542
memcpy(&ptr[i], item, sizeof(QueryOperator));
545
elog(ERROR, "unrecognized QueryItem type: %d", item->type);
550
/* Copy all the operand strings to TSQuery */
551
memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
554
/* Set left operand pointers for every operator. */
555
findoprnd(ptr, query->size);
561
pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval,
562
int16 weight, bool prefix)
564
pushValue(state, strval, lenval, weight, prefix);
568
* in without morphology
571
tsqueryin(PG_FUNCTION_ARGS)
573
char *in = PG_GETARG_CSTRING(0);
575
pg_verifymbstr(in, strlen(in), false);
577
PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), false));
592
/* Makes sure inf->buf is large enough for adding 'addsize' bytes */
593
#define RESIZEBUF(inf, addsize) \
594
while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
596
int len = (inf)->cur - (inf)->buf; \
597
(inf)->buflen *= 2; \
598
(inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
599
(inf)->cur = (inf)->buf + len; \
603
* recursive walk on tree and print it in
604
* infix (human-readable) view
607
infix(INFIX *in, bool first)
609
/* since this function recurses, it could be driven to stack overflow. */
612
if (in->curpol->type == QI_VAL)
614
QueryOperand *curpol = &in->curpol->qoperand;
615
char *op = in->op + curpol->distance;
618
RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 6);
623
if (t_iseq(op, '\''))
628
else if (t_iseq(op, '\\'))
633
COPYCHAR(in->cur, op);
641
if (curpol->weight || curpol->prefix)
650
if (curpol->weight & (1 << 3))
655
if (curpol->weight & (1 << 2))
660
if (curpol->weight & (1 << 1))
665
if (curpol->weight & 1)
674
else if (in->curpol->qoperator.oper == OP_NOT)
684
if (in->curpol->type == QI_OPR)
688
sprintf(in->cur, "( ");
689
in->cur = strchr(in->cur, '\0');
696
sprintf(in->cur, " )");
697
in->cur = strchr(in->cur, '\0');
702
int8 op = in->curpol->qoperator.oper;
706
if (op == OP_OR && !first)
709
sprintf(in->cur, "( ");
710
in->cur = strchr(in->cur, '\0');
713
nrm.curpol = in->curpol;
716
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
718
/* get right operand */
721
/* get & print left operand */
722
in->curpol = nrm.curpol;
725
/* print operator & right operand */
726
RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
730
sprintf(in->cur, " | %s", nrm.buf);
733
sprintf(in->cur, " & %s", nrm.buf);
736
/* OP_NOT is handled in above if-branch */
737
elog(ERROR, "unrecognized operator type: %d", op);
739
in->cur = strchr(in->cur, '\0');
742
if (op == OP_OR && !first)
745
sprintf(in->cur, " )");
746
in->cur = strchr(in->cur, '\0');
753
tsqueryout(PG_FUNCTION_ARGS)
755
TSQuery query = PG_GETARG_TSQUERY(0);
758
if (query->size == 0)
763
PG_RETURN_POINTER(b);
765
nrm.curpol = GETQUERY(query);
767
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
769
nrm.op = GETOPERAND(query);
772
PG_FREE_IF_COPY(query, 0);
773
PG_RETURN_CSTRING(nrm.buf);
777
* Binary Input / Output functions. The binary format is as follows:
779
* uint32 number of operators/operands in the query
781
* Followed by the operators and operands, in prefix notation. For each
786
* operand text in client encoding, null-terminated
791
* uint8 operator, one of OP_AND, OP_OR, OP_NOT.
794
tsquerysend(PG_FUNCTION_ARGS)
796
TSQuery query = PG_GETARG_TSQUERY(0);
799
QueryItem *item = GETQUERY(query);
801
pq_begintypsend(&buf);
803
pq_sendint(&buf, query->size, sizeof(uint32));
804
for (i = 0; i < query->size; i++)
806
pq_sendint(&buf, item->type, sizeof(item->type));
811
pq_sendint(&buf, item->qoperand.weight, sizeof(uint8));
812
pq_sendint(&buf, item->qoperand.prefix, sizeof(uint8));
813
pq_sendstring(&buf, GETOPERAND(query) + item->qoperand.distance);
816
pq_sendint(&buf, item->qoperator.oper, sizeof(item->qoperator.oper));
819
elog(ERROR, "unrecognized tsquery node type: %d", item->type);
824
PG_FREE_IF_COPY(query, 0);
826
PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
830
tsqueryrecv(PG_FUNCTION_ARGS)
832
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
840
const char **operands;
842
size = pq_getmsgint(buf, sizeof(uint32));
843
if (size > (MaxAllocSize / sizeof(QueryItem)))
844
elog(ERROR, "invalid size of tsquery");
846
/* Allocate space to temporarily hold operand strings */
847
operands = palloc(size * sizeof(char *));
849
/* Allocate space for all the QueryItems. */
850
len = HDRSIZETQ + sizeof(QueryItem) * size;
851
query = (TSQuery) palloc0(len);
853
item = GETQUERY(query);
856
for (i = 0; i < size; i++)
858
item->type = (int8) pq_getmsgint(buf, sizeof(int8));
860
if (item->type == QI_VAL)
862
size_t val_len; /* length after recoding to server encoding */
868
weight = (uint8) pq_getmsgint(buf, sizeof(uint8));
869
prefix = (uint8) pq_getmsgint(buf, sizeof(uint8));
870
val = pq_getmsgstring(buf);
871
val_len = strlen(val);
876
elog(ERROR, "invalid tsquery: invalid weight bitmap");
878
if (val_len > MAXSTRLEN)
879
elog(ERROR, "invalid tsquery: operand too long");
881
if (datalen > MAXSTRPOS)
882
elog(ERROR, "invalid tsquery: total operand length exceeded");
887
COMP_CRC32(valcrc, val, val_len);
890
item->qoperand.weight = weight;
891
item->qoperand.prefix = (prefix) ? true : false;
892
item->qoperand.valcrc = (int32) valcrc;
893
item->qoperand.length = val_len;
894
item->qoperand.distance = datalen;
897
* Operand strings are copied to the final struct after this loop;
898
* here we just collect them to an array
902
datalen += val_len + 1; /* + 1 for the '\0' terminator */
904
else if (item->type == QI_OPR)
908
oper = (int8) pq_getmsgint(buf, sizeof(int8));
909
if (oper != OP_NOT && oper != OP_OR && oper != OP_AND)
910
elog(ERROR, "invalid tsquery: unrecognized operator type %d",
913
elog(ERROR, "invalid pointer to right operand");
915
item->qoperator.oper = oper;
918
elog(ERROR, "unrecognized tsquery node type: %d", item->type);
923
/* Enlarge buffer to make room for the operand values. */
924
query = (TSQuery) repalloc(query, len + datalen);
925
item = GETQUERY(query);
926
ptr = GETOPERAND(query);
929
* Fill in the left-pointers. Checks that the tree is well-formed as a
932
findoprnd(item, size);
934
/* Copy operands to output struct */
935
for (i = 0; i < size; i++)
937
if (item->type == QI_VAL)
939
memcpy(ptr, operands[i], item->qoperand.length + 1);
940
ptr += item->qoperand.length + 1;
947
Assert(ptr - GETOPERAND(query) == datalen);
949
SET_VARSIZE(query, len + datalen);
951
PG_RETURN_TSVECTOR(query);
955
* debug function, used only for view query
956
* which will be executed in non-leaf pages in index
959
tsquerytree(PG_FUNCTION_ARGS)
961
TSQuery query = PG_GETARG_TSQUERY(0);
967
if (query->size == 0)
969
res = (text *) palloc(VARHDRSZ);
970
SET_VARSIZE(res, VARHDRSZ);
971
PG_RETURN_POINTER(res);
974
q = clean_NOT(GETQUERY(query), &len);
978
res = cstring_to_text("T");
984
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
986
nrm.op = GETOPERAND(query);
988
res = cstring_to_text_with_len(nrm.buf, nrm.cur - nrm.buf);
992
PG_FREE_IF_COPY(query, 0);
994
PG_RETURN_TEXT_P(res);