~ubuntu-branches/ubuntu/oneiric/postgresql-9.1/oneiric-security

« back to all changes in this revision

Viewing changes to src/backend/utils/adt/tsquery.c

  • Committer: Bazaar Package Importer
  • Author(s): Martin Pitt
  • Date: 2011-05-11 10:41:53 UTC
  • Revision ID: james.westby@ubuntu.com-20110511104153-psbh2o58553fv1m0
Tags: upstream-9.1~beta1
Import upstream version 9.1~beta1

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*-------------------------------------------------------------------------
 
2
 *
 
3
 * tsquery.c
 
4
 *        I/O functions for tsquery
 
5
 *
 
6
 * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
 
7
 *
 
8
 *
 
9
 * IDENTIFICATION
 
10
 *        src/backend/utils/adt/tsquery.c
 
11
 *
 
12
 *-------------------------------------------------------------------------
 
13
 */
 
14
 
 
15
#include "postgres.h"
 
16
 
 
17
#include "libpq/pqformat.h"
 
18
#include "miscadmin.h"
 
19
#include "tsearch/ts_locale.h"
 
20
#include "tsearch/ts_type.h"
 
21
#include "tsearch/ts_utils.h"
 
22
#include "utils/builtins.h"
 
23
#include "utils/memutils.h"
 
24
#include "utils/pg_crc.h"
 
25
 
 
26
 
 
27
struct TSQueryParserStateData
 
28
{
 
29
        /* State for gettoken_query */
 
30
        char       *buffer;                     /* entire string we are scanning */
 
31
        char       *buf;                        /* current scan point */
 
32
        int                     state;
 
33
        int                     count;                  /* nesting count, incremented by (,
 
34
                                                                 * decremented by ) */
 
35
 
 
36
        /* polish (prefix) notation in list, filled in by push* functions */
 
37
        List       *polstr;
 
38
 
 
39
        /*
 
40
         * Strings from operands are collected in op. curop is a pointer to the
 
41
         * end of used space of op.
 
42
         */
 
43
        char       *op;
 
44
        char       *curop;
 
45
        int                     lenop;                  /* allocated size of op */
 
46
        int                     sumlen;                 /* used size of op */
 
47
 
 
48
        /* state for value's parser */
 
49
        TSVectorParseState valstate;
 
50
};
 
51
 
 
52
/* Values stored in TSQueryParserStateData.state by the query tokenizer */
#define WAITOPERAND         1
#define WAITOPERATOR        2
#define WAITFIRSTOPERAND    3
#define WAITSINGLEOPERAND   4
 
57
 
 
58
/*
 
59
 * subroutine to parse the modifiers (weight and prefix flag currently)
 
60
 * part, like ':1AB' of a query.
 
61
 */
 
62
static char *
 
63
get_modifiers(char *buf, int16 *weight, bool *prefix)
 
64
{
 
65
        *weight = 0;
 
66
        *prefix = false;
 
67
 
 
68
        if (!t_iseq(buf, ':'))
 
69
                return buf;
 
70
 
 
71
        buf++;
 
72
        while (*buf && pg_mblen(buf) == 1)
 
73
        {
 
74
                switch (*buf)
 
75
                {
 
76
                        case 'a':
 
77
                        case 'A':
 
78
                                *weight |= 1 << 3;
 
79
                                break;
 
80
                        case 'b':
 
81
                        case 'B':
 
82
                                *weight |= 1 << 2;
 
83
                                break;
 
84
                        case 'c':
 
85
                        case 'C':
 
86
                                *weight |= 1 << 1;
 
87
                                break;
 
88
                        case 'd':
 
89
                        case 'D':
 
90
                                *weight |= 1;
 
91
                                break;
 
92
                        case '*':
 
93
                                *prefix = true;
 
94
                                break;
 
95
                        default:
 
96
                                return buf;
 
97
                }
 
98
                buf++;
 
99
        }
 
100
 
 
101
        return buf;
 
102
}
 
103
 
 
104
/*
 * Kinds of token that gettoken_query can hand back to makepol
 */
typedef enum
{
    PT_END = 0,                 /* no more tokens */
    PT_ERR = 1,                 /* malformed input */
    PT_VAL = 2,                 /* an operand */
    PT_OPR = 3,                 /* an operator */
    PT_OPEN = 4,                /* '(' */
    PT_CLOSE = 5                /* ')' */
} ts_tokentype;
 
116
 
 
117
/*
 
118
 * get token from query string
 
119
 *
 
120
 * *operator is filled in with OP_* when return values is PT_OPR
 
121
 * *strval, *lenval and *weight are filled in when return value is PT_VAL
 
122
 */
 
123
static ts_tokentype
 
124
gettoken_query(TSQueryParserState state,
 
125
                           int8 *operator,
 
126
                           int *lenval, char **strval, int16 *weight, bool *prefix)
 
127
{
 
128
        *weight = 0;
 
129
        *prefix = false;
 
130
 
 
131
        while (1)
 
132
        {
 
133
                switch (state->state)
 
134
                {
 
135
                        case WAITFIRSTOPERAND:
 
136
                        case WAITOPERAND:
 
137
                                if (t_iseq(state->buf, '!'))
 
138
                                {
 
139
                                        (state->buf)++;         /* can safely ++, t_iseq guarantee
 
140
                                                                                 * that pg_mblen()==1 */
 
141
                                        *operator = OP_NOT;
 
142
                                        state->state = WAITOPERAND;
 
143
                                        return PT_OPR;
 
144
                                }
 
145
                                else if (t_iseq(state->buf, '('))
 
146
                                {
 
147
                                        state->count++;
 
148
                                        (state->buf)++;
 
149
                                        state->state = WAITOPERAND;
 
150
                                        return PT_OPEN;
 
151
                                }
 
152
                                else if (t_iseq(state->buf, ':'))
 
153
                                {
 
154
                                        ereport(ERROR,
 
155
                                                        (errcode(ERRCODE_SYNTAX_ERROR),
 
156
                                                         errmsg("syntax error in tsquery: \"%s\"",
 
157
                                                                        state->buffer)));
 
158
                                }
 
159
                                else if (!t_isspace(state->buf))
 
160
                                {
 
161
                                        /*
 
162
                                         * We rely on the tsvector parser to parse the value for
 
163
                                         * us
 
164
                                         */
 
165
                                        reset_tsvector_parser(state->valstate, state->buf);
 
166
                                        if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf))
 
167
                                        {
 
168
                                                state->buf = get_modifiers(state->buf, weight, prefix);
 
169
                                                state->state = WAITOPERATOR;
 
170
                                                return PT_VAL;
 
171
                                        }
 
172
                                        else if (state->state == WAITFIRSTOPERAND)
 
173
                                                return PT_END;
 
174
                                        else
 
175
                                                ereport(ERROR,
 
176
                                                                (errcode(ERRCODE_SYNTAX_ERROR),
 
177
                                                                 errmsg("no operand in tsquery: \"%s\"",
 
178
                                                                                state->buffer)));
 
179
                                }
 
180
                                break;
 
181
                        case WAITOPERATOR:
 
182
                                if (t_iseq(state->buf, '&'))
 
183
                                {
 
184
                                        state->state = WAITOPERAND;
 
185
                                        *operator = OP_AND;
 
186
                                        (state->buf)++;
 
187
                                        return PT_OPR;
 
188
                                }
 
189
                                if (t_iseq(state->buf, '|'))
 
190
                                {
 
191
                                        state->state = WAITOPERAND;
 
192
                                        *operator = OP_OR;
 
193
                                        (state->buf)++;
 
194
                                        return PT_OPR;
 
195
                                }
 
196
                                else if (t_iseq(state->buf, ')'))
 
197
                                {
 
198
                                        (state->buf)++;
 
199
                                        state->count--;
 
200
                                        return (state->count < 0) ? PT_ERR : PT_CLOSE;
 
201
                                }
 
202
                                else if (*(state->buf) == '\0')
 
203
                                        return (state->count) ? PT_ERR : PT_END;
 
204
                                else if (!t_isspace(state->buf))
 
205
                                        return PT_ERR;
 
206
                                break;
 
207
                        case WAITSINGLEOPERAND:
 
208
                                if (*(state->buf) == '\0')
 
209
                                        return PT_END;
 
210
                                *strval = state->buf;
 
211
                                *lenval = strlen(state->buf);
 
212
                                state->buf += strlen(state->buf);
 
213
                                state->count++;
 
214
                                return PT_VAL;
 
215
                        default:
 
216
                                return PT_ERR;
 
217
                                break;
 
218
                }
 
219
                state->buf += pg_mblen(state->buf);
 
220
        }
 
221
        return PT_END;
 
222
}
 
223
 
 
224
/*
 
225
 * Push an operator to state->polstr
 
226
 */
 
227
void
 
228
pushOperator(TSQueryParserState state, int8 oper)
 
229
{
 
230
        QueryOperator *tmp;
 
231
 
 
232
        Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR);
 
233
 
 
234
        tmp = (QueryOperator *) palloc0(sizeof(QueryOperator));
 
235
        tmp->type = QI_OPR;
 
236
        tmp->oper = oper;
 
237
        /* left is filled in later with findoprnd */
 
238
 
 
239
        state->polstr = lcons(tmp, state->polstr);
 
240
}
 
241
 
 
242
static void
 
243
pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight, bool prefix)
 
244
{
 
245
        QueryOperand *tmp;
 
246
 
 
247
        if (distance >= MAXSTRPOS)
 
248
                ereport(ERROR,
 
249
                                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 
250
                                 errmsg("value is too big in tsquery: \"%s\"",
 
251
                                                state->buffer)));
 
252
        if (lenval >= MAXSTRLEN)
 
253
                ereport(ERROR,
 
254
                                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 
255
                                 errmsg("operand is too long in tsquery: \"%s\"",
 
256
                                                state->buffer)));
 
257
 
 
258
        tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
 
259
        tmp->type = QI_VAL;
 
260
        tmp->weight = weight;
 
261
        tmp->prefix = prefix;
 
262
        tmp->valcrc = (int32) valcrc;
 
263
        tmp->length = lenval;
 
264
        tmp->distance = distance;
 
265
 
 
266
        state->polstr = lcons(tmp, state->polstr);
 
267
}
 
268
 
 
269
/*
 
270
 * Push an operand to state->polstr.
 
271
 *
 
272
 * strval must point to a string equal to state->curop. lenval is the length
 
273
 * of the string.
 
274
 */
 
275
void
 
276
pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight, bool prefix)
 
277
{
 
278
        pg_crc32        valcrc;
 
279
 
 
280
        if (lenval >= MAXSTRLEN)
 
281
                ereport(ERROR,
 
282
                                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 
283
                                 errmsg("word is too long in tsquery: \"%s\"",
 
284
                                                state->buffer)));
 
285
 
 
286
        INIT_CRC32(valcrc);
 
287
        COMP_CRC32(valcrc, strval, lenval);
 
288
        FIN_CRC32(valcrc);
 
289
        pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight, prefix);
 
290
 
 
291
        /* append the value string to state.op, enlarging buffer if needed first */
 
292
        while (state->curop - state->op + lenval + 1 >= state->lenop)
 
293
        {
 
294
                int                     used = state->curop - state->op;
 
295
 
 
296
                state->lenop *= 2;
 
297
                state->op = (char *) repalloc((void *) state->op, state->lenop);
 
298
                state->curop = state->op + used;
 
299
        }
 
300
        memcpy((void *) state->curop, (void *) strval, lenval);
 
301
        state->curop += lenval;
 
302
        *(state->curop) = '\0';
 
303
        state->curop++;
 
304
        state->sumlen += lenval + 1 /* \0 */ ;
 
305
}
 
306
 
 
307
 
 
308
/*
 
309
 * Push a stopword placeholder to state->polstr
 
310
 */
 
311
void
 
312
pushStop(TSQueryParserState state)
 
313
{
 
314
        QueryOperand *tmp;
 
315
 
 
316
        tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
 
317
        tmp->type = QI_VALSTOP;
 
318
 
 
319
        state->polstr = lcons(tmp, state->polstr);
 
320
}
 
321
 
 
322
 
 
323
#define STACKDEPTH      32
 
324
 
 
325
/*
 
326
 * Make polish (prefix) notation of query.
 
327
 *
 
328
 * See parse_tsquery for explanation of pushval.
 
329
 */
 
330
static void
 
331
makepol(TSQueryParserState state,
 
332
                PushFunction pushval,
 
333
                Datum opaque)
 
334
{
 
335
        int8            operator = 0;
 
336
        ts_tokentype type;
 
337
        int                     lenval = 0;
 
338
        char       *strval = NULL;
 
339
        int8            opstack[STACKDEPTH];
 
340
        int                     lenstack = 0;
 
341
        int16           weight = 0;
 
342
        bool            prefix;
 
343
 
 
344
        /* since this function recurses, it could be driven to stack overflow */
 
345
        check_stack_depth();
 
346
 
 
347
        while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight, &prefix)) != PT_END)
 
348
        {
 
349
                switch (type)
 
350
                {
 
351
                        case PT_VAL:
 
352
                                pushval(opaque, state, strval, lenval, weight, prefix);
 
353
                                while (lenstack && (opstack[lenstack - 1] == OP_AND ||
 
354
                                                                        opstack[lenstack - 1] == OP_NOT))
 
355
                                {
 
356
                                        lenstack--;
 
357
                                        pushOperator(state, opstack[lenstack]);
 
358
                                }
 
359
                                break;
 
360
                        case PT_OPR:
 
361
                                if (lenstack && operator == OP_OR)
 
362
                                        pushOperator(state, OP_OR);
 
363
                                else
 
364
                                {
 
365
                                        if (lenstack == STACKDEPTH) /* internal error */
 
366
                                                elog(ERROR, "tsquery stack too small");
 
367
                                        opstack[lenstack] = operator;
 
368
                                        lenstack++;
 
369
                                }
 
370
                                break;
 
371
                        case PT_OPEN:
 
372
                                makepol(state, pushval, opaque);
 
373
 
 
374
                                while (lenstack && (opstack[lenstack - 1] == OP_AND ||
 
375
                                                                        opstack[lenstack - 1] == OP_NOT))
 
376
                                {
 
377
                                        lenstack--;
 
378
                                        pushOperator(state, opstack[lenstack]);
 
379
                                }
 
380
                                break;
 
381
                        case PT_CLOSE:
 
382
                                while (lenstack)
 
383
                                {
 
384
                                        lenstack--;
 
385
                                        pushOperator(state, opstack[lenstack]);
 
386
                                };
 
387
                                return;
 
388
                        case PT_ERR:
 
389
                        default:
 
390
                                ereport(ERROR,
 
391
                                                (errcode(ERRCODE_SYNTAX_ERROR),
 
392
                                                 errmsg("syntax error in tsquery: \"%s\"",
 
393
                                                                state->buffer)));
 
394
                }
 
395
        }
 
396
        while (lenstack)
 
397
        {
 
398
                lenstack--;
 
399
                pushOperator(state, opstack[lenstack]);
 
400
        }
 
401
}
 
402
 
 
403
static void
 
404
findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes)
 
405
{
 
406
        /* since this function recurses, it could be driven to stack overflow. */
 
407
        check_stack_depth();
 
408
 
 
409
        if (*pos >= nnodes)
 
410
                elog(ERROR, "malformed tsquery: operand not found");
 
411
 
 
412
        if (ptr[*pos].type == QI_VAL ||
 
413
                ptr[*pos].type == QI_VALSTOP)   /* need to handle VALSTOP here, they
 
414
                                                                                 * haven't been cleaned away yet. */
 
415
        {
 
416
                (*pos)++;
 
417
        }
 
418
        else
 
419
        {
 
420
                Assert(ptr[*pos].type == QI_OPR);
 
421
 
 
422
                if (ptr[*pos].qoperator.oper == OP_NOT)
 
423
                {
 
424
                        ptr[*pos].qoperator.left = 1;
 
425
                        (*pos)++;
 
426
                        findoprnd_recurse(ptr, pos, nnodes);
 
427
                }
 
428
                else
 
429
                {
 
430
                        QueryOperator *curitem = &ptr[*pos].qoperator;
 
431
                        int                     tmp = *pos;
 
432
 
 
433
                        Assert(curitem->oper == OP_AND || curitem->oper == OP_OR);
 
434
 
 
435
                        (*pos)++;
 
436
                        findoprnd_recurse(ptr, pos, nnodes);
 
437
                        curitem->left = *pos - tmp;
 
438
                        findoprnd_recurse(ptr, pos, nnodes);
 
439
                }
 
440
        }
 
441
}
 
442
 
 
443
 
 
444
/*
 
445
 * Fills in the left-fields previously left unfilled. The input
 
446
 * QueryItems must be in polish (prefix) notation.
 
447
 */
 
448
static void
 
449
findoprnd(QueryItem *ptr, int size)
 
450
{
 
451
        uint32          pos;
 
452
 
 
453
        pos = 0;
 
454
        findoprnd_recurse(ptr, &pos, size);
 
455
 
 
456
        if (pos != size)
 
457
                elog(ERROR, "malformed tsquery: extra nodes");
 
458
}
 
459
 
 
460
 
 
461
/*
 
462
 * Each value (operand) in the query is be passed to pushval. pushval can
 
463
 * transform the simple value to an arbitrarily complex expression using
 
464
 * pushValue and pushOperator. It must push a single value with pushValue,
 
465
 * a complete expression with all operands, or a a stopword placeholder
 
466
 * with pushStop, otherwise the prefix notation representation will be broken,
 
467
 * having an operator with no operand.
 
468
 *
 
469
 * opaque is passed on to pushval as is, pushval can use it to store its
 
470
 * private state.
 
471
 *
 
472
 * The returned query might contain QI_STOPVAL nodes. The caller is responsible
 
473
 * for cleaning them up (with clean_fakeval)
 
474
 */
 
475
TSQuery
 
476
parse_tsquery(char *buf,
 
477
                          PushFunction pushval,
 
478
                          Datum opaque,
 
479
                          bool isplain)
 
480
{
 
481
        struct TSQueryParserStateData state;
 
482
        int                     i;
 
483
        TSQuery         query;
 
484
        int                     commonlen;
 
485
        QueryItem  *ptr;
 
486
        ListCell   *cell;
 
487
 
 
488
        /* init state */
 
489
        state.buffer = buf;
 
490
        state.buf = buf;
 
491
        state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND;
 
492
        state.count = 0;
 
493
        state.polstr = NIL;
 
494
 
 
495
        /* init value parser's state */
 
496
        state.valstate = init_tsvector_parser(state.buffer, true, true);
 
497
 
 
498
        /* init list of operand */
 
499
        state.sumlen = 0;
 
500
        state.lenop = 64;
 
501
        state.curop = state.op = (char *) palloc(state.lenop);
 
502
        *(state.curop) = '\0';
 
503
 
 
504
        /* parse query & make polish notation (postfix, but in reverse order) */
 
505
        makepol(&state, pushval, opaque);
 
506
 
 
507
        close_tsvector_parser(state.valstate);
 
508
 
 
509
        if (list_length(state.polstr) == 0)
 
510
        {
 
511
                ereport(NOTICE,
 
512
                                (errmsg("text-search query doesn't contain lexemes: \"%s\"",
 
513
                                                state.buffer)));
 
514
                query = (TSQuery) palloc(HDRSIZETQ);
 
515
                SET_VARSIZE(query, HDRSIZETQ);
 
516
                query->size = 0;
 
517
                return query;
 
518
        }
 
519
 
 
520
        /* Pack the QueryItems in the final TSQuery struct to return to caller */
 
521
        commonlen = COMPUTESIZE(list_length(state.polstr), state.sumlen);
 
522
        query = (TSQuery) palloc0(commonlen);
 
523
        SET_VARSIZE(query, commonlen);
 
524
        query->size = list_length(state.polstr);
 
525
        ptr = GETQUERY(query);
 
526
 
 
527
        /* Copy QueryItems to TSQuery */
 
528
        i = 0;
 
529
        foreach(cell, state.polstr)
 
530
        {
 
531
                QueryItem  *item = (QueryItem *) lfirst(cell);
 
532
 
 
533
                switch (item->type)
 
534
                {
 
535
                        case QI_VAL:
 
536
                                memcpy(&ptr[i], item, sizeof(QueryOperand));
 
537
                                break;
 
538
                        case QI_VALSTOP:
 
539
                                ptr[i].type = QI_VALSTOP;
 
540
                                break;
 
541
                        case QI_OPR:
 
542
                                memcpy(&ptr[i], item, sizeof(QueryOperator));
 
543
                                break;
 
544
                        default:
 
545
                                elog(ERROR, "unrecognized QueryItem type: %d", item->type);
 
546
                }
 
547
                i++;
 
548
        }
 
549
 
 
550
        /* Copy all the operand strings to TSQuery */
 
551
        memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
 
552
        pfree(state.op);
 
553
 
 
554
        /* Set left operand pointers for every operator. */
 
555
        findoprnd(ptr, query->size);
 
556
 
 
557
        return query;
 
558
}
 
559
 
 
560
static void
 
561
pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval,
 
562
                         int16 weight, bool prefix)
 
563
{
 
564
        pushValue(state, strval, lenval, weight, prefix);
 
565
}
 
566
 
 
567
/*
 
568
 * in without morphology
 
569
 */
 
570
Datum
 
571
tsqueryin(PG_FUNCTION_ARGS)
 
572
{
 
573
        char       *in = PG_GETARG_CSTRING(0);
 
574
 
 
575
        pg_verifymbstr(in, strlen(in), false);
 
576
 
 
577
        PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), false));
 
578
}
 
579
 
 
580
/*
 
581
 * out function
 
582
 */
 
583
typedef struct
 
584
{
 
585
        QueryItem  *curpol;
 
586
        char       *buf;
 
587
        char       *cur;
 
588
        char       *op;
 
589
        int                     buflen;
 
590
} INFIX;
 
591
 
 
592
/*
 * Makes sure inf->buf is large enough for adding 'addsize' bytes plus a
 * terminator: the buffer is doubled (with repalloc) until it fits, and
 * inf->cur is re-pointed at the same offset in the possibly moved buffer.
 *
 * Both arguments are evaluated more than once, so they must not have side
 * effects.  The body is wrapped in do { } while (0) so the macro acts as a
 * single statement, e.g. in an unbraced if/else.
 */
#define RESIZEBUF(inf, addsize) \
do { \
    while (((inf)->cur - (inf)->buf) + (addsize) + 1 >= (inf)->buflen) \
    { \
        int resizebuf_used_ = (inf)->cur - (inf)->buf; \
        (inf)->buflen *= 2; \
        (inf)->buf = (char *) repalloc((void *) (inf)->buf, (inf)->buflen); \
        (inf)->cur = (inf)->buf + resizebuf_used_; \
    } \
} while (0)
 
601
 
 
602
/*
 
603
 * recursive walk on tree and print it in
 
604
 * infix (human-readable) view
 
605
 */
 
606
static void
 
607
infix(INFIX *in, bool first)
 
608
{
 
609
        /* since this function recurses, it could be driven to stack overflow. */
 
610
        check_stack_depth();
 
611
 
 
612
        if (in->curpol->type == QI_VAL)
 
613
        {
 
614
                QueryOperand *curpol = &in->curpol->qoperand;
 
615
                char       *op = in->op + curpol->distance;
 
616
                int                     clen;
 
617
 
 
618
                RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 6);
 
619
                *(in->cur) = '\'';
 
620
                in->cur++;
 
621
                while (*op)
 
622
                {
 
623
                        if (t_iseq(op, '\''))
 
624
                        {
 
625
                                *(in->cur) = '\'';
 
626
                                in->cur++;
 
627
                        }
 
628
                        else if (t_iseq(op, '\\'))
 
629
                        {
 
630
                                *(in->cur) = '\\';
 
631
                                in->cur++;
 
632
                        }
 
633
                        COPYCHAR(in->cur, op);
 
634
 
 
635
                        clen = pg_mblen(op);
 
636
                        op += clen;
 
637
                        in->cur += clen;
 
638
                }
 
639
                *(in->cur) = '\'';
 
640
                in->cur++;
 
641
                if (curpol->weight || curpol->prefix)
 
642
                {
 
643
                        *(in->cur) = ':';
 
644
                        in->cur++;
 
645
                        if (curpol->prefix)
 
646
                        {
 
647
                                *(in->cur) = '*';
 
648
                                in->cur++;
 
649
                        }
 
650
                        if (curpol->weight & (1 << 3))
 
651
                        {
 
652
                                *(in->cur) = 'A';
 
653
                                in->cur++;
 
654
                        }
 
655
                        if (curpol->weight & (1 << 2))
 
656
                        {
 
657
                                *(in->cur) = 'B';
 
658
                                in->cur++;
 
659
                        }
 
660
                        if (curpol->weight & (1 << 1))
 
661
                        {
 
662
                                *(in->cur) = 'C';
 
663
                                in->cur++;
 
664
                        }
 
665
                        if (curpol->weight & 1)
 
666
                        {
 
667
                                *(in->cur) = 'D';
 
668
                                in->cur++;
 
669
                        }
 
670
                }
 
671
                *(in->cur) = '\0';
 
672
                in->curpol++;
 
673
        }
 
674
        else if (in->curpol->qoperator.oper == OP_NOT)
 
675
        {
 
676
                bool            isopr = false;
 
677
 
 
678
                RESIZEBUF(in, 1);
 
679
                *(in->cur) = '!';
 
680
                in->cur++;
 
681
                *(in->cur) = '\0';
 
682
                in->curpol++;
 
683
 
 
684
                if (in->curpol->type == QI_OPR)
 
685
                {
 
686
                        isopr = true;
 
687
                        RESIZEBUF(in, 2);
 
688
                        sprintf(in->cur, "( ");
 
689
                        in->cur = strchr(in->cur, '\0');
 
690
                }
 
691
 
 
692
                infix(in, isopr);
 
693
                if (isopr)
 
694
                {
 
695
                        RESIZEBUF(in, 2);
 
696
                        sprintf(in->cur, " )");
 
697
                        in->cur = strchr(in->cur, '\0');
 
698
                }
 
699
        }
 
700
        else
 
701
        {
 
702
                int8            op = in->curpol->qoperator.oper;
 
703
                INFIX           nrm;
 
704
 
 
705
                in->curpol++;
 
706
                if (op == OP_OR && !first)
 
707
                {
 
708
                        RESIZEBUF(in, 2);
 
709
                        sprintf(in->cur, "( ");
 
710
                        in->cur = strchr(in->cur, '\0');
 
711
                }
 
712
 
 
713
                nrm.curpol = in->curpol;
 
714
                nrm.op = in->op;
 
715
                nrm.buflen = 16;
 
716
                nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
 
717
 
 
718
                /* get right operand */
 
719
                infix(&nrm, false);
 
720
 
 
721
                /* get & print left operand */
 
722
                in->curpol = nrm.curpol;
 
723
                infix(in, false);
 
724
 
 
725
                /* print operator & right operand */
 
726
                RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
 
727
                switch (op)
 
728
                {
 
729
                        case OP_OR:
 
730
                                sprintf(in->cur, " | %s", nrm.buf);
 
731
                                break;
 
732
                        case OP_AND:
 
733
                                sprintf(in->cur, " & %s", nrm.buf);
 
734
                                break;
 
735
                        default:
 
736
                                /* OP_NOT is handled in above if-branch */
 
737
                                elog(ERROR, "unrecognized operator type: %d", op);
 
738
                }
 
739
                in->cur = strchr(in->cur, '\0');
 
740
                pfree(nrm.buf);
 
741
 
 
742
                if (op == OP_OR && !first)
 
743
                {
 
744
                        RESIZEBUF(in, 2);
 
745
                        sprintf(in->cur, " )");
 
746
                        in->cur = strchr(in->cur, '\0');
 
747
                }
 
748
        }
 
749
}
 
750
 
 
751
 
 
752
Datum
 
753
tsqueryout(PG_FUNCTION_ARGS)
 
754
{
 
755
        TSQuery         query = PG_GETARG_TSQUERY(0);
 
756
        INFIX           nrm;
 
757
 
 
758
        if (query->size == 0)
 
759
        {
 
760
                char       *b = palloc(1);
 
761
 
 
762
                *b = '\0';
 
763
                PG_RETURN_POINTER(b);
 
764
        }
 
765
        nrm.curpol = GETQUERY(query);
 
766
        nrm.buflen = 32;
 
767
        nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
 
768
        *(nrm.cur) = '\0';
 
769
        nrm.op = GETOPERAND(query);
 
770
        infix(&nrm, true);
 
771
 
 
772
        PG_FREE_IF_COPY(query, 0);
 
773
        PG_RETURN_CSTRING(nrm.buf);
 
774
}
 
775
 
 
776
/*
 
777
 * Binary Input / Output functions. The binary format is as follows:
 
778
 *
 
779
 * uint32        number of operators/operands in the query
 
780
 *
 
781
 * Followed by the operators and operands, in prefix notation. For each
 
782
 * operand:
 
783
 *
 
784
 * uint8        type, QI_VAL
 
785
 * uint8        weight
 
786
 * uint8        prefix
 
787
 *                      operand text in client encoding, null-terminated
 
788
 *
 
789
 * For each operator:
 
790
 * uint8        type, QI_OPR
 
791
 * uint8        operator, one of OP_AND, OP_OR, OP_NOT.
 
792
 */
 
793
Datum
 
794
tsquerysend(PG_FUNCTION_ARGS)
 
795
{
 
796
        TSQuery         query = PG_GETARG_TSQUERY(0);
 
797
        StringInfoData buf;
 
798
        int                     i;
 
799
        QueryItem  *item = GETQUERY(query);
 
800
 
 
801
        pq_begintypsend(&buf);
 
802
 
 
803
        pq_sendint(&buf, query->size, sizeof(uint32));
 
804
        for (i = 0; i < query->size; i++)
 
805
        {
 
806
                pq_sendint(&buf, item->type, sizeof(item->type));
 
807
 
 
808
                switch (item->type)
 
809
                {
 
810
                        case QI_VAL:
 
811
                                pq_sendint(&buf, item->qoperand.weight, sizeof(uint8));
 
812
                                pq_sendint(&buf, item->qoperand.prefix, sizeof(uint8));
 
813
                                pq_sendstring(&buf, GETOPERAND(query) + item->qoperand.distance);
 
814
                                break;
 
815
                        case QI_OPR:
 
816
                                pq_sendint(&buf, item->qoperator.oper, sizeof(item->qoperator.oper));
 
817
                                break;
 
818
                        default:
 
819
                                elog(ERROR, "unrecognized tsquery node type: %d", item->type);
 
820
                }
 
821
                item++;
 
822
        }
 
823
 
 
824
        PG_FREE_IF_COPY(query, 0);
 
825
 
 
826
        PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
 
827
}
 
828
 
 
829
Datum
 
830
tsqueryrecv(PG_FUNCTION_ARGS)
 
831
{
 
832
        StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 
833
        TSQuery         query;
 
834
        int                     i,
 
835
                                len;
 
836
        QueryItem  *item;
 
837
        int                     datalen;
 
838
        char       *ptr;
 
839
        uint32          size;
 
840
        const char **operands;
 
841
 
 
842
        size = pq_getmsgint(buf, sizeof(uint32));
 
843
        if (size > (MaxAllocSize / sizeof(QueryItem)))
 
844
                elog(ERROR, "invalid size of tsquery");
 
845
 
 
846
        /* Allocate space to temporarily hold operand strings */
 
847
        operands = palloc(size * sizeof(char *));
 
848
 
 
849
        /* Allocate space for all the QueryItems. */
 
850
        len = HDRSIZETQ + sizeof(QueryItem) * size;
 
851
        query = (TSQuery) palloc0(len);
 
852
        query->size = size;
 
853
        item = GETQUERY(query);
 
854
 
 
855
        datalen = 0;
 
856
        for (i = 0; i < size; i++)
 
857
        {
 
858
                item->type = (int8) pq_getmsgint(buf, sizeof(int8));
 
859
 
 
860
                if (item->type == QI_VAL)
 
861
                {
 
862
                        size_t          val_len;        /* length after recoding to server encoding */
 
863
                        uint8           weight;
 
864
                        uint8           prefix;
 
865
                        const char *val;
 
866
                        pg_crc32        valcrc;
 
867
 
 
868
                        weight = (uint8) pq_getmsgint(buf, sizeof(uint8));
 
869
                        prefix = (uint8) pq_getmsgint(buf, sizeof(uint8));
 
870
                        val = pq_getmsgstring(buf);
 
871
                        val_len = strlen(val);
 
872
 
 
873
                        /* Sanity checks */
 
874
 
 
875
                        if (weight > 0xF)
 
876
                                elog(ERROR, "invalid tsquery: invalid weight bitmap");
 
877
 
 
878
                        if (val_len > MAXSTRLEN)
 
879
                                elog(ERROR, "invalid tsquery: operand too long");
 
880
 
 
881
                        if (datalen > MAXSTRPOS)
 
882
                                elog(ERROR, "invalid tsquery: total operand length exceeded");
 
883
 
 
884
                        /* Looks valid. */
 
885
 
 
886
                        INIT_CRC32(valcrc);
 
887
                        COMP_CRC32(valcrc, val, val_len);
 
888
                        FIN_CRC32(valcrc);
 
889
 
 
890
                        item->qoperand.weight = weight;
 
891
                        item->qoperand.prefix = (prefix) ? true : false;
 
892
                        item->qoperand.valcrc = (int32) valcrc;
 
893
                        item->qoperand.length = val_len;
 
894
                        item->qoperand.distance = datalen;
 
895
 
 
896
                        /*
 
897
                         * Operand strings are copied to the final struct after this loop;
 
898
                         * here we just collect them to an array
 
899
                         */
 
900
                        operands[i] = val;
 
901
 
 
902
                        datalen += val_len + 1;         /* + 1 for the '\0' terminator */
 
903
                }
 
904
                else if (item->type == QI_OPR)
 
905
                {
 
906
                        int8            oper;
 
907
 
 
908
                        oper = (int8) pq_getmsgint(buf, sizeof(int8));
 
909
                        if (oper != OP_NOT && oper != OP_OR && oper != OP_AND)
 
910
                                elog(ERROR, "invalid tsquery: unrecognized operator type %d",
 
911
                                         (int) oper);
 
912
                        if (i == size - 1)
 
913
                                elog(ERROR, "invalid pointer to right operand");
 
914
 
 
915
                        item->qoperator.oper = oper;
 
916
                }
 
917
                else
 
918
                        elog(ERROR, "unrecognized tsquery node type: %d", item->type);
 
919
 
 
920
                item++;
 
921
        }
 
922
 
 
923
        /* Enlarge buffer to make room for the operand values. */
 
924
        query = (TSQuery) repalloc(query, len + datalen);
 
925
        item = GETQUERY(query);
 
926
        ptr = GETOPERAND(query);
 
927
 
 
928
        /*
 
929
         * Fill in the left-pointers. Checks that the tree is well-formed as a
 
930
         * side-effect.
 
931
         */
 
932
        findoprnd(item, size);
 
933
 
 
934
        /* Copy operands to output struct */
 
935
        for (i = 0; i < size; i++)
 
936
        {
 
937
                if (item->type == QI_VAL)
 
938
                {
 
939
                        memcpy(ptr, operands[i], item->qoperand.length + 1);
 
940
                        ptr += item->qoperand.length + 1;
 
941
                }
 
942
                item++;
 
943
        }
 
944
 
 
945
        pfree(operands);
 
946
 
 
947
        Assert(ptr - GETOPERAND(query) == datalen);
 
948
 
 
949
        SET_VARSIZE(query, len + datalen);
 
950
 
 
951
        PG_RETURN_TSVECTOR(query);
 
952
}
 
953
 
 
954
/*
 
955
 * debug function, used only to view the query
 
956
 * that will be executed on non-leaf pages of an index
 
957
 */
 
958
Datum
 
959
tsquerytree(PG_FUNCTION_ARGS)
 
960
{
 
961
        TSQuery         query = PG_GETARG_TSQUERY(0);
 
962
        INFIX           nrm;
 
963
        text       *res;
 
964
        QueryItem  *q;
 
965
        int                     len;
 
966
 
 
967
        if (query->size == 0)
 
968
        {
 
969
                res = (text *) palloc(VARHDRSZ);
 
970
                SET_VARSIZE(res, VARHDRSZ);
 
971
                PG_RETURN_POINTER(res);
 
972
        }
 
973
 
 
974
        q = clean_NOT(GETQUERY(query), &len);
 
975
 
 
976
        if (!q)
 
977
        {
 
978
                res = cstring_to_text("T");
 
979
        }
 
980
        else
 
981
        {
 
982
                nrm.curpol = q;
 
983
                nrm.buflen = 32;
 
984
                nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
 
985
                *(nrm.cur) = '\0';
 
986
                nrm.op = GETOPERAND(query);
 
987
                infix(&nrm, true);
 
988
                res = cstring_to_text_with_len(nrm.buf, nrm.cur - nrm.buf);
 
989
                pfree(q);
 
990
        }
 
991
 
 
992
        PG_FREE_IF_COPY(query, 0);
 
993
 
 
994
        PG_RETURN_TEXT_P(res);
 
995
}