~ubuntu-branches/ubuntu/oneiric/postgresql-9.1/oneiric-security

« back to all changes in this revision

Viewing changes to src/backend/tsearch/to_tsany.c

  • Committer: Bazaar Package Importer
  • Author(s): Martin Pitt
  • Date: 2011-05-11 10:41:53 UTC
  • Revision ID: james.westby@ubuntu.com-20110511104153-psbh2o58553fv1m0
Tags: upstream-9.1~beta1
Import upstream version 9.1~beta1

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*-------------------------------------------------------------------------
 
2
 *
 
3
 * to_tsany.c
 
4
 *              to_ts* function definitions
 
5
 *
 
6
 * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
 
7
 *
 
8
 *
 
9
 * IDENTIFICATION
 
10
 *        src/backend/tsearch/to_tsany.c
 
11
 *
 
12
 *-------------------------------------------------------------------------
 
13
 */
 
14
#include "postgres.h"
 
15
 
 
16
#include "catalog/namespace.h"
 
17
#include "tsearch/ts_cache.h"
 
18
#include "tsearch/ts_utils.h"
 
19
#include "utils/builtins.h"
 
20
#include "utils/syscache.h"
 
21
 
 
22
 
 
23
Datum
 
24
get_current_ts_config(PG_FUNCTION_ARGS)
 
25
{
 
26
        PG_RETURN_OID(getTSCurrentConfig(true));
 
27
}
 
28
 
 
29
/*
 
30
 * to_tsvector
 
31
 */
 
32
static int
 
33
compareWORD(const void *a, const void *b)
 
34
{
 
35
        int                     res;
 
36
 
 
37
        res = tsCompareString(
 
38
                                                  ((ParsedWord *) a)->word, ((ParsedWord *) a)->len,
 
39
                                                  ((ParsedWord *) b)->word, ((ParsedWord *) b)->len,
 
40
                                                  false);
 
41
 
 
42
        if (res == 0)
 
43
        {
 
44
                if (((ParsedWord *) a)->pos.pos == ((ParsedWord *) b)->pos.pos)
 
45
                        return 0;
 
46
 
 
47
                res = (((ParsedWord *) a)->pos.pos > ((ParsedWord *) b)->pos.pos) ? 1 : -1;
 
48
        }
 
49
 
 
50
        return res;
 
51
}
 
52
 
 
53
static int
 
54
uniqueWORD(ParsedWord *a, int4 l)
 
55
{
 
56
        ParsedWord *ptr,
 
57
                           *res;
 
58
        int                     tmppos;
 
59
 
 
60
        if (l == 1)
 
61
        {
 
62
                tmppos = LIMITPOS(a->pos.pos);
 
63
                a->alen = 2;
 
64
                a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
 
65
                a->pos.apos[0] = 1;
 
66
                a->pos.apos[1] = tmppos;
 
67
                return l;
 
68
        }
 
69
 
 
70
        res = a;
 
71
        ptr = a + 1;
 
72
 
 
73
        /*
 
74
         * Sort words with its positions
 
75
         */
 
76
        qsort((void *) a, l, sizeof(ParsedWord), compareWORD);
 
77
 
 
78
        /*
 
79
         * Initialize first word and its first position
 
80
         */
 
81
        tmppos = LIMITPOS(a->pos.pos);
 
82
        a->alen = 2;
 
83
        a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
 
84
        a->pos.apos[0] = 1;
 
85
        a->pos.apos[1] = tmppos;
 
86
 
 
87
        /*
 
88
         * Summarize position information for each word
 
89
         */
 
90
        while (ptr - a < l)
 
91
        {
 
92
                if (!(ptr->len == res->len &&
 
93
                          strncmp(ptr->word, res->word, res->len) == 0))
 
94
                {
 
95
                        /*
 
96
                         * Got a new word, so put it in result
 
97
                         */
 
98
                        res++;
 
99
                        res->len = ptr->len;
 
100
                        res->word = ptr->word;
 
101
                        tmppos = LIMITPOS(ptr->pos.pos);
 
102
                        res->alen = 2;
 
103
                        res->pos.apos = (uint16 *) palloc(sizeof(uint16) * res->alen);
 
104
                        res->pos.apos[0] = 1;
 
105
                        res->pos.apos[1] = tmppos;
 
106
                }
 
107
                else
 
108
                {
 
109
                        /*
 
110
                         * The word already exists, so adjust position information. But
 
111
                         * before we should check size of position's array, max allowed
 
112
                         * value for position and uniqueness of position
 
113
                         */
 
114
                        pfree(ptr->word);
 
115
                        if (res->pos.apos[0] < MAXNUMPOS - 1 && res->pos.apos[res->pos.apos[0]] != MAXENTRYPOS - 1 &&
 
116
                                res->pos.apos[res->pos.apos[0]] != LIMITPOS(ptr->pos.pos))
 
117
                        {
 
118
                                if (res->pos.apos[0] + 1 >= res->alen)
 
119
                                {
 
120
                                        res->alen *= 2;
 
121
                                        res->pos.apos = (uint16 *) repalloc(res->pos.apos, sizeof(uint16) * res->alen);
 
122
                                }
 
123
                                if (res->pos.apos[0] == 0 || res->pos.apos[res->pos.apos[0]] != LIMITPOS(ptr->pos.pos))
 
124
                                {
 
125
                                        res->pos.apos[res->pos.apos[0] + 1] = LIMITPOS(ptr->pos.pos);
 
126
                                        res->pos.apos[0]++;
 
127
                                }
 
128
                        }
 
129
                }
 
130
                ptr++;
 
131
        }
 
132
 
 
133
        return res + 1 - a;
 
134
}
 
135
 
 
136
/*
 
137
 * make value of tsvector, given parsed text
 
138
 */
 
139
TSVector
 
140
make_tsvector(ParsedText *prs)
 
141
{
 
142
        int                     i,
 
143
                                j,
 
144
                                lenstr = 0,
 
145
                                totallen;
 
146
        TSVector        in;
 
147
        WordEntry  *ptr;
 
148
        char       *str;
 
149
        int                     stroff;
 
150
 
 
151
        prs->curwords = uniqueWORD(prs->words, prs->curwords);
 
152
        for (i = 0; i < prs->curwords; i++)
 
153
        {
 
154
                lenstr += prs->words[i].len;
 
155
                if (prs->words[i].alen)
 
156
                {
 
157
                        lenstr = SHORTALIGN(lenstr);
 
158
                        lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
 
159
                }
 
160
        }
 
161
 
 
162
        if (lenstr > MAXSTRPOS)
 
163
                ereport(ERROR,
 
164
                                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 
165
                                 errmsg("string is too long for tsvector (%d bytes, max %d bytes)", lenstr, MAXSTRPOS)));
 
166
 
 
167
        totallen = CALCDATASIZE(prs->curwords, lenstr);
 
168
        in = (TSVector) palloc0(totallen);
 
169
        SET_VARSIZE(in, totallen);
 
170
        in->size = prs->curwords;
 
171
 
 
172
        ptr = ARRPTR(in);
 
173
        str = STRPTR(in);
 
174
        stroff = 0;
 
175
        for (i = 0; i < prs->curwords; i++)
 
176
        {
 
177
                ptr->len = prs->words[i].len;
 
178
                ptr->pos = stroff;
 
179
                memcpy(str + stroff, prs->words[i].word, prs->words[i].len);
 
180
                stroff += prs->words[i].len;
 
181
                pfree(prs->words[i].word);
 
182
                if (prs->words[i].alen)
 
183
                {
 
184
                        int                     k = prs->words[i].pos.apos[0];
 
185
                        WordEntryPos *wptr;
 
186
 
 
187
                        if (k > 0xFFFF)
 
188
                                elog(ERROR, "positions array too long");
 
189
 
 
190
                        ptr->haspos = 1;
 
191
                        stroff = SHORTALIGN(stroff);
 
192
                        *(uint16 *) (str + stroff) = (uint16) k;
 
193
                        wptr = POSDATAPTR(in, ptr);
 
194
                        for (j = 0; j < k; j++)
 
195
                        {
 
196
                                WEP_SETWEIGHT(wptr[j], 0);
 
197
                                WEP_SETPOS(wptr[j], prs->words[i].pos.apos[j + 1]);
 
198
                        }
 
199
                        stroff += sizeof(uint16) + k * sizeof(WordEntryPos);
 
200
                        pfree(prs->words[i].pos.apos);
 
201
                }
 
202
                else
 
203
                        ptr->haspos = 0;
 
204
                ptr++;
 
205
        }
 
206
        pfree(prs->words);
 
207
        return in;
 
208
}
 
209
 
 
210
Datum
 
211
to_tsvector_byid(PG_FUNCTION_ARGS)
 
212
{
 
213
        Oid                     cfgId = PG_GETARG_OID(0);
 
214
        text       *in = PG_GETARG_TEXT_P(1);
 
215
        ParsedText      prs;
 
216
        TSVector        out;
 
217
 
 
218
        prs.lenwords = (VARSIZE(in) - VARHDRSZ) / 6;            /* just estimation of
 
219
                                                                                                                 * word's number */
 
220
        if (prs.lenwords == 0)
 
221
                prs.lenwords = 2;
 
222
        prs.curwords = 0;
 
223
        prs.pos = 0;
 
224
        prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
 
225
 
 
226
        parsetext(cfgId, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ);
 
227
        PG_FREE_IF_COPY(in, 1);
 
228
 
 
229
        if (prs.curwords)
 
230
                out = make_tsvector(&prs);
 
231
        else
 
232
        {
 
233
                pfree(prs.words);
 
234
                out = palloc(CALCDATASIZE(0, 0));
 
235
                SET_VARSIZE(out, CALCDATASIZE(0, 0));
 
236
                out->size = 0;
 
237
        }
 
238
 
 
239
        PG_RETURN_POINTER(out);
 
240
}
 
241
 
 
242
Datum
 
243
to_tsvector(PG_FUNCTION_ARGS)
 
244
{
 
245
        text       *in = PG_GETARG_TEXT_P(0);
 
246
        Oid                     cfgId;
 
247
 
 
248
        cfgId = getTSCurrentConfig(true);
 
249
        PG_RETURN_DATUM(DirectFunctionCall2(to_tsvector_byid,
 
250
                                                                                ObjectIdGetDatum(cfgId),
 
251
                                                                                PointerGetDatum(in)));
 
252
}
 
253
 
 
254
/*
 
255
 * to_tsquery
 
256
 */
 
257
 
 
258
 
 
259
/*
 
260
 * This function is used for morph parsing.
 
261
 *
 
262
 * The value is passed to parsetext which will call the right dictionary to
 
263
 * lexize the word. If it turns out to be a stopword, we push a QI_VALSTOP
 
264
 * to the stack.
 
265
 *
 
266
 * All words belonging to the same variant are pushed as an ANDed list,
 
267
 * and different variants are ORred together.
 
268
 */
 
269
static void
 
270
pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, int2 weight, bool prefix)
 
271
{
 
272
        int4            count = 0;
 
273
        ParsedText      prs;
 
274
        uint32          variant,
 
275
                                pos,
 
276
                                cntvar = 0,
 
277
                                cntpos = 0,
 
278
                                cnt = 0;
 
279
        Oid                     cfg_id = DatumGetObjectId(opaque);              /* the input is actually
 
280
                                                                                                                 * an Oid, not a pointer */
 
281
 
 
282
        prs.lenwords = 4;
 
283
        prs.curwords = 0;
 
284
        prs.pos = 0;
 
285
        prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
 
286
 
 
287
        parsetext(cfg_id, &prs, strval, lenval);
 
288
 
 
289
        if (prs.curwords > 0)
 
290
        {
 
291
 
 
292
                while (count < prs.curwords)
 
293
                {
 
294
                        pos = prs.words[count].pos.pos;
 
295
                        cntvar = 0;
 
296
                        while (count < prs.curwords && pos == prs.words[count].pos.pos)
 
297
                        {
 
298
                                variant = prs.words[count].nvariant;
 
299
 
 
300
                                cnt = 0;
 
301
                                while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant)
 
302
                                {
 
303
 
 
304
                                        pushValue(state, prs.words[count].word, prs.words[count].len, weight,
 
305
                                                          ((prs.words[count].flags & TSL_PREFIX) || prefix) ? true : false);
 
306
                                        pfree(prs.words[count].word);
 
307
                                        if (cnt)
 
308
                                                pushOperator(state, OP_AND);
 
309
                                        cnt++;
 
310
                                        count++;
 
311
                                }
 
312
 
 
313
                                if (cntvar)
 
314
                                        pushOperator(state, OP_OR);
 
315
                                cntvar++;
 
316
                        }
 
317
 
 
318
                        if (cntpos)
 
319
                                pushOperator(state, OP_AND);
 
320
 
 
321
                        cntpos++;
 
322
                }
 
323
 
 
324
                pfree(prs.words);
 
325
 
 
326
        }
 
327
        else
 
328
                pushStop(state);
 
329
}
 
330
 
 
331
Datum
 
332
to_tsquery_byid(PG_FUNCTION_ARGS)
 
333
{
 
334
        Oid                     cfgid = PG_GETARG_OID(0);
 
335
        text       *in = PG_GETARG_TEXT_P(1);
 
336
        TSQuery         query;
 
337
        QueryItem  *res;
 
338
        int4            len;
 
339
 
 
340
        query = parse_tsquery(text_to_cstring(in), pushval_morph, ObjectIdGetDatum(cfgid), false);
 
341
 
 
342
        if (query->size == 0)
 
343
                PG_RETURN_TSQUERY(query);
 
344
 
 
345
        res = clean_fakeval(GETQUERY(query), &len);
 
346
        if (!res)
 
347
        {
 
348
                SET_VARSIZE(query, HDRSIZETQ);
 
349
                query->size = 0;
 
350
                PG_RETURN_POINTER(query);
 
351
        }
 
352
        memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(QueryItem));
 
353
 
 
354
        if (len != query->size)
 
355
        {
 
356
                char       *oldoperand = GETOPERAND(query);
 
357
                int4            lenoperand = VARSIZE(query) - (oldoperand - (char *) query);
 
358
 
 
359
                Assert(len < query->size);
 
360
 
 
361
                query->size = len;
 
362
                memcpy((void *) GETOPERAND(query), oldoperand, VARSIZE(query) - (oldoperand - (char *) query));
 
363
                SET_VARSIZE(query, COMPUTESIZE(len, lenoperand));
 
364
        }
 
365
 
 
366
        pfree(res);
 
367
        PG_RETURN_TSQUERY(query);
 
368
}
 
369
 
 
370
Datum
 
371
to_tsquery(PG_FUNCTION_ARGS)
 
372
{
 
373
        text       *in = PG_GETARG_TEXT_P(0);
 
374
        Oid                     cfgId;
 
375
 
 
376
        cfgId = getTSCurrentConfig(true);
 
377
        PG_RETURN_DATUM(DirectFunctionCall2(to_tsquery_byid,
 
378
                                                                                ObjectIdGetDatum(cfgId),
 
379
                                                                                PointerGetDatum(in)));
 
380
}
 
381
 
 
382
Datum
 
383
plainto_tsquery_byid(PG_FUNCTION_ARGS)
 
384
{
 
385
        Oid                     cfgid = PG_GETARG_OID(0);
 
386
        text       *in = PG_GETARG_TEXT_P(1);
 
387
        TSQuery         query;
 
388
        QueryItem  *res;
 
389
        int4            len;
 
390
 
 
391
        query = parse_tsquery(text_to_cstring(in), pushval_morph, ObjectIdGetDatum(cfgid), true);
 
392
 
 
393
        if (query->size == 0)
 
394
                PG_RETURN_TSQUERY(query);
 
395
 
 
396
        res = clean_fakeval(GETQUERY(query), &len);
 
397
        if (!res)
 
398
        {
 
399
                SET_VARSIZE(query, HDRSIZETQ);
 
400
                query->size = 0;
 
401
                PG_RETURN_POINTER(query);
 
402
        }
 
403
        memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(QueryItem));
 
404
 
 
405
        if (len != query->size)
 
406
        {
 
407
                char       *oldoperand = GETOPERAND(query);
 
408
                int4            lenoperand = VARSIZE(query) - (oldoperand - (char *) query);
 
409
 
 
410
                Assert(len < query->size);
 
411
 
 
412
                query->size = len;
 
413
                memcpy((void *) GETOPERAND(query), oldoperand, lenoperand);
 
414
                SET_VARSIZE(query, COMPUTESIZE(len, lenoperand));
 
415
        }
 
416
 
 
417
        pfree(res);
 
418
        PG_RETURN_POINTER(query);
 
419
}
 
420
 
 
421
Datum
 
422
plainto_tsquery(PG_FUNCTION_ARGS)
 
423
{
 
424
        text       *in = PG_GETARG_TEXT_P(0);
 
425
        Oid                     cfgId;
 
426
 
 
427
        cfgId = getTSCurrentConfig(true);
 
428
        PG_RETURN_DATUM(DirectFunctionCall2(plainto_tsquery_byid,
 
429
                                                                                ObjectIdGetDatum(cfgId),
 
430
                                                                                PointerGetDatum(in)));
 
431
}