1
/*-------------------------------------------------------------------------
4
* to_ts* function definitions
6
* Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
10
* src/backend/tsearch/to_tsany.c
12
*-------------------------------------------------------------------------
16
#include "catalog/namespace.h"
17
#include "tsearch/ts_cache.h"
18
#include "tsearch/ts_utils.h"
19
#include "utils/builtins.h"
20
#include "utils/syscache.h"
24
/*
 * fmgr-style entry point (takes PG_FUNCTION_ARGS): hands back, as an OID,
 * whatever getTSCurrentConfig(true) yields.
 */
get_current_ts_config(PG_FUNCTION_ARGS)
26
PG_RETURN_OID(getTSCurrentConfig(true));
33
/*
 * Comparison callback for ParsedWord elements; uniqueWORD passes it to
 * qsort().
 *
 * a, b: pointers to the two ParsedWord elements being compared.
 *
 * The word text is compared with tsCompareString(); the position pos.pos
 * serves as a secondary key, giving 1 when a's position is greater and -1
 * otherwise.
 * NOTE(review): presumably the position comparison only applies when the
 * text comparison reports equality -- confirm.
 */
compareWORD(const void *a, const void *b)
37
/* primary key: the text of the two words */
res = tsCompareString(
38
((ParsedWord *) a)->word, ((ParsedWord *) a)->len,
39
((ParsedWord *) b)->word, ((ParsedWord *) b)->len,
44
/* secondary key: the position of the two words */
if (((ParsedWord *) a)->pos.pos == ((ParsedWord *) b)->pos.pos)
47
res = (((ParsedWord *) a)->pos.pos > ((ParsedWord *) b)->pos.pos) ? 1 : -1;
54
/*
 * Sort an array of parsed words and gather the positions of equal words
 * into one entry per distinct word.
 *
 * a: array of ParsedWord; sorted in place with compareWORD.
 * l: number of elements in a.
 *
 * Each resulting word gets a palloc'd uint16 array pos.apos in which
 * apos[0] holds the number of stored positions and apos[1..] the positions
 * themselves, each reduced with LIMITPOS().  alen is the allocated length
 * of that array.
 *
 * The caller make_tsvector stores the return value as the new word count.
 */
uniqueWORD(ParsedWord *a, int4 l)
62
/*
 * NOTE(review): this initialization is repeated after the sort below;
 * presumably this first copy is a shortcut for a one-element array --
 * confirm.
 */
tmppos = LIMITPOS(a->pos.pos);
64
a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
66
a->pos.apos[1] = tmppos;
74
* Sort words with its positions
76
qsort((void *) a, l, sizeof(ParsedWord), compareWORD);
79
* Initialize first word and its first position
81
tmppos = LIMITPOS(a->pos.pos);
83
a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
85
a->pos.apos[1] = tmppos;
88
* Summarize position information for each word
92
/* ptr is the word being examined, res the last word kept in the result */
if (!(ptr->len == res->len &&
93
strncmp(ptr->word, res->word, res->len) == 0))
96
* Got a new word, so put it in result
100
res->word = ptr->word;
101
tmppos = LIMITPOS(ptr->pos.pos);
103
res->pos.apos = (uint16 *) palloc(sizeof(uint16) * res->alen);
104
/* a new word starts with exactly one stored position */
res->pos.apos[0] = 1;
105
res->pos.apos[1] = tmppos;
110
* The word already exists, so adjust position information. But
111
* before we should check size of position's array, max allowed
112
* value for position and uniqueness of position
115
/*
 * Append only while the count is below MAXNUMPOS - 1, the last stored
 * position is not MAXENTRYPOS - 1, and the new position differs from the
 * last stored one.
 */
if (res->pos.apos[0] < MAXNUMPOS - 1 && res->pos.apos[res->pos.apos[0]] != MAXENTRYPOS - 1 &&
116
res->pos.apos[res->pos.apos[0]] != LIMITPOS(ptr->pos.pos))
118
/* no free slot left: reallocate the array to hold res->alen entries */
if (res->pos.apos[0] + 1 >= res->alen)
121
res->pos.apos = (uint16 *) repalloc(res->pos.apos, sizeof(uint16) * res->alen);
123
if (res->pos.apos[0] == 0 || res->pos.apos[res->pos.apos[0]] != LIMITPOS(ptr->pos.pos))
125
/* store the new position in the slot right after the last used one */
res->pos.apos[res->pos.apos[0] + 1] = LIMITPOS(ptr->pos.pos);
137
* make value of tsvector, given parsed text
140
/*
 * Assemble a TSVector from the words held in prs.
 *
 * prs: parsed text.  prs->words is first reduced with uniqueWORD() and
 *      prs->curwords is overwritten with the count it returns.
 *
 * The result is allocated with palloc0().  An error with
 * ERRCODE_PROGRAM_LIMIT_EXCEEDED is reported when the computed length of
 * the string area exceeds MAXSTRPOS.  Each word's text and positions array
 * are pfree'd once they have been copied into the result.
 */
make_tsvector(ParsedText *prs)
151
prs->curwords = uniqueWORD(prs->words, prs->curwords);
152
/* first pass: add up the space needed for all words and their positions */
for (i = 0; i < prs->curwords; i++)
154
lenstr += prs->words[i].len;
155
if (prs->words[i].alen)
157
/* position data is preceded by a uint16 count and starts short-aligned */
lenstr = SHORTALIGN(lenstr);
158
lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
162
if (lenstr > MAXSTRPOS)
164
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
165
errmsg("string is too long for tsvector (%d bytes, max %d bytes)", lenstr, MAXSTRPOS)));
167
totallen = CALCDATASIZE(prs->curwords, lenstr);
168
in = (TSVector) palloc0(totallen);
169
SET_VARSIZE(in, totallen);
170
in->size = prs->curwords;
175
/* second pass: copy each word, then its positions, into the result */
for (i = 0; i < prs->curwords; i++)
177
ptr->len = prs->words[i].len;
179
memcpy(str + stroff, prs->words[i].word, prs->words[i].len);
180
stroff += prs->words[i].len;
181
pfree(prs->words[i].word);
182
if (prs->words[i].alen)
184
/* k = number of positions stored for this word */
int k = prs->words[i].pos.apos[0];
188
elog(ERROR, "positions array too long");
191
stroff = SHORTALIGN(stroff);
192
/* write the position count, then the entries themselves */
*(uint16 *) (str + stroff) = (uint16) k;
193
wptr = POSDATAPTR(in, ptr);
194
for (j = 0; j < k; j++)
196
/* every entry gets weight 0; apos[0] is the count, so read from j + 1 */
WEP_SETWEIGHT(wptr[j], 0);
197
WEP_SETPOS(wptr[j], prs->words[i].pos.apos[j + 1]);
199
stroff += sizeof(uint16) + k * sizeof(WordEntryPos);
200
pfree(prs->words[i].pos.apos);
211
/*
 * Convert a text value to a tsvector using an explicitly given text search
 * configuration.
 *
 * Argument 0: OID of the configuration (cfgId), passed to parsetext().
 * Argument 1: the text to convert.
 *
 * The initial length of the word array is estimated as one entry per six
 * bytes of input payload.  parsetext() fills prs from the text payload,
 * after which the argument copy is released with PG_FREE_IF_COPY.  The
 * result is either built by make_tsvector() or is a bare value of
 * CALCDATASIZE(0, 0) bytes.
 * NOTE(review): presumably the bare value is returned when parsing
 * produced no words -- confirm.
 */
to_tsvector_byid(PG_FUNCTION_ARGS)
213
Oid cfgId = PG_GETARG_OID(0);
214
text *in = PG_GETARG_TEXT_P(1);
218
prs.lenwords = (VARSIZE(in) - VARHDRSZ) / 6; /* just estimation of
220
if (prs.lenwords == 0)
224
prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
226
parsetext(cfgId, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ);
227
PG_FREE_IF_COPY(in, 1);
230
out = make_tsvector(&prs);
234
out = palloc(CALCDATASIZE(0, 0));
235
SET_VARSIZE(out, CALCDATASIZE(0, 0));
239
PG_RETURN_POINTER(out);
243
/*
 * Convert a text value to a tsvector using the configuration returned by
 * getTSCurrentConfig(true).
 *
 * Argument 0: the text to convert.
 *
 * All work is delegated to to_tsvector_byid through DirectFunctionCall2.
 */
to_tsvector(PG_FUNCTION_ARGS)
245
text *in = PG_GETARG_TEXT_P(0);
248
cfgId = getTSCurrentConfig(true);
249
PG_RETURN_DATUM(DirectFunctionCall2(to_tsvector_byid,
250
ObjectIdGetDatum(cfgId),
251
PointerGetDatum(in)));
260
* This function is used for morph parsing.
262
* The value is passed to parsetext which will call the right dictionary to
263
* lexize the word. If it turns out to be a stopword, we push a QI_VALSTOP
266
* All words belonging to the same variant are pushed as an ANDed list,
267
* and different variants are ORred together.
270
/*
 * Operand callback given to parse_tsquery(): runs one operand through
 * parsetext() and pushes the resulting words and connecting operators.
 *
 * opaque: carries the text search configuration OID.
 * state:  parser state handed on to pushValue() and pushOperator().
 * strval: operand text.
 * lenval: length of strval.
 * weight: passed unchanged to pushValue() for every word.
 * prefix: when true every pushed word is flagged as a prefix; otherwise
 *         only words whose flags contain TSL_PREFIX are.
 */
pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, int2 weight, bool prefix)
279
Oid cfg_id = DatumGetObjectId(opaque); /* the input is actually
280
* an Oid, not a pointer */
285
prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
287
parsetext(cfg_id, &prs, strval, lenval);
289
if (prs.curwords > 0)
292
/* outer loop: walk all parsed words, one position at a time */
while (count < prs.curwords)
294
pos = prs.words[count].pos.pos;
296
/* middle loop: all words sharing this position, one variant at a time */
while (count < prs.curwords && pos == prs.words[count].pos.pos)
298
variant = prs.words[count].nvariant;
301
/* inner loop: all words sharing this position and this variant */
while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant)
304
pushValue(state, prs.words[count].word, prs.words[count].len, weight,
305
((prs.words[count].flags & TSL_PREFIX) || prefix) ? true : false);
306
/* the word text is released right after it has been pushed */
pfree(prs.words[count].word);
308
/* words of the same variant are ANDed */
pushOperator(state, OP_AND);
314
/* different variants are ORed */
pushOperator(state, OP_OR);
319
/* NOTE(review): presumably joins the groups of different positions -- confirm */
pushOperator(state, OP_AND);
332
/*
 * Build a tsquery from query text using an explicitly given text search
 * configuration.
 *
 * Argument 0: OID of the configuration (cfgid); forwarded to pushval_morph
 *             as its opaque Datum.
 * Argument 1: the query text.
 *
 * The text is parsed by parse_tsquery() with pushval_morph as the operand
 * callback.  A query of size 0 is returned as is.  Otherwise the item
 * array is passed through clean_fakeval() and the len items it reports are
 * copied back into the query.  When that count differs from query->size,
 * the operand area is relocated to where GETOPERAND() points and the
 * varlena size is recomputed with COMPUTESIZE().
 */
to_tsquery_byid(PG_FUNCTION_ARGS)
334
Oid cfgid = PG_GETARG_OID(0);
335
text *in = PG_GETARG_TEXT_P(1);
340
query = parse_tsquery(text_to_cstring(in), pushval_morph, ObjectIdGetDatum(cfgid), false);
342
if (query->size == 0)
343
PG_RETURN_TSQUERY(query);
345
res = clean_fakeval(GETQUERY(query), &len);
348
/* header-only result */
SET_VARSIZE(query, HDRSIZETQ);
350
PG_RETURN_POINTER(query);
352
memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(QueryItem));
354
if (len != query->size)
356
/* remember where the operand area is now and how long it is */
char *oldoperand = GETOPERAND(query);
357
int4 lenoperand = VARSIZE(query) - (oldoperand - (char *) query);
359
Assert(len < query->size);
362
/*
 * Source and destination both lie inside the same query value and may
 * overlap, so this must be memmove() rather than memcpy().  lenoperand
 * equals the length expression computed above.
 */
memmove((void *) GETOPERAND(query), oldoperand, lenoperand);
363
SET_VARSIZE(query, COMPUTESIZE(len, lenoperand));
367
PG_RETURN_TSQUERY(query);
371
/*
 * Build a tsquery from query text using the configuration returned by
 * getTSCurrentConfig(true).
 *
 * Argument 0: the query text.
 *
 * All work is delegated to to_tsquery_byid through DirectFunctionCall2.
 */
to_tsquery(PG_FUNCTION_ARGS)
373
text *in = PG_GETARG_TEXT_P(0);
376
cfgId = getTSCurrentConfig(true);
377
PG_RETURN_DATUM(DirectFunctionCall2(to_tsquery_byid,
378
ObjectIdGetDatum(cfgId),
379
PointerGetDatum(in)));
383
/*
 * Build a tsquery from text using an explicitly given text search
 * configuration.  Same flow as to_tsquery_byid, except that the last
 * argument to parse_tsquery() is true.
 * NOTE(review): presumably that flag selects plain-text parsing of the
 * input -- confirm.
 *
 * Argument 0: OID of the configuration (cfgid); forwarded to pushval_morph
 *             as its opaque Datum.
 * Argument 1: the input text.
 *
 * A query of size 0 is returned as is.  Otherwise the item array is passed
 * through clean_fakeval() and the len items it reports are copied back
 * into the query.  When that count differs from query->size, the operand
 * area is relocated to where GETOPERAND() points and the varlena size is
 * recomputed with COMPUTESIZE().
 */
plainto_tsquery_byid(PG_FUNCTION_ARGS)
385
Oid cfgid = PG_GETARG_OID(0);
386
text *in = PG_GETARG_TEXT_P(1);
391
query = parse_tsquery(text_to_cstring(in), pushval_morph, ObjectIdGetDatum(cfgid), true);
393
if (query->size == 0)
394
PG_RETURN_TSQUERY(query);
396
res = clean_fakeval(GETQUERY(query), &len);
399
/* header-only result */
SET_VARSIZE(query, HDRSIZETQ);
401
PG_RETURN_POINTER(query);
403
memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(QueryItem));
405
if (len != query->size)
407
/* remember where the operand area is now and how long it is */
char *oldoperand = GETOPERAND(query);
408
int4 lenoperand = VARSIZE(query) - (oldoperand - (char *) query);
410
Assert(len < query->size);
413
/*
 * Source and destination both lie inside the same query value and may
 * overlap, so this must be memmove() rather than memcpy().
 */
memmove((void *) GETOPERAND(query), oldoperand, lenoperand);
414
SET_VARSIZE(query, COMPUTESIZE(len, lenoperand));
418
PG_RETURN_POINTER(query);
422
/*
 * Build a tsquery from text using the configuration returned by
 * getTSCurrentConfig(true).
 *
 * Argument 0: the input text.
 *
 * All work is delegated to plainto_tsquery_byid through
 * DirectFunctionCall2.
 */
plainto_tsquery(PG_FUNCTION_ARGS)
424
text *in = PG_GETARG_TEXT_P(0);
427
cfgId = getTSCurrentConfig(true);
428
PG_RETURN_DATUM(DirectFunctionCall2(plainto_tsquery_byid,
429
ObjectIdGetDatum(cfgId),
430
PointerGetDatum(in)));