~ubuntu-branches/debian/sid/postgresql-9.3/sid

« back to all changes in this revision

Viewing changes to src/backend/tsearch/dict_synonym.c

  • Committer: Package Import Robot
  • Author(s): Martin Pitt
  • Date: 2013-05-08 05:39:52 UTC
  • Revision ID: package-import@ubuntu.com-20130508053952-1j7uilp7mjtrvq8q
Tags: upstream-9.3~beta1
Import upstream version 9.3~beta1

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*-------------------------------------------------------------------------
 
2
 *
 
3
 * dict_synonym.c
 
4
 *              Synonym dictionary: replace word by its synonym
 
5
 *
 
6
 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
 
7
 *
 
8
 *
 
9
 * IDENTIFICATION
 
10
 *        src/backend/tsearch/dict_synonym.c
 
11
 *
 
12
 *-------------------------------------------------------------------------
 
13
 */
 
14
#include "postgres.h"
 
15
 
 
16
#include "commands/defrem.h"
 
17
#include "tsearch/ts_locale.h"
 
18
#include "tsearch/ts_utils.h"
 
19
 
 
20
typedef struct
 
21
{
 
22
        char       *in;
 
23
        char       *out;
 
24
        int                     outlen;
 
25
        uint16          flags;
 
26
} Syn;
 
27
 
 
28
typedef struct
 
29
{
 
30
        int                     len;                    /* length of syn array */
 
31
        Syn                *syn;
 
32
        bool            case_sensitive;
 
33
} DictSyn;
 
34
 
 
35
/*
 
36
 * Finds the next whitespace-delimited word within the 'in' string.
 
37
 * Returns a pointer to the first character of the word, and a pointer
 
38
 * to the next byte after the last character in the word (in *end).
 
39
 * Character '*' at the end of word will not be threated as word
 
40
 * charater if flags is not null.
 
41
 */
 
42
static char *
 
43
findwrd(char *in, char **end, uint16 *flags)
 
44
{
 
45
        char       *start;
 
46
        char       *lastchar;
 
47
 
 
48
        /* Skip leading spaces */
 
49
        while (*in && t_isspace(in))
 
50
                in += pg_mblen(in);
 
51
 
 
52
        /* Return NULL on empty lines */
 
53
        if (*in == '\0')
 
54
        {
 
55
                *end = NULL;
 
56
                return NULL;
 
57
        }
 
58
 
 
59
        lastchar = start = in;
 
60
 
 
61
        /* Find end of word */
 
62
        while (*in && !t_isspace(in))
 
63
        {
 
64
                lastchar = in;
 
65
                in += pg_mblen(in);
 
66
        }
 
67
 
 
68
        if (in - lastchar == 1 && t_iseq(lastchar, '*') && flags)
 
69
        {
 
70
                *flags = TSL_PREFIX;
 
71
                *end = lastchar;
 
72
        }
 
73
        else
 
74
        {
 
75
                if (flags)
 
76
                        *flags = 0;
 
77
                *end = in;
 
78
        }
 
79
 
 
80
        return start;
 
81
}
 
82
 
 
83
static int
 
84
compareSyn(const void *a, const void *b)
 
85
{
 
86
        return strcmp(((const Syn *) a)->in, ((const Syn *) b)->in);
 
87
}
 
88
 
 
89
 
 
90
Datum
 
91
dsynonym_init(PG_FUNCTION_ARGS)
 
92
{
 
93
        List       *dictoptions = (List *) PG_GETARG_POINTER(0);
 
94
        DictSyn    *d;
 
95
        ListCell   *l;
 
96
        char       *filename = NULL;
 
97
        bool            case_sensitive = false;
 
98
        tsearch_readline_state trst;
 
99
        char       *starti,
 
100
                           *starto,
 
101
                           *end = NULL;
 
102
        int                     cur = 0;
 
103
        char       *line = NULL;
 
104
        uint16          flags = 0;
 
105
 
 
106
        foreach(l, dictoptions)
 
107
        {
 
108
                DefElem    *defel = (DefElem *) lfirst(l);
 
109
 
 
110
                if (pg_strcasecmp("Synonyms", defel->defname) == 0)
 
111
                        filename = defGetString(defel);
 
112
                else if (pg_strcasecmp("CaseSensitive", defel->defname) == 0)
 
113
                        case_sensitive = defGetBoolean(defel);
 
114
                else
 
115
                        ereport(ERROR,
 
116
                                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 
117
                                         errmsg("unrecognized synonym parameter: \"%s\"",
 
118
                                                        defel->defname)));
 
119
        }
 
120
 
 
121
        if (!filename)
 
122
                ereport(ERROR,
 
123
                                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 
124
                                 errmsg("missing Synonyms parameter")));
 
125
 
 
126
        filename = get_tsearch_config_filename(filename, "syn");
 
127
 
 
128
        if (!tsearch_readline_begin(&trst, filename))
 
129
                ereport(ERROR,
 
130
                                (errcode(ERRCODE_CONFIG_FILE_ERROR),
 
131
                                 errmsg("could not open synonym file \"%s\": %m",
 
132
                                                filename)));
 
133
 
 
134
        d = (DictSyn *) palloc0(sizeof(DictSyn));
 
135
 
 
136
        while ((line = tsearch_readline(&trst)) != NULL)
 
137
        {
 
138
                starti = findwrd(line, &end, NULL);
 
139
                if (!starti)
 
140
                {
 
141
                        /* Empty line */
 
142
                        goto skipline;
 
143
                }
 
144
                if (*end == '\0')
 
145
                {
 
146
                        /* A line with only one word. Ignore silently. */
 
147
                        goto skipline;
 
148
                }
 
149
                *end = '\0';
 
150
 
 
151
                starto = findwrd(end + 1, &end, &flags);
 
152
                if (!starto)
 
153
                {
 
154
                        /* A line with only one word (+whitespace). Ignore silently. */
 
155
                        goto skipline;
 
156
                }
 
157
                *end = '\0';
 
158
 
 
159
                /*
 
160
                 * starti now points to the first word, and starto to the second word
 
161
                 * on the line, with a \0 terminator at the end of both words.
 
162
                 */
 
163
 
 
164
                if (cur >= d->len)
 
165
                {
 
166
                        if (d->len == 0)
 
167
                        {
 
168
                                d->len = 64;
 
169
                                d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
 
170
                        }
 
171
                        else
 
172
                        {
 
173
                                d->len *= 2;
 
174
                                d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
 
175
                        }
 
176
                }
 
177
 
 
178
                if (case_sensitive)
 
179
                {
 
180
                        d->syn[cur].in = pstrdup(starti);
 
181
                        d->syn[cur].out = pstrdup(starto);
 
182
                }
 
183
                else
 
184
                {
 
185
                        d->syn[cur].in = lowerstr(starti);
 
186
                        d->syn[cur].out = lowerstr(starto);
 
187
                }
 
188
 
 
189
                d->syn[cur].outlen = strlen(starto);
 
190
                d->syn[cur].flags = flags;
 
191
 
 
192
                cur++;
 
193
 
 
194
skipline:
 
195
                pfree(line);
 
196
        }
 
197
 
 
198
        tsearch_readline_end(&trst);
 
199
 
 
200
        d->len = cur;
 
201
        qsort(d->syn, d->len, sizeof(Syn), compareSyn);
 
202
 
 
203
        d->case_sensitive = case_sensitive;
 
204
 
 
205
        PG_RETURN_POINTER(d);
 
206
}
 
207
 
 
208
Datum
 
209
dsynonym_lexize(PG_FUNCTION_ARGS)
 
210
{
 
211
        DictSyn    *d = (DictSyn *) PG_GETARG_POINTER(0);
 
212
        char       *in = (char *) PG_GETARG_POINTER(1);
 
213
        int32           len = PG_GETARG_INT32(2);
 
214
        Syn                     key,
 
215
                           *found;
 
216
        TSLexeme   *res;
 
217
 
 
218
        /* note: d->len test protects against Solaris bsearch-of-no-items bug */
 
219
        if (len <= 0 || d->len <= 0)
 
220
                PG_RETURN_POINTER(NULL);
 
221
 
 
222
        if (d->case_sensitive)
 
223
                key.in = pnstrdup(in, len);
 
224
        else
 
225
                key.in = lowerstr_with_len(in, len);
 
226
 
 
227
        key.out = NULL;
 
228
 
 
229
        found = (Syn *) bsearch(&key, d->syn, d->len, sizeof(Syn), compareSyn);
 
230
        pfree(key.in);
 
231
 
 
232
        if (!found)
 
233
                PG_RETURN_POINTER(NULL);
 
234
 
 
235
        res = palloc0(sizeof(TSLexeme) * 2);
 
236
        res[0].lexeme = pnstrdup(found->out, found->outlen);
 
237
        res[0].flags = found->flags;
 
238
 
 
239
        PG_RETURN_POINTER(res);
 
240
}