~ubuntu-branches/ubuntu/hardy/postgresql-8.4/hardy-backports

« back to all changes in this revision

Viewing changes to contrib/dict_xsyn/dict_xsyn.c

  • Committer: Bazaar Package Importer
  • Author(s): Martin Pitt
  • Date: 2009-03-20 12:00:13 UTC
  • Revision ID: james.westby@ubuntu.com-20090320120013-hogj7egc5mjncc5g
Tags: upstream-8.4~0cvs20090328
Import upstream version 8.4~0cvs20090328

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*-------------------------------------------------------------------------
 
2
 *
 
3
 * dict_xsyn.c
 
4
 *        Extended synonym dictionary
 
5
 *
 
6
 * Copyright (c) 2007-2009, PostgreSQL Global Development Group
 
7
 *
 
8
 * IDENTIFICATION
 
9
 *        $PostgreSQL$
 
10
 *
 
11
 *-------------------------------------------------------------------------
 
12
 */
 
13
#include "postgres.h"
 
14
 
 
15
#include <ctype.h>
 
16
 
 
17
#include "commands/defrem.h"
 
18
#include "fmgr.h"
 
19
#include "tsearch/ts_locale.h"
 
20
#include "tsearch/ts_utils.h"
 
21
 
 
22
/*
 * Module magic block.  The macro is supplied by the PostgreSQL headers
 * included above (presumably fmgr.h -- confirm); it must appear once in
 * this file.
 */
PG_MODULE_MAGIC;
 
23
 
 
24
/*
 * One dictionary entry, built from a single line of the rules file.
 */
typedef struct
{
	char	   *key;		/* first word of the line; entries are sorted
							 * and searched by this string */
	char	   *value;		/* the complete line, still unparsed: the key
							 * word followed by its synonyms */
} Syn;
 
30
 
 
31
typedef struct
 
32
{
 
33
        int                     len;
 
34
        Syn                *syn;
 
35
 
 
36
        bool            keeporig;
 
37
} DictSyn;
 
38
 
 
39
 
 
40
PG_FUNCTION_INFO_V1(dxsyn_init);
 
41
Datum           dxsyn_init(PG_FUNCTION_ARGS);
 
42
 
 
43
PG_FUNCTION_INFO_V1(dxsyn_lexize);
 
44
Datum           dxsyn_lexize(PG_FUNCTION_ARGS);
 
45
 
 
46
/*
 * Locate the next whitespace-delimited word in a NUL-terminated string.
 *
 * in:	where to start scanning.
 * end: output; receives a pointer just past the last byte of the word, or
 *		NULL when no word is found.
 *
 * Returns a pointer to the first byte of the word, or NULL when only
 * whitespace remains or the next non-blank character is '#' (which starts
 * a comment).  The input is not modified.
 */
static char *
find_word(char *in, char **end)
{
	char	   *cursor = in;
	char	   *word;

	*end = NULL;

	/* Step over leading blanks, one (possibly multibyte) character at a time */
	for (; *cursor && t_isspace(cursor); cursor += pg_mblen(cursor))
		;

	/* End of input, or the remainder is a comment: no word here */
	if (*cursor == '\0' || *cursor == '#')
		return NULL;

	word = cursor;

	/* Advance to the first blank after the word, or to the terminator */
	for (; *cursor && !t_isspace(cursor); cursor += pg_mblen(cursor))
		;

	*end = cursor;

	return word;
}
 
66
 
 
67
static int
 
68
compare_syn(const void *a, const void *b)
 
69
{
 
70
        return strcmp(((Syn *) a)->key, ((Syn *) b)->key);
 
71
}
 
72
 
 
73
static void
 
74
read_dictionary(DictSyn *d, char *filename)
 
75
{
 
76
        char       *real_filename = get_tsearch_config_filename(filename, "rules");
 
77
        tsearch_readline_state trst;
 
78
        char       *line;
 
79
        int                     cur = 0;
 
80
 
 
81
        if (!tsearch_readline_begin(&trst, real_filename))
 
82
                ereport(ERROR,
 
83
                                (errcode(ERRCODE_CONFIG_FILE_ERROR),
 
84
                                 errmsg("could not open synonym file \"%s\": %m",
 
85
                                                real_filename)));
 
86
 
 
87
        while ((line = tsearch_readline(&trst)) != NULL)
 
88
        {
 
89
                char       *value;
 
90
                char       *key;
 
91
                char       *end = NULL;
 
92
 
 
93
                if (*line == '\0')
 
94
                        continue;
 
95
 
 
96
                value = lowerstr(line);
 
97
                pfree(line);
 
98
 
 
99
                key = find_word(value, &end);
 
100
                if (!key)
 
101
                {
 
102
                        pfree(value);
 
103
                        continue;
 
104
                }
 
105
 
 
106
                if (cur == d->len)
 
107
                {
 
108
                        d->len = (d->len > 0) ? 2 * d->len : 16;
 
109
                        if (d->syn)
 
110
                                d->syn = (Syn *) repalloc(d->syn, sizeof(Syn) * d->len);
 
111
                        else
 
112
                                d->syn = (Syn *) palloc(sizeof(Syn) * d->len);
 
113
                }
 
114
 
 
115
                d->syn[cur].key = pnstrdup(key, end - key);
 
116
                d->syn[cur].value = value;
 
117
 
 
118
                cur++;
 
119
        }
 
120
 
 
121
        tsearch_readline_end(&trst);
 
122
 
 
123
        d->len = cur;
 
124
        if (cur > 1)
 
125
                qsort(d->syn, d->len, sizeof(Syn), compare_syn);
 
126
 
 
127
        pfree(real_filename);
 
128
}
 
129
 
 
130
Datum
 
131
dxsyn_init(PG_FUNCTION_ARGS)
 
132
{
 
133
        List       *dictoptions = (List *) PG_GETARG_POINTER(0);
 
134
        DictSyn    *d;
 
135
        ListCell   *l;
 
136
 
 
137
        d = (DictSyn *) palloc0(sizeof(DictSyn));
 
138
        d->len = 0;
 
139
        d->syn = NULL;
 
140
        d->keeporig = true;
 
141
 
 
142
        foreach(l, dictoptions)
 
143
        {
 
144
                DefElem    *defel = (DefElem *) lfirst(l);
 
145
 
 
146
                if (pg_strcasecmp(defel->defname, "KEEPORIG") == 0)
 
147
                {
 
148
                        d->keeporig = defGetBoolean(defel);
 
149
                }
 
150
                else if (pg_strcasecmp(defel->defname, "RULES") == 0)
 
151
                {
 
152
                        read_dictionary(d, defGetString(defel));
 
153
                }
 
154
                else
 
155
                {
 
156
                        ereport(ERROR,
 
157
                                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 
158
                                         errmsg("unrecognized xsyn parameter: \"%s\"",
 
159
                                                        defel->defname)));
 
160
                }
 
161
        }
 
162
 
 
163
        PG_RETURN_POINTER(d);
 
164
}
 
165
 
 
166
Datum
 
167
dxsyn_lexize(PG_FUNCTION_ARGS)
 
168
{
 
169
        DictSyn    *d = (DictSyn *) PG_GETARG_POINTER(0);
 
170
        char       *in = (char *) PG_GETARG_POINTER(1);
 
171
        int                     length = PG_GETARG_INT32(2);
 
172
        Syn                     word;
 
173
        Syn                *found;
 
174
        TSLexeme   *res = NULL;
 
175
 
 
176
        if (!length || d->len == 0)
 
177
                PG_RETURN_POINTER(NULL);
 
178
 
 
179
        /* Create search pattern */
 
180
        {
 
181
                char       *temp = pnstrdup(in, length);
 
182
 
 
183
                word.key = lowerstr(temp);
 
184
                pfree(temp);
 
185
                word.value = NULL;
 
186
        }
 
187
 
 
188
        /* Look for matching syn */
 
189
        found = (Syn *) bsearch(&word, d->syn, d->len, sizeof(Syn), compare_syn);
 
190
        pfree(word.key);
 
191
 
 
192
        if (!found)
 
193
                PG_RETURN_POINTER(NULL);
 
194
 
 
195
        /* Parse string of synonyms and return array of words */
 
196
        {
 
197
                char       *value = pstrdup(found->value);
 
198
                int                     value_length = strlen(value);
 
199
                char       *pos = value;
 
200
                int                     nsyns = 0;
 
201
                bool            is_first = true;
 
202
 
 
203
                res = palloc(0);
 
204
 
 
205
                while (pos < value + value_length)
 
206
                {
 
207
                        char       *end;
 
208
                        char       *syn = find_word(pos, &end);
 
209
 
 
210
                        if (!syn)
 
211
                                break;
 
212
                        *end = '\0';
 
213
 
 
214
                        res = repalloc(res, sizeof(TSLexeme) * (nsyns + 2));
 
215
                        res[nsyns].lexeme = NULL;
 
216
 
 
217
                        /* first word is added to result only if KEEPORIG flag is set */
 
218
                        if (d->keeporig || !is_first)
 
219
                        {
 
220
                                res[nsyns].lexeme = pstrdup(syn);
 
221
                                res[nsyns + 1].lexeme = NULL;
 
222
 
 
223
                                nsyns++;
 
224
                        }
 
225
 
 
226
                        is_first = false;
 
227
 
 
228
                        pos = end + 1;
 
229
                }
 
230
 
 
231
                pfree(value);
 
232
        }
 
233
 
 
234
        PG_RETURN_POINTER(res);
 
235
}