~ubuntu-branches/ubuntu/oneiric/postgresql-9.1/oneiric-security

« back to all changes in this revision

Viewing changes to src/backend/regex/regc_pg_locale.c

  • Committer: Bazaar Package Importer
  • Author(s): Martin Pitt
  • Date: 2011-05-11 10:41:53 UTC
  • Revision ID: james.westby@ubuntu.com-20110511104153-psbh2o58553fv1m0
Tags: upstream-9.1~beta1
Import upstream version 9.1~beta1

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/*-------------------------------------------------------------------------
 
2
 *
 
3
 * regc_pg_locale.c
 
4
 *        ctype functions adapted to work on pg_wchar (a/k/a chr)
 
5
 *
 
6
 * This file is #included by regcomp.c; it's not meant to compile standalone.
 
7
 *
 
8
 * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
 
9
 * Portions Copyright (c) 1994, Regents of the University of California
 
10
 *
 
11
 * IDENTIFICATION
 
12
 *        src/backend/regex/regc_pg_locale.c
 
13
 *
 
14
 *-------------------------------------------------------------------------
 
15
 */
 
16
 
 
17
#include "catalog/pg_collation.h"
 
18
#include "utils/pg_locale.h"
 
19
 
 
20
/*
 
21
 * To provide as much functionality as possible on a variety of platforms,
 
22
 * without going so far as to implement everything from scratch, we use
 
23
 * several implementation strategies depending on the situation:
 
24
 *
 
25
 * 1. In C/POSIX collations, we use hard-wired code.  We can't depend on
 
26
 * the <ctype.h> functions since those will obey LC_CTYPE.  Note that these
 
27
 * collations don't give a fig about multibyte characters.
 
28
 *
 
29
 * 2. In the "default" collation (which is supposed to obey LC_CTYPE):
 
30
 *
 
31
 * 2a. When working in UTF8 encoding, we use the <wctype.h> functions if
 
32
 * available.  This assumes that every platform uses Unicode codepoints
 
33
 * directly as the wchar_t representation of Unicode.  On some platforms
 
34
 * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
 
35
 *
 
36
 * 2b. In all other encodings, or on machines that lack <wctype.h>, we use
 
37
 * the <ctype.h> functions for pg_wchar values up to 255, and punt for values
 
38
 * above that.  This is only 100% correct in single-byte encodings such as
 
39
 * LATINn.  However, non-Unicode multibyte encodings are mostly Far Eastern
 
40
 * character sets for which the properties being tested here aren't very
 
41
 * relevant for higher code values anyway.  The difficulty with using the
 
42
 * <wctype.h> functions with non-Unicode multibyte encodings is that we can
 
43
 * have no certainty that the platform's wchar_t representation matches
 
44
 * what we do in pg_wchar conversions.
 
45
 *
 
46
 * 3. Other collations are only supported on platforms that HAVE_LOCALE_T.
 
47
 * Here, we use the locale_t-extended forms of the <wctype.h> and <ctype.h>
 
48
 * functions, under exactly the same cases as #2.
 
49
 *
 
50
 * There is one notable difference between cases 2 and 3: in the "default"
 
51
 * collation we force ASCII letters to follow ASCII upcase/downcase rules,
 
52
 * while in a non-default collation we just let the library functions do what
 
53
 * they will.  The case where this matters is treatment of I/i in Turkish,
 
54
 * and the behavior is meant to match the upper()/lower() SQL functions.
 
55
 *
 
56
 * We store the active collation setting in static variables.  In principle
 
57
 * it could be passed down to here via the regex library's "struct vars" data
 
58
 * structure; but that would require somewhat invasive changes in the regex
 
59
 * library, and right now there's no real benefit to be gained from that.
 
60
 *
 
61
 * NB: the coding here assumes pg_wchar is an unsigned type.
 
62
 */
 
63
 
 
64
/*
 * PG_Locale_Strategy: which family of character-classification routines
 * the pg_wc_* functions in this file dispatch to.
 */
typedef enum
{
        /* C locale; hard-wired tables, independent of encoding */
        PG_REGEX_LOCALE_C = 0,

        /* plain <wctype.h> functions */
        PG_REGEX_LOCALE_WIDE = 1,

        /* plain <ctype.h> functions */
        PG_REGEX_LOCALE_1BYTE = 2,

        /* locale_t variants of the <wctype.h> functions */
        PG_REGEX_LOCALE_WIDE_L = 3,

        /* locale_t variants of the <ctype.h> functions */
        PG_REGEX_LOCALE_1BYTE_L = 4
} PG_Locale_Strategy;
 
72
 
 
73
static PG_Locale_Strategy pg_regex_strategy;
 
74
static pg_locale_t pg_regex_locale;
 
75
 
 
76
/*
 * Property bits for the hard-wired C-locale character table.  A table
 * entry is the bitwise OR of every property the character has; all bits
 * fit in one unsigned char.
 */
#define PG_ISDIGIT      (1 << 0)
#define PG_ISALPHA      (1 << 1)
#define PG_ISUPPER      (1 << 2)
#define PG_ISLOWER      (1 << 3)
#define PG_ISGRAPH      (1 << 4)
#define PG_ISPRINT      (1 << 5)
#define PG_ISPUNCT      (1 << 6)
#define PG_ISSPACE      (1 << 7)

/* composite mask: matches a character that is a digit or a letter */
#define PG_ISALNUM      (PG_ISDIGIT | PG_ISALPHA)
 
88
 
 
89
static const unsigned char pg_char_properties[128] = {
 
90
        /* NUL */       0,
 
91
        /* ^A */        0,
 
92
        /* ^B */        0,
 
93
        /* ^C */        0,
 
94
        /* ^D */        0,
 
95
        /* ^E */        0,
 
96
        /* ^F */        0,
 
97
        /* ^G */        0,
 
98
        /* ^H */        0,
 
99
        /* ^I */        PG_ISSPACE,
 
100
        /* ^J */        PG_ISSPACE,
 
101
        /* ^K */        PG_ISSPACE,
 
102
        /* ^L */        PG_ISSPACE,
 
103
        /* ^M */        PG_ISSPACE,
 
104
        /* ^N */        0,
 
105
        /* ^O */        0,
 
106
        /* ^P */        0,
 
107
        /* ^Q */        0,
 
108
        /* ^R */        0,
 
109
        /* ^S */        0,
 
110
        /* ^T */        0,
 
111
        /* ^U */        0,
 
112
        /* ^V */        0,
 
113
        /* ^W */        0,
 
114
        /* ^X */        0,
 
115
        /* ^Y */        0,
 
116
        /* ^Z */        0,
 
117
        /* ^[ */        0,
 
118
        /* ^\ */        0,
 
119
        /* ^] */        0,
 
120
        /* ^^ */        0,
 
121
        /* ^_ */        0,
 
122
        /*    */        PG_ISPRINT | PG_ISSPACE,
 
123
        /* !  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
124
        /* "  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
125
        /* #  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
126
        /* $  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
127
        /* %  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
128
        /* &  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
129
        /* '  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
130
        /* (  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
131
        /* )  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
132
        /* *  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
133
        /* +  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
134
        /* ,  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
135
        /* -  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
136
        /* .  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
137
        /* /  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
138
        /* 0  */        PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
 
139
        /* 1  */        PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
 
140
        /* 2  */        PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
 
141
        /* 3  */        PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
 
142
        /* 4  */        PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
 
143
        /* 5  */        PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
 
144
        /* 6  */        PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
 
145
        /* 7  */        PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
 
146
        /* 8  */        PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
 
147
        /* 9  */        PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
 
148
        /* :  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
149
        /* ;  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
150
        /* <  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
151
        /* =  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
152
        /* >  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
153
        /* ?  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
154
        /* @  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
155
        /* A  */        PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
 
156
        /* B  */        PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
 
157
        /* C  */        PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
 
158
        /* D  */        PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
 
159
        /* E  */        PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
 
160
        /* F  */        PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
 
161
        /* G  */        PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
 
162
        /* H  */        PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
 
163
        /* I  */        PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
 
164
        /* J  */        PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
 
165
        /* K  */        PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
 
166
        /* L  */        PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
 
167
        /* M  */        PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
 
168
        /* N  */        PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
 
169
        /* O  */        PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
 
170
        /* P  */        PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
 
171
        /* Q  */        PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
 
172
        /* R  */        PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
 
173
        /* S  */        PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
 
174
        /* T  */        PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
 
175
        /* U  */        PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
 
176
        /* V  */        PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
 
177
        /* W  */        PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
 
178
        /* X  */        PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
 
179
        /* Y  */        PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
 
180
        /* Z  */        PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
 
181
        /* [  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
182
        /* \  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
183
        /* ]  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
184
        /* ^  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
185
        /* _  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
186
        /* `  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
187
        /* a  */        PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
 
188
        /* b  */        PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
 
189
        /* c  */        PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
 
190
        /* d  */        PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
 
191
        /* e  */        PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
 
192
        /* f  */        PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
 
193
        /* g  */        PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
 
194
        /* h  */        PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
 
195
        /* i  */        PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
 
196
        /* j  */        PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
 
197
        /* k  */        PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
 
198
        /* l  */        PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
 
199
        /* m  */        PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
 
200
        /* n  */        PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
 
201
        /* o  */        PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
 
202
        /* p  */        PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
 
203
        /* q  */        PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
 
204
        /* r  */        PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
 
205
        /* s  */        PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
 
206
        /* t  */        PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
 
207
        /* u  */        PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
 
208
        /* v  */        PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
 
209
        /* w  */        PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
 
210
        /* x  */        PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
 
211
        /* y  */        PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
 
212
        /* z  */        PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
 
213
        /* {  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
214
        /* |  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
215
        /* }  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
216
        /* ~  */        PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
 
217
        /* DEL */       0
 
218
};
 
219
 
 
220
 
 
221
/*
 
222
 * pg_set_regex_collation: set collation for these functions to obey
 
223
 *
 
224
 * This is called when beginning compilation or execution of a regexp.
 
225
 * Since there's no need for re-entrancy of regexp operations, it's okay
 
226
 * to store the results in static variables.
 
227
 */
 
228
void
 
229
pg_set_regex_collation(Oid collation)
 
230
{
 
231
        if (lc_ctype_is_c(collation))
 
232
        {
 
233
                /* C/POSIX collations use this path regardless of database encoding */
 
234
                pg_regex_strategy = PG_REGEX_LOCALE_C;
 
235
                pg_regex_locale = 0;
 
236
        }
 
237
        else
 
238
        {
 
239
                if (collation == DEFAULT_COLLATION_OID)
 
240
                        pg_regex_locale = 0;
 
241
                else if (OidIsValid(collation))
 
242
                {
 
243
                        /*
 
244
                         * NB: pg_newlocale_from_collation will fail if not HAVE_LOCALE_T;
 
245
                         * the case of pg_regex_locale != 0 but not HAVE_LOCALE_T does
 
246
                         * not have to be considered below.
 
247
                         */
 
248
                        pg_regex_locale = pg_newlocale_from_collation(collation);
 
249
                }
 
250
                else
 
251
                {
 
252
                        /*
 
253
                         * This typically means that the parser could not resolve a
 
254
                         * conflict of implicit collations, so report it that way.
 
255
                         */
 
256
                        ereport(ERROR,
 
257
                                        (errcode(ERRCODE_INDETERMINATE_COLLATION),
 
258
                                         errmsg("could not determine which collation to use for regular expression"),
 
259
                                         errhint("Use the COLLATE clause to set the collation explicitly.")));
 
260
                }
 
261
 
 
262
#ifdef USE_WIDE_UPPER_LOWER
 
263
                if (GetDatabaseEncoding() == PG_UTF8)
 
264
                {
 
265
                        if (pg_regex_locale)
 
266
                                pg_regex_strategy = PG_REGEX_LOCALE_WIDE_L;
 
267
                        else
 
268
                                pg_regex_strategy = PG_REGEX_LOCALE_WIDE;
 
269
                }
 
270
                else
 
271
#endif   /* USE_WIDE_UPPER_LOWER */
 
272
                {
 
273
                        if (pg_regex_locale)
 
274
                                pg_regex_strategy = PG_REGEX_LOCALE_1BYTE_L;
 
275
                        else
 
276
                                pg_regex_strategy = PG_REGEX_LOCALE_1BYTE;
 
277
                }
 
278
        }
 
279
}
 
280
 
 
281
static int
 
282
pg_wc_isdigit(pg_wchar c)
 
283
{
 
284
        switch (pg_regex_strategy)
 
285
        {
 
286
                case PG_REGEX_LOCALE_C:
 
287
                        return (c <= (pg_wchar) 127 &&
 
288
                                        (pg_char_properties[c] & PG_ISDIGIT));
 
289
                case PG_REGEX_LOCALE_WIDE:
 
290
#ifdef USE_WIDE_UPPER_LOWER
 
291
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 
292
                                return iswdigit((wint_t) c);
 
293
#endif
 
294
                        /* FALL THRU */
 
295
                case PG_REGEX_LOCALE_1BYTE:
 
296
                        return (c <= (pg_wchar) UCHAR_MAX &&
 
297
                                        isdigit((unsigned char) c));
 
298
                case PG_REGEX_LOCALE_WIDE_L:
 
299
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
 
300
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 
301
                                return iswdigit_l((wint_t) c, pg_regex_locale);
 
302
#endif
 
303
                        /* FALL THRU */
 
304
                case PG_REGEX_LOCALE_1BYTE_L:
 
305
#ifdef HAVE_LOCALE_T
 
306
                        return (c <= (pg_wchar) UCHAR_MAX &&
 
307
                                        isdigit_l((unsigned char) c, pg_regex_locale));
 
308
#endif
 
309
                        break;
 
310
        }
 
311
        return 0;                                       /* can't get here, but keep compiler quiet */
 
312
}
 
313
 
 
314
static int
 
315
pg_wc_isalpha(pg_wchar c)
 
316
{
 
317
        switch (pg_regex_strategy)
 
318
        {
 
319
                case PG_REGEX_LOCALE_C:
 
320
                        return (c <= (pg_wchar) 127 &&
 
321
                                        (pg_char_properties[c] & PG_ISALPHA));
 
322
                case PG_REGEX_LOCALE_WIDE:
 
323
#ifdef USE_WIDE_UPPER_LOWER
 
324
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 
325
                                return iswalpha((wint_t) c);
 
326
#endif
 
327
                        /* FALL THRU */
 
328
                case PG_REGEX_LOCALE_1BYTE:
 
329
                        return (c <= (pg_wchar) UCHAR_MAX &&
 
330
                                        isalpha((unsigned char) c));
 
331
                case PG_REGEX_LOCALE_WIDE_L:
 
332
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
 
333
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 
334
                                return iswalpha_l((wint_t) c, pg_regex_locale);
 
335
#endif
 
336
                        /* FALL THRU */
 
337
                case PG_REGEX_LOCALE_1BYTE_L:
 
338
#ifdef HAVE_LOCALE_T
 
339
                        return (c <= (pg_wchar) UCHAR_MAX &&
 
340
                                        isalpha_l((unsigned char) c, pg_regex_locale));
 
341
#endif
 
342
                        break;
 
343
        }
 
344
        return 0;                                       /* can't get here, but keep compiler quiet */
 
345
}
 
346
 
 
347
static int
 
348
pg_wc_isalnum(pg_wchar c)
 
349
{
 
350
        switch (pg_regex_strategy)
 
351
        {
 
352
                case PG_REGEX_LOCALE_C:
 
353
                        return (c <= (pg_wchar) 127 &&
 
354
                                        (pg_char_properties[c] & PG_ISALNUM));
 
355
                case PG_REGEX_LOCALE_WIDE:
 
356
#ifdef USE_WIDE_UPPER_LOWER
 
357
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 
358
                                return iswalnum((wint_t) c);
 
359
#endif
 
360
                        /* FALL THRU */
 
361
                case PG_REGEX_LOCALE_1BYTE:
 
362
                        return (c <= (pg_wchar) UCHAR_MAX &&
 
363
                                        isalnum((unsigned char) c));
 
364
                case PG_REGEX_LOCALE_WIDE_L:
 
365
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
 
366
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 
367
                                return iswalnum_l((wint_t) c, pg_regex_locale);
 
368
#endif
 
369
                        /* FALL THRU */
 
370
                case PG_REGEX_LOCALE_1BYTE_L:
 
371
#ifdef HAVE_LOCALE_T
 
372
                        return (c <= (pg_wchar) UCHAR_MAX &&
 
373
                                        isalnum_l((unsigned char) c, pg_regex_locale));
 
374
#endif
 
375
                        break;
 
376
        }
 
377
        return 0;                                       /* can't get here, but keep compiler quiet */
 
378
}
 
379
 
 
380
static int
 
381
pg_wc_isupper(pg_wchar c)
 
382
{
 
383
        switch (pg_regex_strategy)
 
384
        {
 
385
                case PG_REGEX_LOCALE_C:
 
386
                        return (c <= (pg_wchar) 127 &&
 
387
                                        (pg_char_properties[c] & PG_ISUPPER));
 
388
                case PG_REGEX_LOCALE_WIDE:
 
389
#ifdef USE_WIDE_UPPER_LOWER
 
390
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 
391
                                return iswupper((wint_t) c);
 
392
#endif
 
393
                        /* FALL THRU */
 
394
                case PG_REGEX_LOCALE_1BYTE:
 
395
                        return (c <= (pg_wchar) UCHAR_MAX &&
 
396
                                        isupper((unsigned char) c));
 
397
                case PG_REGEX_LOCALE_WIDE_L:
 
398
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
 
399
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 
400
                                return iswupper_l((wint_t) c, pg_regex_locale);
 
401
#endif
 
402
                        /* FALL THRU */
 
403
                case PG_REGEX_LOCALE_1BYTE_L:
 
404
#ifdef HAVE_LOCALE_T
 
405
                        return (c <= (pg_wchar) UCHAR_MAX &&
 
406
                                        isupper_l((unsigned char) c, pg_regex_locale));
 
407
#endif
 
408
                        break;
 
409
        }
 
410
        return 0;                                       /* can't get here, but keep compiler quiet */
 
411
}
 
412
 
 
413
static int
 
414
pg_wc_islower(pg_wchar c)
 
415
{
 
416
        switch (pg_regex_strategy)
 
417
        {
 
418
                case PG_REGEX_LOCALE_C:
 
419
                        return (c <= (pg_wchar) 127 &&
 
420
                                        (pg_char_properties[c] & PG_ISLOWER));
 
421
                case PG_REGEX_LOCALE_WIDE:
 
422
#ifdef USE_WIDE_UPPER_LOWER
 
423
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 
424
                                return iswlower((wint_t) c);
 
425
#endif
 
426
                        /* FALL THRU */
 
427
                case PG_REGEX_LOCALE_1BYTE:
 
428
                        return (c <= (pg_wchar) UCHAR_MAX &&
 
429
                                        islower((unsigned char) c));
 
430
                case PG_REGEX_LOCALE_WIDE_L:
 
431
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
 
432
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 
433
                                return iswlower_l((wint_t) c, pg_regex_locale);
 
434
#endif
 
435
                        /* FALL THRU */
 
436
                case PG_REGEX_LOCALE_1BYTE_L:
 
437
#ifdef HAVE_LOCALE_T
 
438
                        return (c <= (pg_wchar) UCHAR_MAX &&
 
439
                                        islower_l((unsigned char) c, pg_regex_locale));
 
440
#endif
 
441
                        break;
 
442
        }
 
443
        return 0;                                       /* can't get here, but keep compiler quiet */
 
444
}
 
445
 
 
446
static int
 
447
pg_wc_isgraph(pg_wchar c)
 
448
{
 
449
        switch (pg_regex_strategy)
 
450
        {
 
451
                case PG_REGEX_LOCALE_C:
 
452
                        return (c <= (pg_wchar) 127 &&
 
453
                                        (pg_char_properties[c] & PG_ISGRAPH));
 
454
                case PG_REGEX_LOCALE_WIDE:
 
455
#ifdef USE_WIDE_UPPER_LOWER
 
456
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 
457
                                return iswgraph((wint_t) c);
 
458
#endif
 
459
                        /* FALL THRU */
 
460
                case PG_REGEX_LOCALE_1BYTE:
 
461
                        return (c <= (pg_wchar) UCHAR_MAX &&
 
462
                                        isgraph((unsigned char) c));
 
463
                case PG_REGEX_LOCALE_WIDE_L:
 
464
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
 
465
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 
466
                                return iswgraph_l((wint_t) c, pg_regex_locale);
 
467
#endif
 
468
                        /* FALL THRU */
 
469
                case PG_REGEX_LOCALE_1BYTE_L:
 
470
#ifdef HAVE_LOCALE_T
 
471
                        return (c <= (pg_wchar) UCHAR_MAX &&
 
472
                                        isgraph_l((unsigned char) c, pg_regex_locale));
 
473
#endif
 
474
                        break;
 
475
        }
 
476
        return 0;                                       /* can't get here, but keep compiler quiet */
 
477
}
 
478
 
 
479
static int
 
480
pg_wc_isprint(pg_wchar c)
 
481
{
 
482
        switch (pg_regex_strategy)
 
483
        {
 
484
                case PG_REGEX_LOCALE_C:
 
485
                        return (c <= (pg_wchar) 127 &&
 
486
                                        (pg_char_properties[c] & PG_ISPRINT));
 
487
                case PG_REGEX_LOCALE_WIDE:
 
488
#ifdef USE_WIDE_UPPER_LOWER
 
489
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 
490
                                return iswprint((wint_t) c);
 
491
#endif
 
492
                        /* FALL THRU */
 
493
                case PG_REGEX_LOCALE_1BYTE:
 
494
                        return (c <= (pg_wchar) UCHAR_MAX &&
 
495
                                        isprint((unsigned char) c));
 
496
                case PG_REGEX_LOCALE_WIDE_L:
 
497
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
 
498
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 
499
                                return iswprint_l((wint_t) c, pg_regex_locale);
 
500
#endif
 
501
                        /* FALL THRU */
 
502
                case PG_REGEX_LOCALE_1BYTE_L:
 
503
#ifdef HAVE_LOCALE_T
 
504
                        return (c <= (pg_wchar) UCHAR_MAX &&
 
505
                                        isprint_l((unsigned char) c, pg_regex_locale));
 
506
#endif
 
507
                        break;
 
508
        }
 
509
        return 0;                                       /* can't get here, but keep compiler quiet */
 
510
}
 
511
 
 
512
static int
 
513
pg_wc_ispunct(pg_wchar c)
 
514
{
 
515
        switch (pg_regex_strategy)
 
516
        {
 
517
                case PG_REGEX_LOCALE_C:
 
518
                        return (c <= (pg_wchar) 127 &&
 
519
                                        (pg_char_properties[c] & PG_ISPUNCT));
 
520
                case PG_REGEX_LOCALE_WIDE:
 
521
#ifdef USE_WIDE_UPPER_LOWER
 
522
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 
523
                                return iswpunct((wint_t) c);
 
524
#endif
 
525
                        /* FALL THRU */
 
526
                case PG_REGEX_LOCALE_1BYTE:
 
527
                        return (c <= (pg_wchar) UCHAR_MAX &&
 
528
                                        ispunct((unsigned char) c));
 
529
                case PG_REGEX_LOCALE_WIDE_L:
 
530
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
 
531
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 
532
                                return iswpunct_l((wint_t) c, pg_regex_locale);
 
533
#endif
 
534
                        /* FALL THRU */
 
535
                case PG_REGEX_LOCALE_1BYTE_L:
 
536
#ifdef HAVE_LOCALE_T
 
537
                        return (c <= (pg_wchar) UCHAR_MAX &&
 
538
                                        ispunct_l((unsigned char) c, pg_regex_locale));
 
539
#endif
 
540
                        break;
 
541
        }
 
542
        return 0;                                       /* can't get here, but keep compiler quiet */
 
543
}
 
544
 
 
545
static int
 
546
pg_wc_isspace(pg_wchar c)
 
547
{
 
548
        switch (pg_regex_strategy)
 
549
        {
 
550
                case PG_REGEX_LOCALE_C:
 
551
                        return (c <= (pg_wchar) 127 &&
 
552
                                        (pg_char_properties[c] & PG_ISSPACE));
 
553
                case PG_REGEX_LOCALE_WIDE:
 
554
#ifdef USE_WIDE_UPPER_LOWER
 
555
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 
556
                                return iswspace((wint_t) c);
 
557
#endif
 
558
                        /* FALL THRU */
 
559
                case PG_REGEX_LOCALE_1BYTE:
 
560
                        return (c <= (pg_wchar) UCHAR_MAX &&
 
561
                                        isspace((unsigned char) c));
 
562
                case PG_REGEX_LOCALE_WIDE_L:
 
563
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
 
564
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 
565
                                return iswspace_l((wint_t) c, pg_regex_locale);
 
566
#endif
 
567
                        /* FALL THRU */
 
568
                case PG_REGEX_LOCALE_1BYTE_L:
 
569
#ifdef HAVE_LOCALE_T
 
570
                        return (c <= (pg_wchar) UCHAR_MAX &&
 
571
                                        isspace_l((unsigned char) c, pg_regex_locale));
 
572
#endif
 
573
                        break;
 
574
        }
 
575
        return 0;                                       /* can't get here, but keep compiler quiet */
 
576
}
 
577
 
 
578
static pg_wchar
 
579
pg_wc_toupper(pg_wchar c)
 
580
{
 
581
        switch (pg_regex_strategy)
 
582
        {
 
583
                case PG_REGEX_LOCALE_C:
 
584
                        if (c <= (pg_wchar) 127)
 
585
                                return pg_ascii_toupper((unsigned char) c);
 
586
                        return c;
 
587
                case PG_REGEX_LOCALE_WIDE:
 
588
                        /* force C behavior for ASCII characters, per comments above */
 
589
                        if (c <= (pg_wchar) 127)
 
590
                                return pg_ascii_toupper((unsigned char) c);
 
591
#ifdef USE_WIDE_UPPER_LOWER
 
592
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 
593
                                return towupper((wint_t) c);
 
594
#endif
 
595
                        /* FALL THRU */
 
596
                case PG_REGEX_LOCALE_1BYTE:
 
597
                        /* force C behavior for ASCII characters, per comments above */
 
598
                        if (c <= (pg_wchar) 127)
 
599
                                return pg_ascii_toupper((unsigned char) c);
 
600
                        if (c <= (pg_wchar) UCHAR_MAX)
 
601
                                return toupper((unsigned char) c);
 
602
                        return c;
 
603
                case PG_REGEX_LOCALE_WIDE_L:
 
604
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
 
605
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 
606
                                return towupper_l((wint_t) c, pg_regex_locale);
 
607
#endif
 
608
                        /* FALL THRU */
 
609
                case PG_REGEX_LOCALE_1BYTE_L:
 
610
#ifdef HAVE_LOCALE_T
 
611
                        if (c <= (pg_wchar) UCHAR_MAX)
 
612
                                return toupper_l((unsigned char) c, pg_regex_locale);
 
613
#endif
 
614
                        return c;
 
615
        }
 
616
        return 0;                                       /* can't get here, but keep compiler quiet */
 
617
}
 
618
 
 
619
static pg_wchar
 
620
pg_wc_tolower(pg_wchar c)
 
621
{
 
622
        switch (pg_regex_strategy)
 
623
        {
 
624
                case PG_REGEX_LOCALE_C:
 
625
                        if (c <= (pg_wchar) 127)
 
626
                                return pg_ascii_tolower((unsigned char) c);
 
627
                        return c;
 
628
                case PG_REGEX_LOCALE_WIDE:
 
629
                        /* force C behavior for ASCII characters, per comments above */
 
630
                        if (c <= (pg_wchar) 127)
 
631
                                return pg_ascii_tolower((unsigned char) c);
 
632
#ifdef USE_WIDE_UPPER_LOWER
 
633
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 
634
                                return towlower((wint_t) c);
 
635
#endif
 
636
                        /* FALL THRU */
 
637
                case PG_REGEX_LOCALE_1BYTE:
 
638
                        /* force C behavior for ASCII characters, per comments above */
 
639
                        if (c <= (pg_wchar) 127)
 
640
                                return pg_ascii_tolower((unsigned char) c);
 
641
                        if (c <= (pg_wchar) UCHAR_MAX)
 
642
                                return tolower((unsigned char) c);
 
643
                        return c;
 
644
                case PG_REGEX_LOCALE_WIDE_L:
 
645
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
 
646
                        if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
 
647
                                return towlower_l((wint_t) c, pg_regex_locale);
 
648
#endif
 
649
                        /* FALL THRU */
 
650
                case PG_REGEX_LOCALE_1BYTE_L:
 
651
#ifdef HAVE_LOCALE_T
 
652
                        if (c <= (pg_wchar) UCHAR_MAX)
 
653
                                return tolower_l((unsigned char) c, pg_regex_locale);
 
654
#endif
 
655
                        return c;
 
656
        }
 
657
        return 0;                                       /* can't get here, but keep compiler quiet */
 
658
}