/*-------------------------------------------------------------------------
 *
 * ctype functions adapted to work on pg_wchar (a/k/a chr)
 *
 * This file is #included by regcomp.c; it's not meant to compile standalone.
 *
 * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/backend/regex/regc_pg_locale.c
 *
 *-------------------------------------------------------------------------
 */
#include "catalog/pg_collation.h"
#include "utils/pg_locale.h"
/*
 * To provide as much functionality as possible on a variety of platforms,
 * without going so far as to implement everything from scratch, we use
 * several implementation strategies depending on the situation:
 *
 * 1. In C/POSIX collations, we use hard-wired code. We can't depend on
 * the <ctype.h> functions since those will obey LC_CTYPE. Note that these
 * collations don't give a fig about multibyte characters.
 *
 * 2. In the "default" collation (which is supposed to obey LC_CTYPE):
 *
 * 2a. When working in UTF8 encoding, we use the <wctype.h> functions if
 * available. This assumes that every platform uses Unicode codepoints
 * directly as the wchar_t representation of Unicode. On some platforms
 * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
 *
 * 2b. In all other encodings, or on machines that lack <wctype.h>, we use
 * the <ctype.h> functions for pg_wchar values up to 255, and punt for values
 * above that. This is only 100% correct in single-byte encodings such as
 * LATINn. However, non-Unicode multibyte encodings are mostly Far Eastern
 * character sets for which the properties being tested here aren't very
 * relevant for higher code values anyway. The difficulty with using the
 * <wctype.h> functions with non-Unicode multibyte encodings is that we can
 * have no certainty that the platform's wchar_t representation matches
 * what we do in pg_wchar conversions.
 *
 * 3. Other collations are only supported on platforms that HAVE_LOCALE_T.
 * Here, we use the locale_t-extended forms of the <wctype.h> and <ctype.h>
 * functions, under exactly the same cases as #2.
 *
 * There is one notable difference between cases 2 and 3: in the "default"
 * collation we force ASCII letters to follow ASCII upcase/downcase rules,
 * while in a non-default collation we just let the library functions do what
 * they will. The case where this matters is treatment of I/i in Turkish,
 * and the behavior is meant to match the upper()/lower() SQL functions.
 *
 * We store the active collation setting in static variables. In principle
 * it could be passed down to here via the regex library's "struct vars" data
 * structure; but that would require somewhat invasive changes in the regex
 * library, and right now there's no real benefit to be gained from that.
 *
 * NB: the coding here assumes pg_wchar is an unsigned type.
 */
/*
 * Implementation strategy chosen by pg_set_regex_collation(); each pg_wc_*
 * function switches on it to pick the classification/conversion routine.
 */
typedef enum
{
	PG_REGEX_LOCALE_C,			/* C locale (encoding independent) */
	PG_REGEX_LOCALE_WIDE,		/* Use <wctype.h> functions */
	PG_REGEX_LOCALE_1BYTE,		/* Use <ctype.h> functions */
	PG_REGEX_LOCALE_WIDE_L,		/* Use locale_t <wctype.h> functions */
	PG_REGEX_LOCALE_1BYTE_L		/* Use locale_t <ctype.h> functions */
} PG_Locale_Strategy;
73
static PG_Locale_Strategy pg_regex_strategy;
74
static pg_locale_t pg_regex_locale;
/*
 * Hard-wired character properties for C locale
 *
 * Each PG_ISxxx flag is one bit, so an entry of pg_char_properties is the
 * OR of every class the character belongs to.  PG_ISALNUM is not a bit of
 * its own: testing against it succeeds when either DIGIT or ALPHA is set.
 */
#define PG_ISDIGIT	0x01
#define PG_ISALPHA	0x02
#define PG_ISALNUM	(PG_ISDIGIT | PG_ISALPHA)
#define PG_ISUPPER	0x04
#define PG_ISLOWER	0x08
#define PG_ISGRAPH	0x10
#define PG_ISPRINT	0x20
#define PG_ISPUNCT	0x40
#define PG_ISSPACE	0x80

/*
 * Indexed directly by ASCII code (0..127), so there must be exactly one
 * entry per code point, in order.  Control characters carry no flags except
 * the five that isspace() accepts in the C locale (HT, LF, VT, FF, CR).
 */
static const unsigned char pg_char_properties[128] = {
	/* NUL */	0,
	/* ^A */	0,
	/* ^B */	0,
	/* ^C */	0,
	/* ^D */	0,
	/* ^E */	0,
	/* ^F */	0,
	/* ^G */	0,
	/* ^H */	0,
	/* ^I */	PG_ISSPACE,
	/* ^J */	PG_ISSPACE,
	/* ^K */	PG_ISSPACE,
	/* ^L */	PG_ISSPACE,
	/* ^M */	PG_ISSPACE,
	/* ^N */	0,
	/* ^O */	0,
	/* ^P */	0,
	/* ^Q */	0,
	/* ^R */	0,
	/* ^S */	0,
	/* ^T */	0,
	/* ^U */	0,
	/* ^V */	0,
	/* ^W */	0,
	/* ^X */	0,
	/* ^Y */	0,
	/* ^Z */	0,
	/* ^[ */	0,
	/* ^\ */	0,
	/* ^] */	0,
	/* ^^ */	0,
	/* ^_ */	0,
	/*   */	PG_ISPRINT | PG_ISSPACE,
	/* ! */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* " */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* # */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* $ */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* % */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* & */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* ' */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* ( */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* ) */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* * */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* + */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* , */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* - */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* . */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* / */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* 0 */		PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
	/* 1 */		PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
	/* 2 */		PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
	/* 3 */		PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
	/* 4 */		PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
	/* 5 */		PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
	/* 6 */		PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
	/* 7 */		PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
	/* 8 */		PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
	/* 9 */		PG_ISDIGIT | PG_ISGRAPH | PG_ISPRINT,
	/* : */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* ; */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* < */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* = */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* > */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* ? */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* @ */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* A */		PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
	/* B */		PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
	/* C */		PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
	/* D */		PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
	/* E */		PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
	/* F */		PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
	/* G */		PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
	/* H */		PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
	/* I */		PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
	/* J */		PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
	/* K */		PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
	/* L */		PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
	/* M */		PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
	/* N */		PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
	/* O */		PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
	/* P */		PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
	/* Q */		PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
	/* R */		PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
	/* S */		PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
	/* T */		PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
	/* U */		PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
	/* V */		PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
	/* W */		PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
	/* X */		PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
	/* Y */		PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
	/* Z */		PG_ISALPHA | PG_ISUPPER | PG_ISGRAPH | PG_ISPRINT,
	/* [ */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* \ */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* ] */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* ^ */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* _ */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* ` */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* a */		PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
	/* b */		PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
	/* c */		PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
	/* d */		PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
	/* e */		PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
	/* f */		PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
	/* g */		PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
	/* h */		PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
	/* i */		PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
	/* j */		PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
	/* k */		PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
	/* l */		PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
	/* m */		PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
	/* n */		PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
	/* o */		PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
	/* p */		PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
	/* q */		PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
	/* r */		PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
	/* s */		PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
	/* t */		PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
	/* u */		PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
	/* v */		PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
	/* w */		PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
	/* x */		PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
	/* y */		PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
	/* z */		PG_ISALPHA | PG_ISLOWER | PG_ISGRAPH | PG_ISPRINT,
	/* { */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* | */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* } */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* ~ */		PG_ISGRAPH | PG_ISPRINT | PG_ISPUNCT,
	/* DEL */	0
};
222
* pg_set_regex_collation: set collation for these functions to obey
224
* This is called when beginning compilation or execution of a regexp.
225
* Since there's no need for re-entrancy of regexp operations, it's okay
226
* to store the results in static variables.
229
pg_set_regex_collation(Oid collation)
231
if (lc_ctype_is_c(collation))
233
/* C/POSIX collations use this path regardless of database encoding */
234
pg_regex_strategy = PG_REGEX_LOCALE_C;
239
if (collation == DEFAULT_COLLATION_OID)
241
else if (OidIsValid(collation))
244
* NB: pg_newlocale_from_collation will fail if not HAVE_LOCALE_T;
245
* the case of pg_regex_locale != 0 but not HAVE_LOCALE_T does
246
* not have to be considered below.
248
pg_regex_locale = pg_newlocale_from_collation(collation);
253
* This typically means that the parser could not resolve a
254
* conflict of implicit collations, so report it that way.
257
(errcode(ERRCODE_INDETERMINATE_COLLATION),
258
errmsg("could not determine which collation to use for regular expression"),
259
errhint("Use the COLLATE clause to set the collation explicitly.")));
262
#ifdef USE_WIDE_UPPER_LOWER
263
if (GetDatabaseEncoding() == PG_UTF8)
266
pg_regex_strategy = PG_REGEX_LOCALE_WIDE_L;
268
pg_regex_strategy = PG_REGEX_LOCALE_WIDE;
271
#endif /* USE_WIDE_UPPER_LOWER */
274
pg_regex_strategy = PG_REGEX_LOCALE_1BYTE_L;
276
pg_regex_strategy = PG_REGEX_LOCALE_1BYTE;
282
pg_wc_isdigit(pg_wchar c)
284
switch (pg_regex_strategy)
286
case PG_REGEX_LOCALE_C:
287
return (c <= (pg_wchar) 127 &&
288
(pg_char_properties[c] & PG_ISDIGIT));
289
case PG_REGEX_LOCALE_WIDE:
290
#ifdef USE_WIDE_UPPER_LOWER
291
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
292
return iswdigit((wint_t) c);
295
case PG_REGEX_LOCALE_1BYTE:
296
return (c <= (pg_wchar) UCHAR_MAX &&
297
isdigit((unsigned char) c));
298
case PG_REGEX_LOCALE_WIDE_L:
299
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
300
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
301
return iswdigit_l((wint_t) c, pg_regex_locale);
304
case PG_REGEX_LOCALE_1BYTE_L:
306
return (c <= (pg_wchar) UCHAR_MAX &&
307
isdigit_l((unsigned char) c, pg_regex_locale));
311
return 0; /* can't get here, but keep compiler quiet */
315
pg_wc_isalpha(pg_wchar c)
317
switch (pg_regex_strategy)
319
case PG_REGEX_LOCALE_C:
320
return (c <= (pg_wchar) 127 &&
321
(pg_char_properties[c] & PG_ISALPHA));
322
case PG_REGEX_LOCALE_WIDE:
323
#ifdef USE_WIDE_UPPER_LOWER
324
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
325
return iswalpha((wint_t) c);
328
case PG_REGEX_LOCALE_1BYTE:
329
return (c <= (pg_wchar) UCHAR_MAX &&
330
isalpha((unsigned char) c));
331
case PG_REGEX_LOCALE_WIDE_L:
332
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
333
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
334
return iswalpha_l((wint_t) c, pg_regex_locale);
337
case PG_REGEX_LOCALE_1BYTE_L:
339
return (c <= (pg_wchar) UCHAR_MAX &&
340
isalpha_l((unsigned char) c, pg_regex_locale));
344
return 0; /* can't get here, but keep compiler quiet */
348
pg_wc_isalnum(pg_wchar c)
350
switch (pg_regex_strategy)
352
case PG_REGEX_LOCALE_C:
353
return (c <= (pg_wchar) 127 &&
354
(pg_char_properties[c] & PG_ISALNUM));
355
case PG_REGEX_LOCALE_WIDE:
356
#ifdef USE_WIDE_UPPER_LOWER
357
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
358
return iswalnum((wint_t) c);
361
case PG_REGEX_LOCALE_1BYTE:
362
return (c <= (pg_wchar) UCHAR_MAX &&
363
isalnum((unsigned char) c));
364
case PG_REGEX_LOCALE_WIDE_L:
365
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
366
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
367
return iswalnum_l((wint_t) c, pg_regex_locale);
370
case PG_REGEX_LOCALE_1BYTE_L:
372
return (c <= (pg_wchar) UCHAR_MAX &&
373
isalnum_l((unsigned char) c, pg_regex_locale));
377
return 0; /* can't get here, but keep compiler quiet */
381
pg_wc_isupper(pg_wchar c)
383
switch (pg_regex_strategy)
385
case PG_REGEX_LOCALE_C:
386
return (c <= (pg_wchar) 127 &&
387
(pg_char_properties[c] & PG_ISUPPER));
388
case PG_REGEX_LOCALE_WIDE:
389
#ifdef USE_WIDE_UPPER_LOWER
390
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
391
return iswupper((wint_t) c);
394
case PG_REGEX_LOCALE_1BYTE:
395
return (c <= (pg_wchar) UCHAR_MAX &&
396
isupper((unsigned char) c));
397
case PG_REGEX_LOCALE_WIDE_L:
398
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
399
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
400
return iswupper_l((wint_t) c, pg_regex_locale);
403
case PG_REGEX_LOCALE_1BYTE_L:
405
return (c <= (pg_wchar) UCHAR_MAX &&
406
isupper_l((unsigned char) c, pg_regex_locale));
410
return 0; /* can't get here, but keep compiler quiet */
414
pg_wc_islower(pg_wchar c)
416
switch (pg_regex_strategy)
418
case PG_REGEX_LOCALE_C:
419
return (c <= (pg_wchar) 127 &&
420
(pg_char_properties[c] & PG_ISLOWER));
421
case PG_REGEX_LOCALE_WIDE:
422
#ifdef USE_WIDE_UPPER_LOWER
423
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
424
return iswlower((wint_t) c);
427
case PG_REGEX_LOCALE_1BYTE:
428
return (c <= (pg_wchar) UCHAR_MAX &&
429
islower((unsigned char) c));
430
case PG_REGEX_LOCALE_WIDE_L:
431
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
432
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
433
return iswlower_l((wint_t) c, pg_regex_locale);
436
case PG_REGEX_LOCALE_1BYTE_L:
438
return (c <= (pg_wchar) UCHAR_MAX &&
439
islower_l((unsigned char) c, pg_regex_locale));
443
return 0; /* can't get here, but keep compiler quiet */
447
pg_wc_isgraph(pg_wchar c)
449
switch (pg_regex_strategy)
451
case PG_REGEX_LOCALE_C:
452
return (c <= (pg_wchar) 127 &&
453
(pg_char_properties[c] & PG_ISGRAPH));
454
case PG_REGEX_LOCALE_WIDE:
455
#ifdef USE_WIDE_UPPER_LOWER
456
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
457
return iswgraph((wint_t) c);
460
case PG_REGEX_LOCALE_1BYTE:
461
return (c <= (pg_wchar) UCHAR_MAX &&
462
isgraph((unsigned char) c));
463
case PG_REGEX_LOCALE_WIDE_L:
464
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
465
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
466
return iswgraph_l((wint_t) c, pg_regex_locale);
469
case PG_REGEX_LOCALE_1BYTE_L:
471
return (c <= (pg_wchar) UCHAR_MAX &&
472
isgraph_l((unsigned char) c, pg_regex_locale));
476
return 0; /* can't get here, but keep compiler quiet */
480
pg_wc_isprint(pg_wchar c)
482
switch (pg_regex_strategy)
484
case PG_REGEX_LOCALE_C:
485
return (c <= (pg_wchar) 127 &&
486
(pg_char_properties[c] & PG_ISPRINT));
487
case PG_REGEX_LOCALE_WIDE:
488
#ifdef USE_WIDE_UPPER_LOWER
489
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
490
return iswprint((wint_t) c);
493
case PG_REGEX_LOCALE_1BYTE:
494
return (c <= (pg_wchar) UCHAR_MAX &&
495
isprint((unsigned char) c));
496
case PG_REGEX_LOCALE_WIDE_L:
497
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
498
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
499
return iswprint_l((wint_t) c, pg_regex_locale);
502
case PG_REGEX_LOCALE_1BYTE_L:
504
return (c <= (pg_wchar) UCHAR_MAX &&
505
isprint_l((unsigned char) c, pg_regex_locale));
509
return 0; /* can't get here, but keep compiler quiet */
513
pg_wc_ispunct(pg_wchar c)
515
switch (pg_regex_strategy)
517
case PG_REGEX_LOCALE_C:
518
return (c <= (pg_wchar) 127 &&
519
(pg_char_properties[c] & PG_ISPUNCT));
520
case PG_REGEX_LOCALE_WIDE:
521
#ifdef USE_WIDE_UPPER_LOWER
522
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
523
return iswpunct((wint_t) c);
526
case PG_REGEX_LOCALE_1BYTE:
527
return (c <= (pg_wchar) UCHAR_MAX &&
528
ispunct((unsigned char) c));
529
case PG_REGEX_LOCALE_WIDE_L:
530
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
531
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
532
return iswpunct_l((wint_t) c, pg_regex_locale);
535
case PG_REGEX_LOCALE_1BYTE_L:
537
return (c <= (pg_wchar) UCHAR_MAX &&
538
ispunct_l((unsigned char) c, pg_regex_locale));
542
return 0; /* can't get here, but keep compiler quiet */
546
pg_wc_isspace(pg_wchar c)
548
switch (pg_regex_strategy)
550
case PG_REGEX_LOCALE_C:
551
return (c <= (pg_wchar) 127 &&
552
(pg_char_properties[c] & PG_ISSPACE));
553
case PG_REGEX_LOCALE_WIDE:
554
#ifdef USE_WIDE_UPPER_LOWER
555
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
556
return iswspace((wint_t) c);
559
case PG_REGEX_LOCALE_1BYTE:
560
return (c <= (pg_wchar) UCHAR_MAX &&
561
isspace((unsigned char) c));
562
case PG_REGEX_LOCALE_WIDE_L:
563
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
564
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
565
return iswspace_l((wint_t) c, pg_regex_locale);
568
case PG_REGEX_LOCALE_1BYTE_L:
570
return (c <= (pg_wchar) UCHAR_MAX &&
571
isspace_l((unsigned char) c, pg_regex_locale));
575
return 0; /* can't get here, but keep compiler quiet */
579
pg_wc_toupper(pg_wchar c)
581
switch (pg_regex_strategy)
583
case PG_REGEX_LOCALE_C:
584
if (c <= (pg_wchar) 127)
585
return pg_ascii_toupper((unsigned char) c);
587
case PG_REGEX_LOCALE_WIDE:
588
/* force C behavior for ASCII characters, per comments above */
589
if (c <= (pg_wchar) 127)
590
return pg_ascii_toupper((unsigned char) c);
591
#ifdef USE_WIDE_UPPER_LOWER
592
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
593
return towupper((wint_t) c);
596
case PG_REGEX_LOCALE_1BYTE:
597
/* force C behavior for ASCII characters, per comments above */
598
if (c <= (pg_wchar) 127)
599
return pg_ascii_toupper((unsigned char) c);
600
if (c <= (pg_wchar) UCHAR_MAX)
601
return toupper((unsigned char) c);
603
case PG_REGEX_LOCALE_WIDE_L:
604
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
605
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
606
return towupper_l((wint_t) c, pg_regex_locale);
609
case PG_REGEX_LOCALE_1BYTE_L:
611
if (c <= (pg_wchar) UCHAR_MAX)
612
return toupper_l((unsigned char) c, pg_regex_locale);
616
return 0; /* can't get here, but keep compiler quiet */
620
pg_wc_tolower(pg_wchar c)
622
switch (pg_regex_strategy)
624
case PG_REGEX_LOCALE_C:
625
if (c <= (pg_wchar) 127)
626
return pg_ascii_tolower((unsigned char) c);
628
case PG_REGEX_LOCALE_WIDE:
629
/* force C behavior for ASCII characters, per comments above */
630
if (c <= (pg_wchar) 127)
631
return pg_ascii_tolower((unsigned char) c);
632
#ifdef USE_WIDE_UPPER_LOWER
633
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
634
return towlower((wint_t) c);
637
case PG_REGEX_LOCALE_1BYTE:
638
/* force C behavior for ASCII characters, per comments above */
639
if (c <= (pg_wchar) 127)
640
return pg_ascii_tolower((unsigned char) c);
641
if (c <= (pg_wchar) UCHAR_MAX)
642
return tolower((unsigned char) c);
644
case PG_REGEX_LOCALE_WIDE_L:
645
#if defined(HAVE_LOCALE_T) && defined(USE_WIDE_UPPER_LOWER)
646
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
647
return towlower_l((wint_t) c, pg_regex_locale);
650
case PG_REGEX_LOCALE_1BYTE_L:
652
if (c <= (pg_wchar) UCHAR_MAX)
653
return tolower_l((unsigned char) c, pg_regex_locale);
657
return 0; /* can't get here, but keep compiler quiet */