1
/*-------------------------------------------------------------------------
4
* like expression handling code.
7
* A big hack of the regexp.c code!! Contributed by
8
* Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
10
* Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
11
* Portions Copyright (c) 1994, Regents of the University of California
14
* src/backend/utils/adt/like.c
16
*-------------------------------------------------------------------------
22
#include "catalog/pg_collation.h"
23
#include "mb/pg_wchar.h"
24
#include "utils/builtins.h"
25
#include "utils/pg_locale.h"
30
#define LIKE_ABORT (-1)
33
static int SB_MatchText(char *t, int tlen, char *p, int plen,
34
pg_locale_t locale, bool locale_is_c);
35
static text *SB_do_like_escape(text *, text *);
37
static int MB_MatchText(char *t, int tlen, char *p, int plen,
38
pg_locale_t locale, bool locale_is_c);
39
static text *MB_do_like_escape(text *, text *);
41
static int UTF8_MatchText(char *t, int tlen, char *p, int plen,
42
pg_locale_t locale, bool locale_is_c);
44
static int SB_IMatchText(char *t, int tlen, char *p, int plen,
45
pg_locale_t locale, bool locale_is_c);
47
static int GenericMatchText(char *s, int slen, char *p, int plen);
48
static int Generic_Text_IC_like(text *str, text *pat, Oid collation);
50
/*--------------------
51
* Support routine for MatchText. Compares given multibyte streams
52
* as wide characters. Returns 1 if they match, otherwise returns 0.
56
wchareq(char *p1, char *p2)
60
/* Optimization: quickly compare the first byte. */
64
p1_len = pg_mblen(p1);
65
if (pg_mblen(p2) != p1_len)
68
/* They are the same length */
78
* Formerly we had a routine iwchareq() here that tried to do case-insensitive
79
* comparison of multibyte characters. It did not work at all, however,
80
* because it relied on tolower() which has a single-byte API ... and
81
* towlower() wouldn't be much better since we have no suitably cheap way
82
* of getting a single character transformed to the system's wchar_t format.
83
* So now, we just downcase the strings using lower() and apply regular LIKE
84
* comparison. This should be revisited when we install better locale support.
88
* We do handle case-insensitive matching for single-byte encodings using
89
* fold-on-the-fly processing, however.
92
SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
95
return pg_ascii_tolower(c);
98
return tolower_l(c, locale);
101
return pg_tolower(c);
105
/*
 * NextByte: step pointer p forward by one byte and reduce the remaining
 * byte count plen by one. Each argument is evaluated exactly once.
 */
#define NextByte(p, plen) ((p)++, (plen)--)
107
/* Set up to compile like_match.c for multibyte characters */
108
#define CHAREQ(p1, p2) wchareq((p1), (p2))
109
#define NextChar(p, plen) \
110
do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
111
#define CopyAdvChar(dst, src, srclen) \
112
do { int __l = pg_mblen(src); \
115
*(dst)++ = *(src)++; \
118
#define MatchText MB_MatchText
119
#define do_like_escape MB_do_like_escape
121
#include "like_match.c"
123
/* Set up to compile like_match.c for single-byte characters */
124
#define CHAREQ(p1, p2) (*(p1) == *(p2))
125
#define NextChar(p, plen) NextByte((p), (plen))
126
#define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
128
#define MatchText SB_MatchText
129
#define do_like_escape SB_do_like_escape
131
#include "like_match.c"
133
/* Set up to compile like_match.c for single-byte case-insensitive matches */
134
#define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c)
135
#define NextChar(p, plen) NextByte((p), (plen))
136
#define MatchText SB_IMatchText
138
#include "like_match.c"
140
/* Set up to compile like_match.c for UTF8 encoding, using fast NextChar */
142
#define NextChar(p, plen) \
143
do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 )
144
#define MatchText UTF8_MatchText
146
#include "like_match.c"
148
/* Generic for all cases not requiring inline case-folding */
150
/*
 * GenericMatchText: match text s (slen bytes) against pattern p (plen
 * bytes) without case folding, choosing the matcher by database encoding:
 *   - maximum character length 1  -> SB_MatchText
 *   - UTF8                        -> UTF8_MatchText
 *   - anything else               -> MB_MatchText
 * Every matcher is called with locale 0 and locale_is_c = true.
 * The matcher's result code is returned unchanged.
 */
GenericMatchText(char *s, int slen, char *p, int plen)
152
/* Single-byte encodings: every character is exactly one byte. */
if (pg_database_encoding_max_length() == 1)
153
return SB_MatchText(s, slen, p, plen, 0, true);
154
/* UTF8 gets its own matcher built with the fast NextChar. */
else if (GetDatabaseEncoding() == PG_UTF8)
155
return UTF8_MatchText(s, slen, p, plen, 0, true);
157
/* Remaining multibyte encodings use the generic multibyte matcher. */
return MB_MatchText(s, slen, p, plen, 0, true);
161
/*
 * Generic_Text_IC_like: case-insensitive match of str against pat under
 * the given collation; returns the result code of the matcher it calls.
 *
 * Multibyte encodings (maximum character length > 1): both pat and str
 * are replaced by the output of lower() invoked with the collation, and
 * the lowered values are matched with UTF8_MatchText (UTF8 databases) or
 * MB_MatchText (other encodings), passing locale 0 and locale_is_c = true.
 *
 * Single-byte encodings: locale and locale_is_c are prepared from the
 * collation and handed to SB_IMatchText. When the collation is neither
 * C-ctype nor the default, an invalid collation OID is reported as
 * ERRCODE_INDETERMINATE_COLLATION; otherwise the locale comes from
 * pg_newlocale_from_collation().
 */
Generic_Text_IC_like(text *str, text *pat, Oid collation)
169
* For efficiency reasons, in the single byte case we don't call lower()
170
* on the pattern and text, but instead call SB_lower_char on each
171
* character. In the multi-byte case we don't have much choice :-(
174
if (pg_database_encoding_max_length() > 1)
176
/* lower's result is never packed, so OK to use old macros here */
177
pat = DatumGetTextP(DirectFunctionCall1Coll(lower, collation,
178
PointerGetDatum(pat)));
180
plen = (VARSIZE(pat) - VARHDRSZ);
181
str = DatumGetTextP(DirectFunctionCall1Coll(lower, collation,
182
PointerGetDatum(str)));
184
slen = (VARSIZE(str) - VARHDRSZ);
185
if (GetDatabaseEncoding() == PG_UTF8)
186
return UTF8_MatchText(s, slen, p, plen, 0, true);
188
return MB_MatchText(s, slen, p, plen, 0, true);
193
* Here we need to prepare locale information for SB_lower_char. This
194
* should match the methods used in str_tolower().
196
pg_locale_t locale = 0;
197
bool locale_is_c = false;
199
if (lc_ctype_is_c(collation))
201
else if (collation != DEFAULT_COLLATION_OID)
203
if (!OidIsValid(collation))
206
* This typically means that the parser could not resolve a
207
* conflict of implicit collations, so report it that way.
210
(errcode(ERRCODE_INDETERMINATE_COLLATION),
211
errmsg("could not determine which collation to use for ILIKE"),
212
errhint("Use the COLLATE clause to set the collation explicitly.")));
214
locale = pg_newlocale_from_collation(collation);
217
p = VARDATA_ANY(pat);
218
plen = VARSIZE_ANY_EXHDR(pat);
219
s = VARDATA_ANY(str);
220
slen = VARSIZE_ANY_EXHDR(str);
221
return SB_IMatchText(s, slen, p, plen, locale, locale_is_c);
226
* interface routines called by the function manager
230
/*
 * namelike: LIKE with a name value (argument 0) and a text pattern
 * (argument 1). Returns true only when GenericMatchText reports
 * LIKE_TRUE.
 */
namelike(PG_FUNCTION_ARGS)
232
Name str = PG_GETARG_NAME(0);
233
text *pat = PG_GETARG_TEXT_PP(1);
242
/* Pattern bytes and length, excluding the varlena header. */
p = VARDATA_ANY(pat);
243
plen = VARSIZE_ANY_EXHDR(pat);
245
result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE);
247
PG_RETURN_BOOL(result);
251
/*
 * namenlike: negated LIKE with a name value (argument 0) and a text
 * pattern (argument 1). Returns true for every GenericMatchText result
 * other than LIKE_TRUE.
 */
namenlike(PG_FUNCTION_ARGS)
253
Name str = PG_GETARG_NAME(0);
254
text *pat = PG_GETARG_TEXT_PP(1);
263
/* Pattern bytes and length, excluding the varlena header. */
p = VARDATA_ANY(pat);
264
plen = VARSIZE_ANY_EXHDR(pat);
266
result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE);
268
PG_RETURN_BOOL(result);
272
/*
 * textlike: LIKE with a text value (argument 0) and a text pattern
 * (argument 1). Returns true only when GenericMatchText reports
 * LIKE_TRUE.
 */
textlike(PG_FUNCTION_ARGS)
274
text *str = PG_GETARG_TEXT_PP(0);
275
text *pat = PG_GETARG_TEXT_PP(1);
282
/* Data pointers and payload lengths, excluding the varlena headers. */
s = VARDATA_ANY(str);
283
slen = VARSIZE_ANY_EXHDR(str);
284
p = VARDATA_ANY(pat);
285
plen = VARSIZE_ANY_EXHDR(pat);
287
result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE);
289
PG_RETURN_BOOL(result);
293
/*
 * textnlike: negated LIKE with a text value (argument 0) and a text
 * pattern (argument 1). Returns true for every GenericMatchText result
 * other than LIKE_TRUE.
 */
textnlike(PG_FUNCTION_ARGS)
295
text *str = PG_GETARG_TEXT_PP(0);
296
text *pat = PG_GETARG_TEXT_PP(1);
303
/* Data pointers and payload lengths, excluding the varlena headers. */
s = VARDATA_ANY(str);
304
slen = VARSIZE_ANY_EXHDR(str);
305
p = VARDATA_ANY(pat);
306
plen = VARSIZE_ANY_EXHDR(pat);
308
result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE);
310
PG_RETURN_BOOL(result);
314
/*
 * bytealike: LIKE with a bytea value (argument 0) and a bytea pattern
 * (argument 1). Always uses the single-byte matcher SB_MatchText,
 * whatever the database encoding, with locale 0 and locale_is_c = true.
 * Returns true only when the matcher reports LIKE_TRUE.
 */
bytealike(PG_FUNCTION_ARGS)
316
bytea *str = PG_GETARG_BYTEA_PP(0);
317
bytea *pat = PG_GETARG_BYTEA_PP(1);
324
/* Data pointers and payload lengths, excluding the varlena headers. */
s = VARDATA_ANY(str);
325
slen = VARSIZE_ANY_EXHDR(str);
326
p = VARDATA_ANY(pat);
327
plen = VARSIZE_ANY_EXHDR(pat);
329
result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE);
331
PG_RETURN_BOOL(result);
335
/*
 * byteanlike: negated LIKE with a bytea value (argument 0) and a bytea
 * pattern (argument 1). Always uses the single-byte matcher SB_MatchText
 * with locale 0 and locale_is_c = true. Returns true for every matcher
 * result other than LIKE_TRUE.
 */
byteanlike(PG_FUNCTION_ARGS)
337
bytea *str = PG_GETARG_BYTEA_PP(0);
338
bytea *pat = PG_GETARG_BYTEA_PP(1);
345
/* Data pointers and payload lengths, excluding the varlena headers. */
s = VARDATA_ANY(str);
346
slen = VARSIZE_ANY_EXHDR(str);
347
p = VARDATA_ANY(pat);
348
plen = VARSIZE_ANY_EXHDR(pat);
350
result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE);
352
PG_RETURN_BOOL(result);
356
* Case-insensitive versions
360
/*
 * nameiclike: case-insensitive LIKE with a name value (argument 0) and a
 * text pattern (argument 1). The name is converted to text through
 * name_text, then matched by Generic_Text_IC_like using the collation of
 * this call. Returns true only for LIKE_TRUE.
 */
nameiclike(PG_FUNCTION_ARGS)
362
Name str = PG_GETARG_NAME(0);
363
text *pat = PG_GETARG_TEXT_PP(1);
367
strtext = DatumGetTextP(DirectFunctionCall1(name_text,
369
result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) == LIKE_TRUE);
371
PG_RETURN_BOOL(result);
375
/*
 * nameicnlike: negated case-insensitive LIKE with a name value
 * (argument 0) and a text pattern (argument 1). The name is converted to
 * text through name_text, then matched by Generic_Text_IC_like using the
 * collation of this call. Returns true for every result other than
 * LIKE_TRUE.
 */
nameicnlike(PG_FUNCTION_ARGS)
377
Name str = PG_GETARG_NAME(0);
378
text *pat = PG_GETARG_TEXT_PP(1);
382
strtext = DatumGetTextP(DirectFunctionCall1(name_text,
384
result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) != LIKE_TRUE);
386
PG_RETURN_BOOL(result);
390
/*
 * texticlike: case-insensitive LIKE with a text value (argument 0) and a
 * text pattern (argument 1), matched by Generic_Text_IC_like using the
 * collation of this call. Returns true only for LIKE_TRUE.
 */
texticlike(PG_FUNCTION_ARGS)
392
text *str = PG_GETARG_TEXT_PP(0);
393
text *pat = PG_GETARG_TEXT_PP(1);
396
result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE);
398
PG_RETURN_BOOL(result);
402
/*
 * texticnlike: negated case-insensitive LIKE with a text value
 * (argument 0) and a text pattern (argument 1), matched by
 * Generic_Text_IC_like using the collation of this call. Returns true
 * for every result other than LIKE_TRUE.
 */
texticnlike(PG_FUNCTION_ARGS)
404
text *str = PG_GETARG_TEXT_PP(0);
405
text *pat = PG_GETARG_TEXT_PP(1);
408
result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) != LIKE_TRUE);
410
PG_RETURN_BOOL(result);
414
* like_escape() --- given a pattern and an ESCAPE string,
415
* convert the pattern to use Postgres' standard backslash escape convention.
418
/*
 * Takes the pattern as argument 0 and the ESCAPE string as argument 1.
 * The conversion is done by SB_do_like_escape when the database encoding
 * has a maximum character length of 1, and by MB_do_like_escape
 * otherwise; the converted pattern is returned as text.
 */
like_escape(PG_FUNCTION_ARGS)
420
text *pat = PG_GETARG_TEXT_PP(0);
421
text *esc = PG_GETARG_TEXT_PP(1);
424
/* Pick the single-byte or multibyte build of do_like_escape. */
if (pg_database_encoding_max_length() == 1)
425
result = SB_do_like_escape(pat, esc);
427
result = MB_do_like_escape(pat, esc);
429
PG_RETURN_TEXT_P(result);
433
* like_escape_bytea() --- given a pattern and an ESCAPE string,
434
* convert the pattern to use Postgres' standard backslash escape convention.
437
/*
 * Takes the pattern as argument 0 and the ESCAPE string as argument 1,
 * both bytea. The single-byte routine SB_do_like_escape is always used;
 * the bytea arguments are cast to text pointers for the call and the
 * result is returned as bytea.
 */
like_escape_bytea(PG_FUNCTION_ARGS)
439
bytea *pat = PG_GETARG_BYTEA_PP(0);
440
bytea *esc = PG_GETARG_BYTEA_PP(1);
441
bytea *result = SB_do_like_escape((text *) pat, (text *) esc);
443
PG_RETURN_BYTEA_P((bytea *) result);