1190
* Divide pattern into fixed prefix and remainder. XXX we have to assume
1191
* default collation here, because we don't have access to the actual
1192
* input collation for the operator. FIXME ...
1204
* Pull out any fixed prefix implied by the pattern, and estimate the
1205
* fractional selectivity of the remainder of the pattern. Unlike many of
1206
* the other functions in this file, we use the pattern operator's actual
1207
* collation for this step. This is not because we expect the collation
1208
* to make a big difference in the selectivity estimate (it seldom would),
1209
* but because we want to be sure we cache compiled regexps under the
1210
* right cache key, so that they can be re-used at runtime.
1194
1212
patt = (Const *) other;
1195
pstatus = pattern_fixed_prefix(patt, ptype, DEFAULT_COLLATION_OID,
1213
pstatus = pattern_fixed_prefix(patt, ptype, collation,
1214
&prefix, &rest_selec);
1199
* If necessary, coerce the prefix constant to the right type. (The "rest"
1200
* constant need not be changed.)
1217
* If necessary, coerce the prefix constant to the right type.
1202
1219
if (prefix && prefix->consttype != vartype)
5017
5028
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5018
5029
errmsg("regular-expression matching not supported on type bytea")));
5020
if (case_insensitive)
5022
/* If case-insensitive, we need locale info */
5023
if (lc_ctype_is_c(collation))
5025
else if (collation != DEFAULT_COLLATION_OID)
5027
if (!OidIsValid(collation))
5030
* This typically means that the parser could not resolve a
5031
* conflict of implicit collations, so report it that way.
5034
(errcode(ERRCODE_INDETERMINATE_COLLATION),
5035
errmsg("could not determine which collation to use for regular expression"),
5036
errhint("Use the COLLATE clause to set the collation explicitly.")));
5038
locale = pg_newlocale_from_collation(collation);
5042
/* the right-hand const is type text for all of these */
5043
patt = TextDatumGetCString(patt_const->constvalue);
5046
* Check for ARE director prefix. It's worth our trouble to recognize
5047
* this because similar_escape() used to use it, and some other code might
5048
* still use it, to force ARE mode.
5051
if (strncmp(patt, "***:", 4) == 0)
5054
/* Pattern must be anchored left */
5055
if (patt[pos] != '^')
5059
*prefix_const = NULL;
5060
*rest_const = string_to_const(rest, typeid);
5062
return Pattern_Prefix_None;
5067
* If '|' is present in pattern, then there may be multiple alternatives
5068
* for the start of the string. (There are cases where this isn't so, for
5069
* instance if the '|' is inside parens, but detecting that reliably is too hard.)
5072
if (strchr(patt + pos, '|') != NULL)
5076
*prefix_const = NULL;
5077
*rest_const = string_to_const(rest, typeid);
5079
return Pattern_Prefix_None;
5082
/* OK, allocate space for pattern */
5083
match = palloc(strlen(patt) + 1);
5084
prev_match_pos = match_pos = 0;
5087
* We special-case the syntax '^(...)$' because psql uses it. But beware:
5088
* sequences beginning "(?" are not what they seem, unless they're "(?:".
5089
* (We must recognize that because of similar_escape().)
5091
have_leading_paren = false;
5092
if (patt[pos] == '(' &&
5093
(patt[pos + 1] != '?' || patt[pos + 2] == ':'))
5095
have_leading_paren = true;
5096
pos += (patt[pos + 1] != '?' ? 1 : 3);
5099
/* Scan remainder of pattern */
5106
* Check for characters that indicate multiple possible matches here.
5107
* Also, drop out at ')' or '$' so the termination test works right.
5109
if (patt[pos] == '.' ||
5117
/* Stop if case-varying character (it's sort of a wildcard) */
5118
if (case_insensitive &&
5119
pattern_char_isalpha(patt[pos], is_multibyte, locale, locale_is_c))
5123
* Check for quantifiers. Except for +, this means the preceding
5124
* character is optional, so we must remove it from the prefix too!
5126
if (patt[pos] == '*' ||
5130
match_pos = prev_match_pos;
5134
if (patt[pos] == '+')
5141
* Normally, backslash quotes the next character. But in AREs,
5142
* backslash followed by alphanumeric is an escape, not a quoted
5143
* character. Must treat it as having multiple possible matches.
5144
* Note: since only ASCII alphanumerics are escapes, we don't have to
5145
* be paranoid about multibyte or collations here.
5147
if (patt[pos] == '\\')
5149
if (isalnum((unsigned char) patt[pos + 1]))
5152
if (patt[pos] == '\0')
5155
/* save position in case we need to back up on next loop cycle */
5156
prev_match_pos = match_pos;
5158
/* must use encoding-aware processing here */
5159
len = pg_mblen(&patt[pos]);
5160
memcpy(&match[match_pos], &patt[pos], len);
5165
match[match_pos] = '\0';
5168
if (have_leading_paren && patt[pos] == ')')
5171
if (patt[pos] == '$' && patt[pos + 1] == '\0')
5173
rest = &patt[pos + 1];
5175
*prefix_const = string_to_const(match, typeid);
5176
*rest_const = string_to_const(rest, typeid);
5031
/* Use the regexp machinery to extract the prefix, if any */
5032
prefix = regexp_fixed_prefix(DatumGetTextPP(patt_const->constvalue),
5033
case_insensitive, collation,
5038
*prefix_const = NULL;
5040
if (rest_selec != NULL)
5042
char *patt = TextDatumGetCString(patt_const->constvalue);
5044
*rest_selec = regex_selectivity(patt, strlen(patt),
5050
return Pattern_Prefix_None;
5053
*prefix_const = string_to_const(prefix, typeid);
5055
if (rest_selec != NULL)
5059
/* Exact match, so there's no additional selectivity */
5064
char *patt = TextDatumGetCString(patt_const->constvalue);
5066
*rest_selec = regex_selectivity(patt, strlen(patt),
5181
5076
return Pattern_Prefix_Exact; /* pattern specifies exact match */
5184
*prefix_const = string_to_const(match, typeid);
5185
*rest_const = string_to_const(rest, typeid);
5191
5078
return Pattern_Prefix_Partial;
5193
return Pattern_Prefix_None;
5196
5081
Pattern_Prefix_Status
5197
5082
pattern_fixed_prefix(Const *patt, Pattern_Type ptype, Oid collation,
5198
Const **prefix, Const **rest)
5083
Const **prefix, Selectivity *rest_selec)
5200
5085
Pattern_Prefix_Status result;
5204
5089
case Pattern_Type_Like:
5205
result = like_fixed_prefix(patt, false, collation, prefix, rest);
5090
result = like_fixed_prefix(patt, false, collation,
5091
prefix, rest_selec);
5207
5093
case Pattern_Type_Like_IC:
5208
result = like_fixed_prefix(patt, true, collation, prefix, rest);
5094
result = like_fixed_prefix(patt, true, collation,
5095
prefix, rest_selec);
5210
5097
case Pattern_Type_Regex:
5211
result = regex_fixed_prefix(patt, false, collation, prefix, rest);
5098
result = regex_fixed_prefix(patt, false, collation,
5099
prefix, rest_selec);
5213
5101
case Pattern_Type_Regex_IC:
5214
result = regex_fixed_prefix(patt, true, collation, prefix, rest);
5102
result = regex_fixed_prefix(patt, true, collation,
5103
prefix, rest_selec);
5217
5106
elog(ERROR, "unrecognized ptype: %d", (int) ptype);