1
1
/* Extended regular expression matching and search library.
2
Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free
3
Software Foundation, Inc.
2
Copyright (C) 2002-2013 Free Software Foundation, Inc.
4
3
This file is part of the GNU C Library.
5
4
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
7
This program is free software; you can redistribute it and/or modify
8
it under the terms of the GNU General Public License as published by
9
the Free Software Foundation; either version 3, or (at your option)
6
The GNU C Library is free software; you can redistribute it and/or
7
modify it under the terms of the GNU General Public
8
License as published by the Free Software Foundation; either
9
version 3 of the License, or (at your option) any later version.
12
This program is distributed in the hope that it will be useful,
11
The GNU C Library is distributed in the hope that it will be useful,
13
12
but WITHOUT ANY WARRANTY; without even the implied warranty of
14
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
GNU General Public License for more details.
13
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
General Public License for more details.
17
You should have received a copy of the GNU General Public License along
18
with this program; if not, write to the Free Software Foundation,
19
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
16
You should have received a copy of the GNU General Public
17
License along with the GNU C Library; if not, see
18
<http://www.gnu.org/licenses/>. */
21
20
static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
22
21
size_t length, reg_syntax_t syntax);
96
95
re_charset_t *mbcset,
97
96
Idx *char_class_alloc,
98
const unsigned char *class_name,
97
const char *class_name,
99
98
reg_syntax_t syntax);
100
99
#else /* not RE_ENABLE_I18N */
101
100
static reg_errcode_t build_equiv_class (bitset_t sbcset,
102
101
const unsigned char *name);
103
102
static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
105
const unsigned char *class_name,
104
const char *class_name,
106
105
reg_syntax_t syntax);
107
106
#endif /* not RE_ENABLE_I18N */
108
107
static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
109
108
RE_TRANSLATE_TYPE trans,
110
const unsigned char *class_name,
111
const unsigned char *extra,
109
const char *class_name,
112
111
bool non_match, reg_errcode_t *err);
113
112
static bin_tree_t *create_tree (re_dfa_t *dfa,
114
113
bin_tree_t *left, bin_tree_t *right,
207
206
compiles PATTERN (of length LENGTH) and puts the result in BUFP.
208
207
Returns 0 if the pattern was valid, otherwise an error string.
210
Assumes the `allocated' (and perhaps `buffer') and `translate' fields
209
Assumes the 'allocated' (and perhaps 'buffer') and 'translate' fields
211
210
are set in BUFP on entry. */
242
241
weak_alias (__re_compile_pattern, re_compile_pattern)
245
/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
244
/* Set by 're_set_syntax' to the current regexp syntax to recognize. Can
246
245
also be assigned to arbitrarily: each pattern buffer stores its own
247
246
syntax, so it can be changed between regex compilations. */
248
247
/* This has no initializer because initialized variables in Emacs
274
273
re_compile_fastmap (bufp)
275
274
struct re_pattern_buffer *bufp;
277
re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
276
re_dfa_t *dfa = bufp->buffer;
278
277
char *fastmap = bufp->fastmap;
280
279
memset (fastmap, '\0', sizeof (char) * SBC_MAX);
308
307
re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
311
re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
310
re_dfa_t *dfa = bufp->buffer;
313
312
bool icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
314
313
for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
440
439
PREG is a regex_t *. We do not expect any fields to be initialized,
441
440
since POSIX says we shouldn't. Thus, we set
443
`buffer' to the compiled pattern;
444
`used' to the length of the compiled pattern;
445
`syntax' to RE_SYNTAX_POSIX_EXTENDED if the
442
'buffer' to the compiled pattern;
443
'used' to the length of the compiled pattern;
444
'syntax' to RE_SYNTAX_POSIX_EXTENDED if the
446
445
REG_EXTENDED bit in CFLAGS is set; otherwise, to
447
446
RE_SYNTAX_POSIX_BASIC;
448
`newline_anchor' to REG_NEWLINE being set in CFLAGS;
449
`fastmap' to an allocated space for the fastmap;
450
`fastmap_accurate' to zero;
451
`re_nsub' to the number of subexpressions in PATTERN.
447
'newline_anchor' to REG_NEWLINE being set in CFLAGS;
448
'fastmap' to an allocated space for the fastmap;
449
'fastmap_accurate' to zero;
450
're_nsub' to the number of subexpressions in PATTERN.
453
452
PATTERN is the address of the pattern string.
551
550
if (BE (errcode < 0
552
551
|| errcode >= (int) (sizeof (__re_error_msgid_idx)
553
552
/ sizeof (__re_error_msgid_idx[0])), 0))
554
/* Only error codes returned by the rest of the code should be passed
555
to this routine. If we are given anything else, or if other regex
556
code generates an invalid error code, then the program has a bug.
557
Dump core so we can fix it. */
558
553
msg = gettext ("unknown regexp error");
560
555
msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
587
582
static const bitset_t utf8_sb_map =
589
584
/* Set the first 128 bits. */
590
# if 4 * BITSET_WORD_BITS < ASCII_CHARS
591
# error "bitset_word_t is narrower than 32 bits"
592
# elif 3 * BITSET_WORD_BITS < ASCII_CHARS
586
[0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
588
# if 4 * BITSET_WORD_BITS < ASCII_CHARS
589
# error "bitset_word_t is narrower than 32 bits"
590
# elif 3 * BITSET_WORD_BITS < ASCII_CHARS
593
591
BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX,
594
# elif 2 * BITSET_WORD_BITS < ASCII_CHARS
592
# elif 2 * BITSET_WORD_BITS < ASCII_CHARS
595
593
BITSET_WORD_MAX, BITSET_WORD_MAX,
596
# elif 1 * BITSET_WORD_BITS < ASCII_CHARS
594
# elif 1 * BITSET_WORD_BITS < ASCII_CHARS
600
598
>> (SBC_MAX % BITSET_WORD_BITS == 0
602
600
: BITSET_WORD_BITS - SBC_MAX % BITSET_WORD_BITS))
719
718
+ __re_error_msgid_idx[(int) REG_ESPACE]);
722
/* Since `re_exec' always passes NULL for the `regs' argument, we
721
/* Since 're_exec' always passes NULL for the 'regs' argument, we
723
722
don't need to initialize the pattern buffer fields which affect it. */
725
724
/* Match anchors at newlines. */
765
764
preg->regs_allocated = REGS_UNALLOCATED;
767
766
/* Initialize the dfa. */
768
dfa = (re_dfa_t *) preg->buffer;
769
768
if (BE (preg->allocated < sizeof (re_dfa_t), 0))
771
770
/* If zero allocated, but buffer is non-null, try to realloc
874
873
calculation below, and for similar doubling calculations
875
874
elsewhere. And it's <= rather than <, because some of the
876
875
doubling calculations add 1 afterwards. */
877
if (BE (SIZE_MAX / max_object_size / 2 <= pat_len, 0))
876
if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) / 2 <= pat_len, 0))
878
877
return REG_ESPACE;
880
879
dfa->nodes_alloc = pat_len + 1;
899
898
codeset_name = nl_langinfo (CODESET);
900
if (strcasecmp (codeset_name, "UTF-8") == 0
901
|| strcasecmp (codeset_name, "UTF8") == 0)
899
if ((codeset_name[0] == 'U' || codeset_name[0] == 'u')
900
&& (codeset_name[1] == 'T' || codeset_name[1] == 't')
901
&& (codeset_name[2] == 'F' || codeset_name[2] == 'f')
902
&& strcmp (codeset_name + 3 + (codeset_name[3] == '-'), "8") == 0)
902
903
dfa->is_utf8 = 1;
904
905
/* We check exhaustively in the loop below if this charset is a
948
949
internal_function
949
950
init_word_char (re_dfa_t *dfa)
952
955
dfa->word_ops_used = 1;
953
for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
956
if (BE (dfa->map_notascii == 0, 1))
958
bitset_word_t bits0 = 0x00000000;
959
bitset_word_t bits1 = 0x03ff0000;
960
bitset_word_t bits2 = 0x87fffffe;
961
bitset_word_t bits3 = 0x07fffffe;
962
if (BITSET_WORD_BITS == 64)
964
dfa->word_char[0] = bits1 << 31 << 1 | bits0;
965
dfa->word_char[1] = bits3 << 31 << 1 | bits2;
968
else if (BITSET_WORD_BITS == 32)
970
dfa->word_char[0] = bits0;
971
dfa->word_char[1] = bits1;
972
dfa->word_char[2] = bits2;
973
dfa->word_char[3] = bits3;
980
if (BE (dfa->is_utf8, 1))
982
memset (&dfa->word_char[i], '\0', (SBC_MAX - ch) / 8);
988
for (; i < BITSET_WORDS; ++i)
954
989
for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
955
990
if (isalnum (ch) || ch == '_')
956
991
dfa->word_char[i] |= (bitset_word_t) 1 << j;
962
997
free_workarea_compile (regex_t *preg)
964
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
999
re_dfa_t *dfa = preg->buffer;
965
1000
bin_tree_storage_t *storage, *next;
966
1001
for (storage = dfa->str_tree_storage; storage; storage = next)
1326
1361
static bin_tree_t *
1327
1362
lower_subexp (reg_errcode_t *err, regex_t *preg, bin_tree_t *node)
1329
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
1364
re_dfa_t *dfa = preg->buffer;
1330
1365
bin_tree_t *body = node->left;
1331
1366
bin_tree_t *op, *cls, *tree1, *tree;
1660
1695
/* If we have already calculated, skip it. */
1661
1696
if (dfa->eclosures[node_idx].nelem != 0)
1663
/* Calculate epsilon closure of `node_idx'. */
1698
/* Calculate epsilon closure of 'node_idx'. */
1664
1699
err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, true);
1665
1700
if (BE (err != REG_NOERROR, 0))
1711
1746
re_node_set eclosure_elem;
1712
1747
Idx edest = dfa->edests[node].elems[i];
1713
/* If calculating the epsilon closure of `edest' is in progress,
1748
/* If calculating the epsilon closure of 'edest' is in progress,
1714
1749
return intermediate result. */
1715
1750
if (dfa->eclosures[edest].nelem == REG_MISSING)
1717
1752
incomplete = true;
1720
/* If we haven't calculated the epsilon closure of `edest' yet,
1755
/* If we haven't calculated the epsilon closure of 'edest' yet,
1721
1756
calculate now. Otherwise use calculated epsilon closure. */
1722
1757
if (dfa->eclosures[edest].nelem == 0)
1729
1764
eclosure_elem = dfa->eclosures[edest];
1730
/* Merge the epsilon closure of `edest'. */
1765
/* Merge the epsilon closure of 'edest'. */
1731
1766
err = re_node_set_merge (&eclosure, &eclosure_elem);
1732
1767
if (BE (err != REG_NOERROR, 0))
1734
/* If the epsilon closure of `edest' is incomplete,
1769
/* If the epsilon closure of 'edest' is incomplete,
1735
1770
the epsilon closure of this node is also incomplete. */
1736
1771
if (dfa->eclosures[edest].nelem == 0)
2094
2129
/* Entry point of the parser.
2095
2130
Parse the regular expression REGEXP and return the structure tree.
2096
If an error is occured, ERR is set by error code, and return NULL.
2131
If an error occurs, ERR is set by error code, and return NULL.
2097
2132
This function build the following tree, from regular expression <reg_exp>:
2107
2142
parse (re_string_t *regexp, regex_t *preg, reg_syntax_t syntax,
2108
2143
reg_errcode_t *err)
2110
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2145
re_dfa_t *dfa = preg->buffer;
2111
2146
bin_tree_t *tree, *eor, *root;
2112
2147
re_token_t current_token;
2113
2148
dfa->syntax = syntax;
2136
2171
<branch1> <branch2>
2138
ALT means alternative, which represents the operator `|'. */
2173
ALT means alternative, which represents the operator '|'. */
2140
2175
static bin_tree_t *
2141
2176
parse_reg_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
2142
2177
reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
2144
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2179
re_dfa_t *dfa = preg->buffer;
2145
2180
bin_tree_t *tree, *branch = NULL;
2146
2181
tree = parse_branch (regexp, preg, token, syntax, nest, err);
2147
2182
if (BE (*err != REG_NOERROR && tree == NULL, 0))
2183
2218
reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
2185
2220
bin_tree_t *tree, *expr;
2186
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2221
re_dfa_t *dfa = preg->buffer;
2187
2222
tree = parse_expression (regexp, preg, token, syntax, nest, err);
2188
2223
if (BE (*err != REG_NOERROR && tree == NULL, 0))
2194
2229
expr = parse_expression (regexp, preg, token, syntax, nest, err);
2195
2230
if (BE (*err != REG_NOERROR && expr == NULL, 0))
2233
postorder (tree, free_tree, NULL);
2199
2236
if (tree != NULL && expr != NULL)
2201
tree = create_tree (dfa, tree, expr, CONCAT);
2238
bin_tree_t *newtree = create_tree (dfa, tree, expr, CONCAT);
2239
if (newtree == NULL)
2241
postorder (expr, free_tree, NULL);
2242
postorder (tree, free_tree, NULL);
2204
2243
*err = REG_ESPACE;
2208
2248
else if (tree == NULL)
2222
2262
parse_expression (re_string_t *regexp, regex_t *preg, re_token_t *token,
2223
2263
reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
2225
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2265
re_dfa_t *dfa = preg->buffer;
2226
2266
bin_tree_t *tree;
2227
2267
switch (token->type)
2379
2419
case OP_NOTWORD:
2380
2420
tree = build_charclass_op (dfa, regexp->trans,
2381
(const unsigned char *) "alnum",
2382
(const unsigned char *) "_",
2383
2423
token->type == OP_NOTWORD, err);
2384
2424
if (BE (*err != REG_NOERROR && tree == NULL, 0))
2388
2428
case OP_NOTSPACE:
2389
2429
tree = build_charclass_op (dfa, regexp->trans,
2390
(const unsigned char *) "space",
2391
(const unsigned char *) "",
2392
2432
token->type == OP_NOTSPACE, err);
2393
2433
if (BE (*err != REG_NOERROR && tree == NULL, 0))
2438
2478
parse_sub_exp (re_string_t *regexp, regex_t *preg, re_token_t *token,
2439
2479
reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
2441
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
2481
re_dfa_t *dfa = preg->buffer;
2442
2482
bin_tree_t *tree;
2443
2483
size_t cur_nsub;
2444
2484
cur_nsub = preg->re_nsub++;
2453
2493
tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
2454
2494
if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
2497
postorder (tree, free_tree, NULL);
2456
2500
if (BE (*err != REG_NOERROR, 0))
2570
2620
old_tree = NULL;
2572
2622
if (elem->token.type == SUBEXP)
2573
postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx);
2624
uintptr_t subidx = elem->token.opr.idx;
2625
postorder (elem, mark_opt_subexp, (void *) subidx);
2575
2628
tree = create_tree (dfa, elem, NULL,
2576
2629
(end == REG_MISSING ? OP_DUP_ASTERISK : OP_ALT));
2616
2669
Build the range expression which starts from START_ELEM, and ends
2617
2670
at END_ELEM. The result are written to MBCSET and SBCSET.
2618
2671
RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2619
mbcset->range_ends, is a pointer argument sinse we may
2672
mbcset->range_ends, is a pointer argument since we may
2622
2675
static reg_errcode_t
2656
2709
wint_t start_wc;
2658
wchar_t cmp_buf[6] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
2660
2712
start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
2661
2713
: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
2669
2721
? __btowc (end_ch) : end_elem->opr.wch);
2670
2722
if (start_wc == WEOF || end_wc == WEOF)
2671
2723
return REG_ECOLLATE;
2672
cmp_buf[0] = start_wc;
2673
cmp_buf[4] = end_wc;
2675
if (BE ((syntax & RE_NO_EMPTY_RANGES)
2676
&& wcscoll (cmp_buf, cmp_buf + 4) > 0, 0))
2724
else if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_wc > end_wc, 0))
2677
2725
return REG_ERANGE;
2679
2727
/* Got valid collation sequence values, add them as a new entry.
2714
2762
/* Build the table for single byte characters. */
2715
2763
for (wc = 0; wc < SBC_MAX; ++wc)
2718
if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
2719
&& wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
2765
if (start_wc <= wc && wc <= end_wc)
2720
2766
bitset_set (sbcset, wc);
2751
2797
static reg_errcode_t
2752
2798
internal_function
2753
build_collating_symbol (bitset_t sbcset,
2754
2799
# ifdef RE_ENABLE_I18N
2755
re_charset_t *mbcset, Idx *coll_sym_alloc,
2757
const unsigned char *name)
2800
build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
2801
Idx *coll_sym_alloc, const unsigned char *name)
2802
# else /* not RE_ENABLE_I18N */
2803
build_collating_symbol (bitset_t sbcset, const unsigned char *name)
2804
# endif /* not RE_ENABLE_I18N */
2759
2806
size_t name_len = strlen ((const char *) name);
2760
2807
if (BE (name_len != 1, 0))
2782
2829
const int32_t *symb_table;
2783
2830
const unsigned char *extra;
2785
/* Local function for parse_bracket_exp used in _LIBC environement.
2786
Seek the collating symbol entry correspondings to NAME.
2787
Return the index of the symbol in the SYMB_TABLE. */
2832
/* Local function for parse_bracket_exp used in _LIBC environment.
2833
Seek the collating symbol entry corresponding to NAME.
2834
Return the index of the symbol in the SYMB_TABLE,
2835
or -1 if not found. */
2789
2837
auto inline int32_t
2790
__attribute ((always_inline))
2791
seek_collating_symbol_entry (name, name_len)
2792
const unsigned char *name;
2838
__attribute__ ((always_inline))
2839
seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
2795
int32_t hash = elem_hash ((const char *) name, name_len);
2796
int32_t elem = hash % table_size;
2797
if (symb_table[2 * elem] != 0)
2799
int32_t second = hash % (table_size - 2) + 1;
2803
/* First compare the hashing value. */
2804
if (symb_table[2 * elem] == hash
2805
/* Compare the length of the name. */
2806
&& name_len == extra[symb_table[2 * elem + 1]]
2807
/* Compare the name. */
2808
&& memcmp (name, &extra[symb_table[2 * elem + 1] + 1],
2811
/* Yep, this is the entry. */
2818
while (symb_table[2 * elem] != 0);
2843
for (elem = 0; elem < table_size; elem++)
2844
if (symb_table[2 * elem] != 0)
2846
int32_t idx = symb_table[2 * elem + 1];
2847
/* Skip the name of collating element name. */
2848
idx += 1 + extra[idx];
2849
if (/* Compare the length of the name. */
2850
name_len == extra[idx]
2851
/* Compare the name. */
2852
&& memcmp (name, &extra[idx + 1], name_len) == 0)
2853
/* Yep, this is the entry. */
2823
2859
/* Local function for parse_bracket_exp used in _LIBC environment.
2825
2861
Return the value if succeeded, UINT_MAX otherwise. */
2827
2863
auto inline unsigned int
2828
__attribute ((always_inline))
2829
lookup_collation_sequence_value (br_elem)
2830
bracket_elem_t *br_elem;
2864
__attribute__ ((always_inline))
2865
lookup_collation_sequence_value (bracket_elem_t *br_elem)
2832
2867
if (br_elem->type == SB_CHAR)
2873
2908
/* Return the collation sequence value. */
2874
2909
return *(unsigned int *) (extra + idx);
2876
else if (symb_table[2 * elem] == 0 && sym_name_len == 1)
2911
else if (sym_name_len == 1)
2878
2913
/* No valid character. Match it as a single byte
2886
2921
return UINT_MAX;
2889
/* Local function for parse_bracket_exp used in _LIBC environement.
2924
/* Local function for parse_bracket_exp used in _LIBC environment.
2890
2925
Build the range expression which starts from START_ELEM, and ends
2891
2926
at END_ELEM. The result are written to MBCSET and SBCSET.
2892
2927
RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2893
mbcset->range_ends, is a pointer argument sinse we may
2928
mbcset->range_ends, is a pointer argument since we may
2896
2931
auto inline reg_errcode_t
2897
__attribute ((always_inline))
2898
build_range_exp (sbcset, mbcset, range_alloc, start_elem, end_elem)
2899
re_charset_t *mbcset;
2902
bracket_elem_t *start_elem, *end_elem;
2932
__attribute__ ((always_inline))
2933
build_range_exp (bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,
2934
bracket_elem_t *start_elem, bracket_elem_t *end_elem)
2904
2936
unsigned int ch;
2905
2937
uint32_t start_collseq;
2970
3003
return REG_NOERROR;
2973
/* Local function for parse_bracket_exp used in _LIBC environement.
3006
/* Local function for parse_bracket_exp used in _LIBC environment.
2974
3007
Build the collating element which is represented by NAME.
2975
3008
The result are written to MBCSET and SBCSET.
2976
3009
COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
2977
pointer argument sinse we may update it. */
3010
pointer argument since we may update it. */
2979
3012
auto inline reg_errcode_t
2980
__attribute ((always_inline))
2981
build_collating_symbol (sbcset, mbcset, coll_sym_alloc, name)
2982
re_charset_t *mbcset;
2983
Idx *coll_sym_alloc;
2985
const unsigned char *name;
3013
__attribute__ ((always_inline))
3014
build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
3015
Idx *coll_sym_alloc, const unsigned char *name)
2987
3017
int32_t elem, idx;
2988
3018
size_t name_len = strlen ((const char *) name);
2989
3019
if (nrules != 0)
2991
3021
elem = seek_collating_symbol_entry (name, name_len);
2992
if (symb_table[2 * elem] != 0)
2994
3024
/* We found the entry. */
2995
3025
idx = symb_table[2 * elem + 1];
2996
3026
/* Skip the name of collating element name. */
2997
3027
idx += 1 + extra[idx];
2999
else if (symb_table[2 * elem] == 0 && name_len == 1)
3029
else if (name_len == 1)
3001
3031
/* No valid character, treat it as a normal
3235
3269
#ifdef RE_ENABLE_I18N
3236
3270
mbcset, &char_class_alloc,
3237
3271
#endif /* RE_ENABLE_I18N */
3238
start_elem.opr.name, syntax);
3272
(const char *) start_elem.opr.name,
3239
3274
if (BE (*err != REG_NOERROR, 0))
3240
3275
goto parse_bracket_exp_free_return;
3414
3449
Build the equivalence class which is represented by NAME.
3415
3450
The result are written to MBCSET and SBCSET.
3416
3451
EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes,
3417
is a pointer argument sinse we may update it. */
3452
is a pointer argument since we may update it. */
3419
3454
static reg_errcode_t
3420
3455
#ifdef RE_ENABLE_I18N
3445
3480
_NL_COLLATE_EXTRAMB);
3446
3481
indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
3447
3482
_NL_COLLATE_INDIRECTMB);
3448
idx1 = findidx (&cp);
3449
if (BE (idx1 == 0 || cp < name + strlen ((const char *) name), 0))
3483
idx1 = findidx (&cp, -1);
3484
if (BE (idx1 == 0 || *cp != '\0', 0))
3450
3485
/* This isn't a valid character. */
3451
3486
return REG_ECOLLATE;
3453
/* Build single byte matcing table for this equivalence class. */
3454
char_buf[1] = (unsigned char) '\0';
3488
/* Build single byte matching table for this equivalence class. */
3455
3489
len = weights[idx1 & 0xffffff];
3456
3490
for (ch = 0; ch < SBC_MAX; ++ch)
3458
3492
char_buf[0] = ch;
3460
idx2 = findidx (&cp);
3494
idx2 = findidx (&cp, 1);
3462
3496
idx2 = table[ch];
3510
3544
Build the character class which is represented by NAME.
3511
3545
The result are written to MBCSET and SBCSET.
3512
3546
CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes,
3513
is a pointer argument sinse we may update it. */
3547
is a pointer argument since we may update it. */
3515
3549
static reg_errcode_t
3516
3550
#ifdef RE_ENABLE_I18N
3517
3551
build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
3518
3552
re_charset_t *mbcset, Idx *char_class_alloc,
3519
const unsigned char *class_name, reg_syntax_t syntax)
3553
const char *class_name, reg_syntax_t syntax)
3520
3554
#else /* not RE_ENABLE_I18N */
3521
3555
build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
3522
const unsigned char *class_name, reg_syntax_t syntax)
3556
const char *class_name, reg_syntax_t syntax)
3523
3557
#endif /* not RE_ENABLE_I18N */
3526
const char *name = (const char *) class_name;
3560
const char *name = class_name;
3528
3562
/* In case of REG_ICASE "upper" and "lower" match the both of
3529
3563
upper and lower cases. */
3598
3632
static bin_tree_t *
3599
3633
build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
3600
const unsigned char *class_name,
3601
const unsigned char *extra, bool non_match,
3634
const char *class_name,
3635
const char *extra, bool non_match,
3602
3636
reg_errcode_t *err)
3604
3638
re_bitset_ptr_t sbcset;
3706
3740
/* This is intended for the expressions like "a{1,3}".
3707
Fetch a number from `input', and return the number.
3741
Fetch a number from 'input', and return the number.
3708
3742
Return REG_MISSING if the number field is empty like "{,1}".
3743
Return RE_DUP_MAX + 1 if the number field is too large.
3709
3744
Return REG_ERROR if an error occurred. */
3799
3835
static reg_errcode_t
3800
3836
mark_opt_subexp (void *extra, bin_tree_t *node)
3802
Idx idx = (Idx) (long) extra;
3838
Idx idx = (uintptr_t) extra;
3803
3839
if (node->token.type == SUBEXP && node->token.opr.idx == idx)
3804
3840
node->token.opt_subexp = 1;