~ubuntu-branches/ubuntu/vivid/parted/vivid : contents of lib/regcomp.c at revision 133

~ubuntu-branches/ubuntu/vivid/parted/vivid : (revision 133)

1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	1	/* Extended regular expression matching and search library.
1.3.2 by Colin Watson Import upstream version 3.2	2	Copyright (C) 2002-2014 Free Software Foundation, Inc.
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3	This file is part of the GNU C Library.
	4	Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
	5
1.3.2 by Colin Watson Import upstream version 3.2	6	The GNU C Library is free software; you can redistribute it and/or
	7	modify it under the terms of the GNU General Public
	8	License as published by the Free Software Foundation; either
	9	version 3 of the License, or (at your option) any later version.
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	10
1.3.2 by Colin Watson Import upstream version 3.2	11	The GNU C Library is distributed in the hope that it will be useful,
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	12	but WITHOUT ANY WARRANTY; without even the implied warranty of
1.3.2 by Colin Watson Import upstream version 3.2	13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1.3.2 by Colin Watson Import upstream version 3.2	14	General Public License for more details.
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	15
1.3.2 by Colin Watson Import upstream version 3.2	16	You should have received a copy of the GNU General Public
	17	License along with the GNU C Library; if not, see
	18	<http://www.gnu.org/licenses/>. */
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	19
	20	static reg_errcode_t re_compile_internal (regex_t preg, const char pattern,
	21	size_t length, reg_syntax_t syntax);
	22	static void re_compile_fastmap_iter (regex_t *bufp,
	23	const re_dfastate_t *init_state,
	24	char *fastmap);
	25	static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
	26	#ifdef RE_ENABLE_I18N
	27	static void free_charset (re_charset_t *cset);
	28	#endif /* RE_ENABLE_I18N */
	29	static void free_workarea_compile (regex_t *preg);
	30	static reg_errcode_t create_initial_state (re_dfa_t *dfa);
	31	#ifdef RE_ENABLE_I18N
	32	static void optimize_utf8 (re_dfa_t *dfa);
	33	#endif
	34	static reg_errcode_t analyze (regex_t *preg);
	35	static reg_errcode_t preorder (bin_tree_t *root,
	36	reg_errcode_t (fn (void , bin_tree_t )),
	37	void *extra);
	38	static reg_errcode_t postorder (bin_tree_t *root,
	39	reg_errcode_t (fn (void , bin_tree_t )),
	40	void *extra);
	41	static reg_errcode_t optimize_subexps (void extra, bin_tree_t node);
	42	static reg_errcode_t lower_subexps (void extra, bin_tree_t node);
	43	static bin_tree_t lower_subexp (reg_errcode_t err, regex_t *preg,
	44	bin_tree_t *node);
	45	static reg_errcode_t calc_first (void extra, bin_tree_t node);
	46	static reg_errcode_t calc_next (void extra, bin_tree_t node);
	47	static reg_errcode_t link_nfa_nodes (void extra, bin_tree_t node);
	48	static Idx duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint);
	49	static Idx search_duplicated_node (const re_dfa_t *dfa, Idx org_node,
	50	unsigned int constraint);
	51	static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
	52	static reg_errcode_t calc_eclosure_iter (re_node_set new_set, re_dfa_t dfa,
	53	Idx node, bool root);
	54	static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
	55	static Idx fetch_number (re_string_t input, re_token_t token,
	56	reg_syntax_t syntax);
	57	static int peek_token (re_token_t token, re_string_t input,
	58	reg_syntax_t syntax) internal_function;
	59	static bin_tree_t parse (re_string_t regexp, regex_t *preg,
	60	reg_syntax_t syntax, reg_errcode_t *err);
	61	static bin_tree_t parse_reg_exp (re_string_t regexp, regex_t *preg,
	62	re_token_t *token, reg_syntax_t syntax,
	63	Idx nest, reg_errcode_t *err);
	64	static bin_tree_t parse_branch (re_string_t regexp, regex_t *preg,
	65	re_token_t *token, reg_syntax_t syntax,
	66	Idx nest, reg_errcode_t *err);
	67	static bin_tree_t parse_expression (re_string_t regexp, regex_t *preg,
	68	re_token_t *token, reg_syntax_t syntax,
	69	Idx nest, reg_errcode_t *err);
	70	static bin_tree_t parse_sub_exp (re_string_t regexp, regex_t *preg,
	71	re_token_t *token, reg_syntax_t syntax,
	72	Idx nest, reg_errcode_t *err);
	73	static bin_tree_t parse_dup_op (bin_tree_t dup_elem, re_string_t *regexp,
	74	re_dfa_t dfa, re_token_t token,
	75	reg_syntax_t syntax, reg_errcode_t *err);
	76	static bin_tree_t parse_bracket_exp (re_string_t regexp, re_dfa_t *dfa,
	77	re_token_t *token, reg_syntax_t syntax,
	78	reg_errcode_t *err);
	79	static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
	80	re_string_t *regexp,
	81	re_token_t *token, int token_len,
	82	re_dfa_t *dfa,
83	reg_syntax_t syntax,
84	bool accept_hyphen);
85	static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
86	re_string_t *regexp,
87	re_token_t *token);
88	#ifdef RE_ENABLE_I18N
89	static reg_errcode_t build_equiv_class (bitset_t sbcset,
90	re_charset_t *mbcset,
91	Idx *equiv_class_alloc,
92	const unsigned char *name);
93	static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
94	bitset_t sbcset,
95	re_charset_t *mbcset,
96	Idx *char_class_alloc,
1.3.2 by Colin Watson Import upstream version 3.2	97	const char *class_name,
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	98	reg_syntax_t syntax);
	99	#else /* not RE_ENABLE_I18N */
	100	static reg_errcode_t build_equiv_class (bitset_t sbcset,
	101	const unsigned char *name);
	102	static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
	103	bitset_t sbcset,
1.3.2 by Colin Watson Import upstream version 3.2	104	const char *class_name,
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	105	reg_syntax_t syntax);
	106	#endif /* not RE_ENABLE_I18N */
	107	static bin_tree_t build_charclass_op (re_dfa_t dfa,
	108	RE_TRANSLATE_TYPE trans,
1.3.2 by Colin Watson Import upstream version 3.2	109	const char *class_name,
1.3.2 by Colin Watson Import upstream version 3.2	110	const char *extra,
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	111	bool non_match, reg_errcode_t *err);
	112	static bin_tree_t create_tree (re_dfa_t dfa,
	113	bin_tree_t left, bin_tree_t right,
	114	re_token_type_t type);
	115	static bin_tree_t create_token_tree (re_dfa_t dfa,
	116	bin_tree_t left, bin_tree_t right,
	117	const re_token_t *token);
	118	static bin_tree_t duplicate_tree (const bin_tree_t src, re_dfa_t *dfa);
	119	static void free_token (re_token_t *node);
	120	static reg_errcode_t free_tree (void extra, bin_tree_t node);
	121	static reg_errcode_t mark_opt_subexp (void extra, bin_tree_t node);
	122
	123	/* This table gives an error message for each of the error codes listed
	124	in regex.h. Obviously the order here has to be same as there.
	125	POSIX doesn't require that we do anything for REG_NOERROR,
	126	but why not be nice? */
	127
	128	static const char __re_error_msgid[] =
	129	{
	130	#define REG_NOERROR_IDX 0
	131	gettext_noop ("Success") /* REG_NOERROR */
	132	"\0"
	133	#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
	134	gettext_noop ("No match") /* REG_NOMATCH */
	135	"\0"
	136	#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
	137	gettext_noop ("Invalid regular expression") /* REG_BADPAT */
	138	"\0"
	139	#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
	140	gettext_noop ("Invalid collation character") /* REG_ECOLLATE */
	141	"\0"
	142	#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
	143	gettext_noop ("Invalid character class name") /* REG_ECTYPE */
	144	"\0"
	145	#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
	146	gettext_noop ("Trailing backslash") /* REG_EESCAPE */
	147	"\0"
	148	#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
	149	gettext_noop ("Invalid back reference") /* REG_ESUBREG */
	150	"\0"
	151	#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
	152	gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
	153	"\0"
	154	#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
	155	gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
	156	"\0"
	157	#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
	158	gettext_noop ("Unmatched \\{") /* REG_EBRACE */
	159	"\0"
	160	#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
	161	gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */
	162	"\0"
	163	#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
	164	gettext_noop ("Invalid range end") /* REG_ERANGE */
	165	"\0"
	166	#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
	167	gettext_noop ("Memory exhausted") /* REG_ESPACE */
	168	"\0"
	169	#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
	170	gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */
	171	"\0"
	172	#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
	173	gettext_noop ("Premature end of regular expression") /* REG_EEND */
	174	"\0"
175	#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
176	gettext_noop ("Regular expression too big") /* REG_ESIZE */
177	"\0"
178	#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
179	gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
180	};
181
182	static const size_t __re_error_msgid_idx[] =
183	{
184	REG_NOERROR_IDX,
185	REG_NOMATCH_IDX,
186	REG_BADPAT_IDX,
187	REG_ECOLLATE_IDX,
188	REG_ECTYPE_IDX,
189	REG_EESCAPE_IDX,
190	REG_ESUBREG_IDX,
191	REG_EBRACK_IDX,
192	REG_EPAREN_IDX,
193	REG_EBRACE_IDX,
194	REG_BADBR_IDX,
195	REG_ERANGE_IDX,
196	REG_ESPACE_IDX,
197	REG_BADRPT_IDX,
198	REG_EEND_IDX,
199	REG_ESIZE_IDX,
200	REG_ERPAREN_IDX
201	};
202
203	/* Entry points for GNU code. */
204
205	/* re_compile_pattern is the GNU regular expression compiler: it
206	compiles PATTERN (of length LENGTH) and puts the result in BUFP.
207	Returns 0 if the pattern was valid, otherwise an error string.
208
1.3.1 by Colin Watson Import upstream version 3.1	209	Assumes the 'allocated' (and perhaps 'buffer') and 'translate' fields
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	210	are set in BUFP on entry. */
	211
	212	#ifdef _LIBC
	213	const char *
	214	re_compile_pattern (pattern, length, bufp)
	215	const char *pattern;
	216	size_t length;
	217	struct re_pattern_buffer *bufp;
	218	#else /* size_t might promote */
	219	const char *
	220	re_compile_pattern (const char *pattern, size_t length,
	221	struct re_pattern_buffer *bufp)
	222	#endif
	223	{
	224	reg_errcode_t ret;
	225
	226	/* And GNU code determines whether or not to get register information
	227	by passing null for the REGS argument to re_match, etc., not by
	228	setting no_sub, unless RE_NO_SUB is set. */
	229	bufp->no_sub = !!(re_syntax_options & RE_NO_SUB);
	230
	231	/* Match anchors at newline. */
	232	bufp->newline_anchor = 1;
	233
	234	ret = re_compile_internal (bufp, pattern, length, re_syntax_options);
	235
	236	if (!ret)
	237	return NULL;
	238	return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
	239	}
	240	#ifdef _LIBC
	241	weak_alias (__re_compile_pattern, re_compile_pattern)
	242	#endif
	243
1.3.1 by Colin Watson Import upstream version 3.1	244	/* Set by 're_set_syntax' to the current regexp syntax to recognize. Can
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	245	also be assigned to arbitrarily: each pattern buffer stores its own
	246	syntax, so it can be changed between regex compilations. */
	247	/* This has no initializer because initialized variables in Emacs
	248	become read-only after dumping. */
	249	reg_syntax_t re_syntax_options;
	250
	251
	252	/* Specify the precise syntax of regexps for compilation. This provides
	253	for compatibility for various utilities which historically have
	254	different, incompatible syntaxes.
	255
	256	The argument SYNTAX is a bit mask comprised of the various bits
	257	defined in regex.h. We return the old syntax. */
	258
	259	reg_syntax_t
	260	re_set_syntax (syntax)
	261	reg_syntax_t syntax;
	262	{
	263	reg_syntax_t ret = re_syntax_options;
	264
	265	re_syntax_options = syntax;
	266	return ret;
	267	}
	268	#ifdef _LIBC
	269	weak_alias (__re_set_syntax, re_set_syntax)
	270	#endif
	271
	272	int
	273	re_compile_fastmap (bufp)
	274	struct re_pattern_buffer *bufp;
	275	{
1.3.2 by Colin Watson Import upstream version 3.2	276	re_dfa_t *dfa = bufp->buffer;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	277	char *fastmap = bufp->fastmap;
	278
	279	memset (fastmap, '\0', sizeof (char) * SBC_MAX);
	280	re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
	281	if (dfa->init_state != dfa->init_state_word)
	282	re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
	283	if (dfa->init_state != dfa->init_state_nl)
	284	re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
	285	if (dfa->init_state != dfa->init_state_begbuf)
	286	re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
	287	bufp->fastmap_accurate = 1;
	288	return 0;
	289	}
	290	#ifdef _LIBC
	291	weak_alias (__re_compile_fastmap, re_compile_fastmap)
	292	#endif
	293
	294	static inline void
1.3.2 by Colin Watson Import upstream version 3.2	295	__attribute__ ((always_inline))
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	296	re_set_fastmap (char *fastmap, bool icase, int ch)
	297	{
	298	fastmap[ch] = 1;
	299	if (icase)
	300	fastmap[tolower (ch)] = 1;
	301	}
	302
	303	/* Helper function for re_compile_fastmap.
	304	Compile fastmap for the initial_state INIT_STATE. */
	305
	306	static void
	307	re_compile_fastmap_iter (regex_t bufp, const re_dfastate_t init_state,
	308	char *fastmap)
	309	{
1.3.2 by Colin Watson Import upstream version 3.2	310	re_dfa_t *dfa = bufp->buffer;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	311	Idx node_cnt;
	312	bool icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));
	313	for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)
	314	{
	315	Idx node = init_state->nodes.elems[node_cnt];
	316	re_token_type_t type = dfa->nodes[node].type;
	317
	318	if (type == CHARACTER)
	319	{
	320	re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
	321	#ifdef RE_ENABLE_I18N
	322	if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
	323	{
	324	unsigned char buf[MB_LEN_MAX];
	325	unsigned char *p;
	326	wchar_t wc;
	327	mbstate_t state;
	328
	329	p = buf;
	330	*p++ = dfa->nodes[node].opr.c;
	331	while (++node < dfa->nodes_len
	332	&& dfa->nodes[node].type == CHARACTER
	333	&& dfa->nodes[node].mb_partial)
	334	*p++ = dfa->nodes[node].opr.c;
	335	memset (&state, '\0', sizeof (state));
	336	if (__mbrtowc (&wc, (const char *) buf, p - buf,
	337	&state) == p - buf
	338	&& (__wcrtomb ((char *) buf, towlower (wc), &state)
	339	!= (size_t) -1))
	340	re_set_fastmap (fastmap, false, buf[0]);
	341	}
	342	#endif
	343	}
	344	else if (type == SIMPLE_BRACKET)
	345	{
	346	int i, ch;
	347	for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
	348	{
	349	int j;
	350	bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
	351	for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
	352	if (w & ((bitset_word_t) 1 << j))
	353	re_set_fastmap (fastmap, icase, ch);
	354	}
	355	}
	356	#ifdef RE_ENABLE_I18N
	357	else if (type == COMPLEX_BRACKET)
	358	{
	359	re_charset_t *cset = dfa->nodes[node].opr.mbcset;
	360	Idx i;
	361
	362	# ifdef _LIBC
	363	/* See if we have to try all bytes which start multiple collation
	364	elements.
	365	e.g. In da_DK, we want to catch 'a' since "aa" is a valid
	366	collation element, and don't catch 'b' since 'b' is
	367	the only collation element which starts from 'b' (and
	368	it is caught by SIMPLE_BRACKET). */
	369	if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0
	370	&& (cset->ncoll_syms \|\| cset->nranges))
	371	{
	372	const int32_t table = (const int32_t )
	373	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
	374	for (i = 0; i < SBC_MAX; ++i)
375	if (table[i] < 0)
376	re_set_fastmap (fastmap, icase, i);
377	}
378	# endif /* _LIBC */
379
380	/* See if we have to start the match at all multibyte characters,
381	i.e. where we would not find an invalid sequence. This only
382	applies to multibyte character sets; for single byte character
383	sets, the SIMPLE_BRACKET again suffices. */
384	if (dfa->mb_cur_max > 1
1.2.2 by Otavio Salvador Import upstream version 2.1	385	&& (cset->nchar_classes \|\| cset->non_match \|\| cset->nranges
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	386	# ifdef _LIBC
	387	\|\| cset->nequiv_classes
	388	# endif /* _LIBC */
	389	))
	390	{
	391	unsigned char c = 0;
	392	do
	393	{
	394	mbstate_t mbs;
	395	memset (&mbs, 0, sizeof (mbs));
	396	if (__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2)
	397	re_set_fastmap (fastmap, false, (int) c);
	398	}
	399	while (++c != 0);
	400	}
	401
	402	else
	403	{
	404	/* ... Else catch all bytes which can start the mbchars. */
	405	for (i = 0; i < cset->nmbchars; ++i)
	406	{
	407	char buf[256];
	408	mbstate_t state;
	409	memset (&state, '\0', sizeof (state));
	410	if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)
	411	re_set_fastmap (fastmap, icase, (unsigned char ) buf);
	412	if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)
	413	{
	414	if (__wcrtomb (buf, towlower (cset->mbchars[i]), &state)
	415	!= (size_t) -1)
	416	re_set_fastmap (fastmap, false, (unsigned char ) buf);
	417	}
	418	}
	419	}
	420	}
	421	#endif /* RE_ENABLE_I18N */
	422	else if (type == OP_PERIOD
	423	#ifdef RE_ENABLE_I18N
	424	\|\| type == OP_UTF8_PERIOD
	425	#endif /* RE_ENABLE_I18N */
	426	\|\| type == END_OF_RE)
	427	{
	428	memset (fastmap, '\1', sizeof (char) * SBC_MAX);
	429	if (type == END_OF_RE)
	430	bufp->can_be_null = 1;
	431	return;
	432	}
	433	}
	434	}
	435
	436	/* Entry point for POSIX code. */
	437	/* regcomp takes a regular expression as a string and compiles it.
	438
	439	PREG is a regex_t *. We do not expect any fields to be initialized,
	440	since POSIX says we shouldn't. Thus, we set
	441
1.3.1 by Colin Watson Import upstream version 3.1	442	'buffer' to the compiled pattern;
	443	'used' to the length of the compiled pattern;
	444	'syntax' to RE_SYNTAX_POSIX_EXTENDED if the
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	445	REG_EXTENDED bit in CFLAGS is set; otherwise, to
	446	RE_SYNTAX_POSIX_BASIC;
1.3.1 by Colin Watson Import upstream version 3.1	447	'newline_anchor' to REG_NEWLINE being set in CFLAGS;
	448	'fastmap' to an allocated space for the fastmap;
	449	'fastmap_accurate' to zero;
	450	're_nsub' to the number of subexpressions in PATTERN.
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	451
	452	PATTERN is the address of the pattern string.
	453
	454	CFLAGS is a series of bits which affect compilation.
	455
	456	If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
	457	use POSIX basic syntax.
	458
	459	If REG_NEWLINE is set, then . and [^...] don't match newline.
	460	Also, regexec will try a match beginning after every newline.
	461
	462	If REG_ICASE is set, then we considers upper- and lowercase
	463	versions of letters to be equivalent when matching.
	464
	465	If REG_NOSUB is set, then when PREG is passed to regexec, that
	466	routine will report only success or failure, and nothing about the
	467	registers.
	468
	469	It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
	470	the return codes and their meanings.) */
	471
	472	int
	473	regcomp (preg, pattern, cflags)
	474	regex_t *_Restrict_ preg;
	475	const char *_Restrict_ pattern;
	476	int cflags;
	477	{
	478	reg_errcode_t ret;
	479	reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
	480	: RE_SYNTAX_POSIX_BASIC);
	481
	482	preg->buffer = NULL;
	483	preg->allocated = 0;
	484	preg->used = 0;
	485
	486	/* Try to allocate space for the fastmap. */
	487	preg->fastmap = re_malloc (char, SBC_MAX);
	488	if (BE (preg->fastmap == NULL, 0))
	489	return REG_ESPACE;
	490
	491	syntax \|= (cflags & REG_ICASE) ? RE_ICASE : 0;
	492
	493	/* If REG_NEWLINE is set, newlines are treated differently. */
	494	if (cflags & REG_NEWLINE)
	495	{ /* REG_NEWLINE implies neither . nor [^...] match newline. */
	496	syntax &= ~RE_DOT_NEWLINE;
	497	syntax \|= RE_HAT_LISTS_NOT_NEWLINE;
	498	/* It also changes the matching behavior. */
	499	preg->newline_anchor = 1;
	500	}
	501	else
	502	preg->newline_anchor = 0;
	503	preg->no_sub = !!(cflags & REG_NOSUB);
	504	preg->translate = NULL;
	505
	506	ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);
	507
	508	/* POSIX doesn't distinguish between an unmatched open-group and an
	509	unmatched close-group: both are REG_EPAREN. */
	510	if (ret == REG_ERPAREN)
	511	ret = REG_EPAREN;
	512
	513	/* We have already checked preg->fastmap != NULL. */
	514	if (BE (ret == REG_NOERROR, 1))
515	/* Compute the fastmap now, since regexec cannot modify the pattern
516	buffer. This function never fails in this implementation. */
517	(void) re_compile_fastmap (preg);
518	else
519	{
520	/* Some error occurred while compiling the expression. */
521	re_free (preg->fastmap);
522	preg->fastmap = NULL;
523	}
524
525	return (int) ret;
526	}
527	#ifdef _LIBC
528	weak_alias (__regcomp, regcomp)
529	#endif
530
531	/* Returns a message corresponding to an error code, ERRCODE, returned
532	from either regcomp or regexec. We don't use PREG here. */
533
534	#ifdef _LIBC
535	size_t
536	regerror (errcode, preg, errbuf, errbuf_size)
537	int errcode;
538	const regex_t *_Restrict_ preg;
539	char *_Restrict_ errbuf;
540	size_t errbuf_size;
541	#else /* size_t might promote */
542	size_t
543	regerror (int errcode, const regex_t *_Restrict_ preg,
544	char *_Restrict_ errbuf, size_t errbuf_size)
545	#endif
546	{
547	const char *msg;
548	size_t msg_size;
549
550	if (BE (errcode < 0
551	\|\| errcode >= (int) (sizeof (__re_error_msgid_idx)
552	/ sizeof (__re_error_msgid_idx[0])), 0))
553	/* Only error codes returned by the rest of the code should be passed
554	to this routine. If we are given anything else, or if other regex
555	code generates an invalid error code, then the program has a bug.
556	Dump core so we can fix it. */
557	abort ();
558
559	msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
560
561	msg_size = strlen (msg) + 1; /* Includes the null. */
562
563	if (BE (errbuf_size != 0, 1))
564	{
565	size_t cpy_size = msg_size;
566	if (BE (msg_size > errbuf_size, 0))
567	{
568	cpy_size = errbuf_size - 1;
569	errbuf[cpy_size] = '\0';
570	}
571	memcpy (errbuf, msg, cpy_size);
572	}
573
574	return msg_size;
575	}
576	#ifdef _LIBC
577	weak_alias (__regerror, regerror)
578	#endif
579
580
581	#ifdef RE_ENABLE_I18N
582	/* This static array is used for the map to single-byte characters when
583	UTF-8 is used. Otherwise we would allocate memory just to initialize
584	it the same all the time. UTF-8 is the preferred encoding so this is
585	a worthwhile optimization. */
586	static const bitset_t utf8_sb_map =
587	{
588	/* Set the first 128 bits. */
1.3.2 by Colin Watson Import upstream version 3.2	589	# if defined __GNUC__ && !defined __STRICT_ANSI__
1.3.1 by Colin Watson Import upstream version 3.1	590	[0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX
	591	# else
	592	# if 4 * BITSET_WORD_BITS < ASCII_CHARS
	593	# error "bitset_word_t is narrower than 32 bits"
	594	# elif 3 * BITSET_WORD_BITS < ASCII_CHARS
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	595	BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX,
1.3.1 by Colin Watson Import upstream version 3.1	596	# elif 2 * BITSET_WORD_BITS < ASCII_CHARS
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	597	BITSET_WORD_MAX, BITSET_WORD_MAX,
1.3.1 by Colin Watson Import upstream version 3.1	598	# elif 1 * BITSET_WORD_BITS < ASCII_CHARS
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	599	BITSET_WORD_MAX,
1.3.1 by Colin Watson Import upstream version 3.1	600	# endif
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	601	(BITSET_WORD_MAX
	602	>> (SBC_MAX % BITSET_WORD_BITS == 0
	603	? 0
	604	: BITSET_WORD_BITS - SBC_MAX % BITSET_WORD_BITS))
1.3.1 by Colin Watson Import upstream version 3.1	605	# endif
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	606	};
	607	#endif
	608
	609
	610	static void
	611	free_dfa_content (re_dfa_t *dfa)
	612	{
	613	Idx i, j;
	614
	615	if (dfa->nodes)
	616	for (i = 0; i < dfa->nodes_len; ++i)
	617	free_token (dfa->nodes + i);
	618	re_free (dfa->nexts);
	619	for (i = 0; i < dfa->nodes_len; ++i)
	620	{
	621	if (dfa->eclosures != NULL)
	622	re_node_set_free (dfa->eclosures + i);
	623	if (dfa->inveclosures != NULL)
	624	re_node_set_free (dfa->inveclosures + i);
	625	if (dfa->edests != NULL)
	626	re_node_set_free (dfa->edests + i);
	627	}
	628	re_free (dfa->edests);
	629	re_free (dfa->eclosures);
	630	re_free (dfa->inveclosures);
	631	re_free (dfa->nodes);
	632
	633	if (dfa->state_table)
	634	for (i = 0; i <= dfa->state_hash_mask; ++i)
	635	{
	636	struct re_state_table_entry *entry = dfa->state_table + i;
	637	for (j = 0; j < entry->num; ++j)
	638	{
	639	re_dfastate_t *state = entry->array[j];
	640	free_state (state);
	641	}
1.2.3 by Colin Watson Import upstream version 2.2	642	re_free (entry->array);
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	643	}
	644	re_free (dfa->state_table);
	645	#ifdef RE_ENABLE_I18N
	646	if (dfa->sb_char != utf8_sb_map)
	647	re_free (dfa->sb_char);
	648	#endif
	649	re_free (dfa->subexp_map);
	650	#ifdef DEBUG
	651	re_free (dfa->re_str);
	652	#endif
	653
	654	re_free (dfa);
	655	}
	656
	657
	658	/* Free dynamically allocated space used by PREG. */
	659
	660	void
	661	regfree (preg)
	662	regex_t *preg;
	663	{
1.3.2 by Colin Watson Import upstream version 3.2	664	re_dfa_t *dfa = preg->buffer;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	665	if (BE (dfa != NULL, 1))
1.3.2 by Colin Watson Import upstream version 3.2	666	{
	667	lock_fini (dfa->lock);
	668	free_dfa_content (dfa);
	669	}
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	670	preg->buffer = NULL;
	671	preg->allocated = 0;
	672
	673	re_free (preg->fastmap);
	674	preg->fastmap = NULL;
	675
	676	re_free (preg->translate);
	677	preg->translate = NULL;
	678	}
	679	#ifdef _LIBC
	680	weak_alias (__regfree, regfree)
	681	#endif
	682
	683	/* Entry points compatible with 4.2 BSD regex library. We don't define
	684	them unless specifically requested. */
	685
	686	#if defined _REGEX_RE_COMP \|\| defined _LIBC
	687
	688	/* BSD has one and only one pattern buffer. */
	689	static struct re_pattern_buffer re_comp_buf;
	690
	691	char *
	692	# ifdef _LIBC
	693	/* Make these definitions weak in libc, so POSIX programs can redefine
	694	these names if they don't use our functions, and still use
	695	regcomp/regexec above without link errors. */
	696	weak_function
	697	# endif
	698	re_comp (s)
	699	const char *s;
	700	{
	701	reg_errcode_t ret;
	702	char *fastmap;
	703
	704	if (!s)
	705	{
	706	if (!re_comp_buf.buffer)
	707	return gettext ("No previous regular expression");
	708	return 0;
	709	}
	710
	711	if (re_comp_buf.buffer)
	712	{
	713	fastmap = re_comp_buf.fastmap;
	714	re_comp_buf.fastmap = NULL;
	715	__regfree (&re_comp_buf);
	716	memset (&re_comp_buf, '\0', sizeof (re_comp_buf));
	717	re_comp_buf.fastmap = fastmap;
	718	}
	719
	720	if (re_comp_buf.fastmap == NULL)
	721	{
	722	re_comp_buf.fastmap = (char *) malloc (SBC_MAX);
	723	if (re_comp_buf.fastmap == NULL)
	724	return (char *) gettext (__re_error_msgid
	725	+ __re_error_msgid_idx[(int) REG_ESPACE]);
	726	}
	727
1.3.1 by Colin Watson Import upstream version 3.1	728	/* Since 're_exec' always passes NULL for the 'regs' argument, we
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	729	don't need to initialize the pattern buffer fields which affect it. */
	730
	731	/* Match anchors at newlines. */
	732	re_comp_buf.newline_anchor = 1;
	733
	734	ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);
	735
	736	if (!ret)
	737	return NULL;
	738
1.3.1 by Colin Watson Import upstream version 3.1	739	/* Yes, we're discarding 'const' here if !HAVE_LIBINTL. */
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	740	return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
	741	}
	742
	743	#ifdef _LIBC
	744	libc_freeres_fn (free_mem)
	745	{
	746	__regfree (&re_comp_buf);
	747	}
	748	#endif
	749
	750	#endif /* _REGEX_RE_COMP */
	751
	752	/* Internal entry point.
	753	Compile the regular expression PATTERN, whose length is LENGTH.
	754	SYNTAX indicate regular expression's syntax. */
	755
	756	static reg_errcode_t
	757	re_compile_internal (regex_t preg, const char pattern, size_t length,
	758	reg_syntax_t syntax)
	759	{
	760	reg_errcode_t err = REG_NOERROR;
	761	re_dfa_t *dfa;
	762	re_string_t regexp;
	763
	764	/* Initialize the pattern buffer. */
	765	preg->fastmap_accurate = 0;
	766	preg->syntax = syntax;
	767	preg->not_bol = preg->not_eol = 0;
	768	preg->used = 0;
	769	preg->re_nsub = 0;
	770	preg->can_be_null = 0;
	771	preg->regs_allocated = REGS_UNALLOCATED;
	772
	773	/* Initialize the dfa. */
1.3.2 by Colin Watson Import upstream version 3.2	774	dfa = preg->buffer;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	775	if (BE (preg->allocated < sizeof (re_dfa_t), 0))
	776	{
	777	/* If zero allocated, but buffer is non-null, try to realloc
	778	enough space. This loses if buffer's address is bogus, but
	779	that is the user's responsibility. If ->buffer is NULL this
	780	is a simple allocation. */
	781	dfa = re_realloc (preg->buffer, re_dfa_t, 1);
	782	if (dfa == NULL)
	783	return REG_ESPACE;
	784	preg->allocated = sizeof (re_dfa_t);
1.3.2 by Colin Watson Import upstream version 3.2	785	preg->buffer = dfa;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	786	}
	787	preg->used = sizeof (re_dfa_t);
	788
	789	err = init_dfa (dfa, length);
1.3.2 by Colin Watson Import upstream version 3.2	790	if (BE (err == REG_NOERROR && lock_init (dfa->lock) != 0, 0))
1.3.2 by Colin Watson Import upstream version 3.2	791	err = REG_ESPACE;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	792	if (BE (err != REG_NOERROR, 0))
	793	{
	794	free_dfa_content (dfa);
	795	preg->buffer = NULL;
	796	preg->allocated = 0;
	797	return err;
	798	}
	799	#ifdef DEBUG
	800	/* Note: length+1 will not overflow since it is checked in init_dfa. */
	801	dfa->re_str = re_malloc (char, length + 1);
	802	strncpy (dfa->re_str, pattern, length + 1);
	803	#endif
	804
	805	err = re_string_construct (&regexp, pattern, length, preg->translate,
	806	(syntax & RE_ICASE) != 0, dfa);
	807	if (BE (err != REG_NOERROR, 0))
	808	{
	809	re_compile_internal_free_return:
	810	free_workarea_compile (preg);
	811	re_string_destruct (&regexp);
1.3.2 by Colin Watson Import upstream version 3.2	812	lock_fini (dfa->lock);
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	813	free_dfa_content (dfa);
	814	preg->buffer = NULL;
	815	preg->allocated = 0;
	816	return err;
	817	}
	818
	819	/* Parse the regular expression, and build a structure tree. */
	820	preg->re_nsub = 0;
	821	dfa->str_tree = parse (&regexp, preg, syntax, &err);
	822	if (BE (dfa->str_tree == NULL, 0))
	823	goto re_compile_internal_free_return;
	824
	825	/* Analyze the tree and create the nfa. */
	826	err = analyze (preg);
	827	if (BE (err != REG_NOERROR, 0))
	828	goto re_compile_internal_free_return;
	829
	830	#ifdef RE_ENABLE_I18N
	831	/* If possible, do searching in single byte encoding to speed things up. */
	832	if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
	833	optimize_utf8 (dfa);
	834	#endif
	835
	836	/* Then create the initial state of the dfa. */
	837	err = create_initial_state (dfa);
	838
	839	/* Release work areas. */
	840	free_workarea_compile (preg);
	841	re_string_destruct (&regexp);
	842
	843	if (BE (err != REG_NOERROR, 0))
	844	{
1.3.2 by Colin Watson Import upstream version 3.2	845	lock_fini (dfa->lock);
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	846	free_dfa_content (dfa);
	847	preg->buffer = NULL;
	848	preg->allocated = 0;
	849	}
	850
	851	return err;
	852	}
	853
	854	/* Initialize DFA. We use the length of the regular expression PAT_LEN
	855	as the initial length of some arrays. */
	856
	857	static reg_errcode_t
	858	init_dfa (re_dfa_t *dfa, size_t pat_len)
	859	{
	860	__re_size_t table_size;
1.2.3 by Colin Watson Import upstream version 2.2	861	#ifndef _LIBC
1.3.2 by Colin Watson Import upstream version 3.2	862	const char *codeset_name;
1.2.3 by Colin Watson Import upstream version 2.2	863	#endif
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	864	#ifdef RE_ENABLE_I18N
	865	size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t));
	866	#else
	867	size_t max_i18n_object_size = 0;
	868	#endif
	869	size_t max_object_size =
	870	MAX (sizeof (struct re_state_table_entry),
	871	MAX (sizeof (re_token_t),
	872	MAX (sizeof (re_node_set),
	873	MAX (sizeof (regmatch_t),
	874	max_i18n_object_size))));
	875
	876	memset (dfa, '\0', sizeof (re_dfa_t));
	877
	878	/* Force allocation of str_tree_storage the first time. */
	879	dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
	880
	881	/* Avoid overflows. The extra "/ 2" is for the table_size doubling
	882	calculation below, and for similar doubling calculations
	883	elsewhere. And it's <= rather than <, because some of the
	884	doubling calculations add 1 afterwards. */
1.3.1 by Colin Watson Import upstream version 3.1	885	if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) / 2 <= pat_len, 0))
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	886	return REG_ESPACE;
	887
	888	dfa->nodes_alloc = pat_len + 1;
	889	dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
	890
	891	/* table_size = 2 ^ ceil(log pat_len) */
	892	for (table_size = 1; ; table_size <<= 1)
	893	if (table_size > pat_len)
	894	break;
	895
	896	dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
	897	dfa->state_hash_mask = table_size - 1;
	898
	899	dfa->mb_cur_max = MB_CUR_MAX;
	900	#ifdef _LIBC
	901	if (dfa->mb_cur_max == 6
	902	&& strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)
	903	dfa->is_utf8 = 1;
	904	dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
	905	!= 0);
	906	#else
1.2.3 by Colin Watson Import upstream version 2.2	907	codeset_name = nl_langinfo (CODESET);
1.3.2 by Colin Watson Import upstream version 3.2	908	if ((codeset_name[0] == 'U' \|\| codeset_name[0] == 'u')
	909	&& (codeset_name[1] == 'T' \|\| codeset_name[1] == 't')
	910	&& (codeset_name[2] == 'F' \|\| codeset_name[2] == 'f')
	911	&& strcmp (codeset_name + 3 + (codeset_name[3] == '-'), "8") == 0)
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	912	dfa->is_utf8 = 1;
	913
	914	/* We check exhaustively in the loop below if this charset is a
	915	superset of ASCII. */
	916	dfa->map_notascii = 0;
	917	#endif
	918
	919	#ifdef RE_ENABLE_I18N
	920	if (dfa->mb_cur_max > 1)
	921	{
	922	if (dfa->is_utf8)
	923	dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
	924	else
	925	{
	926	int i, j, ch;
	927
	928	dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
	929	if (BE (dfa->sb_char == NULL, 0))
	930	return REG_ESPACE;
	931
	932	/* Set the bits corresponding to single byte chars. */
	933	for (i = 0, ch = 0; i < BITSET_WORDS; ++i)
	934	for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
	935	{
	936	wint_t wch = __btowc (ch);
	937	if (wch != WEOF)
	938	dfa->sb_char[i] \|= (bitset_word_t) 1 << j;
	939	# ifndef _LIBC
	940	if (isascii (ch) && wch != ch)
	941	dfa->map_notascii = 1;
	942	# endif
	943	}
	944	}
	945	}
	946	#endif
	947
	948	if (BE (dfa->nodes == NULL \|\| dfa->state_table == NULL, 0))
	949	return REG_ESPACE;
	950	return REG_NOERROR;
	951	}
	952
	953	/* Initialize WORD_CHAR table, which indicate which character is
	954	"word". In this case "word" means that it is the word construction
	955	character used by some operators like "\<", "\>", etc. */
	956
	957	static void
	958	internal_function
	959	init_word_char (re_dfa_t *dfa)
	960	{
1.3.1 by Colin Watson Import upstream version 3.1	961	int i = 0;
	962	int j;
	963	int ch = 0;
1.3.2 by Colin Watson Import upstream version 3.2	964	dfa->word_ops_used = 1;
1.3.1 by Colin Watson Import upstream version 3.1	965	if (BE (dfa->map_notascii == 0, 1))
1.3.1 by Colin Watson Import upstream version 3.1	966	{
1.3.2 by Colin Watson Import upstream version 3.2	967	bitset_word_t bits0 = 0x00000000;
	968	bitset_word_t bits1 = 0x03ff0000;
	969	bitset_word_t bits2 = 0x87fffffe;
	970	bitset_word_t bits3 = 0x07fffffe;
1.3.1 by Colin Watson Import upstream version 3.1	971	if (BITSET_WORD_BITS == 64)
1.3.1 by Colin Watson Import upstream version 3.1	972	{
1.3.2 by Colin Watson Import upstream version 3.2	973	dfa->word_char[0] = bits1 << 31 << 1 \| bits0;
1.3.2 by Colin Watson Import upstream version 3.2	974	dfa->word_char[1] = bits3 << 31 << 1 \| bits2;
1.3.1 by Colin Watson Import upstream version 3.1	975	i = 2;
	976	}
	977	else if (BITSET_WORD_BITS == 32)
	978	{
1.3.2 by Colin Watson Import upstream version 3.2	979	dfa->word_char[0] = bits0;
	980	dfa->word_char[1] = bits1;
	981	dfa->word_char[2] = bits2;
	982	dfa->word_char[3] = bits3;
1.3.1 by Colin Watson Import upstream version 3.1	983	i = 4;
	984	}
	985	else
	986	goto general_case;
	987	ch = 128;
	988
	989	if (BE (dfa->is_utf8, 1))
	990	{
	991	memset (&dfa->word_char[i], '\0', (SBC_MAX - ch) / 8);
	992	return;
	993	}
	994	}
	995
	996	general_case:
	997	for (; i < BITSET_WORDS; ++i)
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	998	for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)
	999	if (isalnum (ch) \|\| ch == '_')
	1000	dfa->word_char[i] \|= (bitset_word_t) 1 << j;
	1001	}
	1002
	1003	/* Free the work area which are only used while compiling. */
	1004
	1005	static void
	1006	free_workarea_compile (regex_t *preg)
	1007	{
1.3.2 by Colin Watson Import upstream version 3.2	1008	re_dfa_t *dfa = preg->buffer;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	1009	bin_tree_storage_t storage, next;
	1010	for (storage = dfa->str_tree_storage; storage; storage = next)
	1011	{
	1012	next = storage->next;
	1013	re_free (storage);
	1014	}
	1015	dfa->str_tree_storage = NULL;
	1016	dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
	1017	dfa->str_tree = NULL;
	1018	re_free (dfa->org_indices);
	1019	dfa->org_indices = NULL;
	1020	}
	1021
	1022	/* Create initial states for all contexts. */
	1023
	1024	static reg_errcode_t
	1025	create_initial_state (re_dfa_t *dfa)
	1026	{
	1027	Idx first, i;
	1028	reg_errcode_t err;
	1029	re_node_set init_nodes;
	1030
	1031	/* Initial states have the epsilon closure of the node which is
	1032	the first node of the regular expression. */
	1033	first = dfa->str_tree->first->node_idx;
	1034	dfa->init_node = first;
	1035	err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
	1036	if (BE (err != REG_NOERROR, 0))
	1037	return err;
	1038
	1039	/* The back-references which are in initial states can epsilon transit,
	1040	since in this case all of the subexpressions can be null.
	1041	Then we add epsilon closures of the nodes which are the next nodes of
	1042	the back-references. */
	1043	if (dfa->nbackref > 0)
	1044	for (i = 0; i < init_nodes.nelem; ++i)
	1045	{
	1046	Idx node_idx = init_nodes.elems[i];
	1047	re_token_type_t type = dfa->nodes[node_idx].type;
	1048
	1049	Idx clexp_idx;
	1050	if (type != OP_BACK_REF)
	1051	continue;
	1052	for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
	1053	{
	1054	re_token_t *clexp_node;
	1055	clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
	1056	if (clexp_node->type == OP_CLOSE_SUBEXP
	1057	&& clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)
	1058	break;
	1059	}
	1060	if (clexp_idx == init_nodes.nelem)
	1061	continue;
	1062
	1063	if (type == OP_BACK_REF)
	1064	{
	1065	Idx dest_idx = dfa->edests[node_idx].elems[0];
	1066	if (!re_node_set_contains (&init_nodes, dest_idx))
	1067	{
1.2.3 by Colin Watson Import upstream version 2.2	1068	reg_errcode_t merge_err
	1069	= re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
	1070	if (merge_err != REG_NOERROR)
	1071	return merge_err;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	1072	i = 0;
	1073	}
	1074	}
	1075	}
	1076
	1077	/* It must be the first time to invoke acquire_state. */
	1078	dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
	1079	/* We don't check ERR here, since the initial state must not be NULL. */
	1080	if (BE (dfa->init_state == NULL, 0))
	1081	return err;
	1082	if (dfa->init_state->has_constraint)
	1083	{
	1084	dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
	1085	CONTEXT_WORD);
	1086	dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
	1087	CONTEXT_NEWLINE);
	1088	dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
	1089	&init_nodes,
	1090	CONTEXT_NEWLINE
	1091	\| CONTEXT_BEGBUF);
	1092	if (BE (dfa->init_state_word == NULL \|\| dfa->init_state_nl == NULL
	1093	\|\| dfa->init_state_begbuf == NULL, 0))
	1094	return err;
	1095	}
	1096	else
	1097	dfa->init_state_word = dfa->init_state_nl
	1098	= dfa->init_state_begbuf = dfa->init_state;
	1099
	1100	re_node_set_free (&init_nodes);
	1101	return REG_NOERROR;
	1102	}
	1103
	1104	#ifdef RE_ENABLE_I18N
	1105	/* If it is possible to do searching in single byte encoding instead of UTF-8
	1106	to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
	1107	DFA nodes where needed. */
	1108
	1109	static void
	1110	optimize_utf8 (re_dfa_t *dfa)
	1111	{
	1112	Idx node;
	1113	int i;
	1114	bool mb_chars = false;
	1115	bool has_period = false;
	1116
	1117	for (node = 0; node < dfa->nodes_len; ++node)
	1118	switch (dfa->nodes[node].type)
	1119	{
	1120	case CHARACTER:
	1121	if (dfa->nodes[node].opr.c >= ASCII_CHARS)
	1122	mb_chars = true;
	1123	break;
	1124	case ANCHOR:
	1125	switch (dfa->nodes[node].opr.ctx_type)
	1126	{
	1127	case LINE_FIRST:
	1128	case LINE_LAST:
	1129	case BUF_FIRST:
	1130	case BUF_LAST:
	1131	break;
	1132	default:
	1133	/* Word anchors etc. cannot be handled. It's okay to test
	1134	opr.ctx_type since constraints (for all DFA nodes) are
	1135	created by ORing one or more opr.ctx_type values. */
1136	return;
1137	}
1138	break;
1139	case OP_PERIOD:
1.2.3 by Colin Watson Import upstream version 2.2	1140	has_period = true;
1.2.3 by Colin Watson Import upstream version 2.2	1141	break;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	1142	case OP_BACK_REF:
	1143	case OP_ALT:
	1144	case END_OF_RE:
	1145	case OP_DUP_ASTERISK:
	1146	case OP_OPEN_SUBEXP:
	1147	case OP_CLOSE_SUBEXP:
	1148	break;
	1149	case COMPLEX_BRACKET:
	1150	return;
	1151	case SIMPLE_BRACKET:
	1152	/* Just double check. */
	1153	{
	1154	int rshift = (ASCII_CHARS % BITSET_WORD_BITS == 0
	1155	? 0
	1156	: BITSET_WORD_BITS - ASCII_CHARS % BITSET_WORD_BITS);
	1157	for (i = ASCII_CHARS / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
	1158	{
	1159	if (dfa->nodes[node].opr.sbcset[i] >> rshift != 0)
	1160	return;
	1161	rshift = 0;
	1162	}
	1163	}
	1164	break;
	1165	default:
	1166	abort ();
	1167	}
	1168
	1169	if (mb_chars \|\| has_period)
	1170	for (node = 0; node < dfa->nodes_len; ++node)
	1171	{
	1172	if (dfa->nodes[node].type == CHARACTER
	1173	&& dfa->nodes[node].opr.c >= ASCII_CHARS)
	1174	dfa->nodes[node].mb_partial = 0;
	1175	else if (dfa->nodes[node].type == OP_PERIOD)
	1176	dfa->nodes[node].type = OP_UTF8_PERIOD;
	1177	}
	1178
	1179	/* The search can be in single byte locale. */
	1180	dfa->mb_cur_max = 1;
	1181	dfa->is_utf8 = 0;
	1182	dfa->has_mb_node = dfa->nbackref > 0 \|\| has_period;
	1183	}
	1184	#endif
	1185
	1186	/* Analyze the structure tree, and calculate "first", "next", "edest",
	1187	"eclosure", and "inveclosure". */
	1188
	1189	static reg_errcode_t
	1190	analyze (regex_t *preg)
	1191	{
1.3.2 by Colin Watson Import upstream version 3.2	1192	re_dfa_t *dfa = preg->buffer;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	1193	reg_errcode_t ret;
	1194
	1195	/* Allocate arrays. */
	1196	dfa->nexts = re_malloc (Idx, dfa->nodes_alloc);
	1197	dfa->org_indices = re_malloc (Idx, dfa->nodes_alloc);
	1198	dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
	1199	dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
	1200	if (BE (dfa->nexts == NULL \|\| dfa->org_indices == NULL \|\| dfa->edests == NULL
	1201	\|\| dfa->eclosures == NULL, 0))
	1202	return REG_ESPACE;
	1203
	1204	dfa->subexp_map = re_malloc (Idx, preg->re_nsub);
	1205	if (dfa->subexp_map != NULL)
	1206	{
	1207	Idx i;
	1208	for (i = 0; i < preg->re_nsub; i++)
	1209	dfa->subexp_map[i] = i;
	1210	preorder (dfa->str_tree, optimize_subexps, dfa);
	1211	for (i = 0; i < preg->re_nsub; i++)
	1212	if (dfa->subexp_map[i] != i)
	1213	break;
	1214	if (i == preg->re_nsub)
	1215	{
	1216	free (dfa->subexp_map);
	1217	dfa->subexp_map = NULL;
	1218	}
	1219	}
	1220
	1221	ret = postorder (dfa->str_tree, lower_subexps, preg);
	1222	if (BE (ret != REG_NOERROR, 0))
	1223	return ret;
	1224	ret = postorder (dfa->str_tree, calc_first, dfa);
	1225	if (BE (ret != REG_NOERROR, 0))
	1226	return ret;
	1227	preorder (dfa->str_tree, calc_next, dfa);
	1228	ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
	1229	if (BE (ret != REG_NOERROR, 0))
	1230	return ret;
	1231	ret = calc_eclosure (dfa);
	1232	if (BE (ret != REG_NOERROR, 0))
	1233	return ret;
	1234
	1235	/* We only need this during the prune_impossible_nodes pass in regexec.c;
	1236	skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */
	1237	if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match)
	1238	\|\| dfa->nbackref)
	1239	{
	1240	dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
	1241	if (BE (dfa->inveclosures == NULL, 0))
1.2.3 by Colin Watson Import upstream version 2.2	1242	return REG_ESPACE;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	1243	ret = calc_inveclosure (dfa);
	1244	}
	1245
	1246	return ret;
	1247	}
	1248
	1249	/* Our parse trees are very unbalanced, so we cannot use a stack to
	1250	implement parse tree visits. Instead, we use parent pointers and
	1251	some hairy code in these two functions. */
	1252	static reg_errcode_t
	1253	postorder (bin_tree_t root, reg_errcode_t (fn (void , bin_tree_t *)),
	1254	void *extra)
	1255	{
	1256	bin_tree_t node, prev;
	1257
	1258	for (node = root; ; )
	1259	{
	1260	/* Descend down the tree, preferably to the left (or to the right
	1261	if that's the only child). */
	1262	while (node->left \|\| node->right)
	1263	if (node->left)
1.2.3 by Colin Watson Import upstream version 2.2	1264	node = node->left;
	1265	else
	1266	node = node->right;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	1267
	1268	do
	1269	{
	1270	reg_errcode_t err = fn (extra, node);
	1271	if (BE (err != REG_NOERROR, 0))
	1272	return err;
1.2.3 by Colin Watson Import upstream version 2.2	1273	if (node->parent == NULL)
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	1274	return REG_NOERROR;
	1275	prev = node;
	1276	node = node->parent;
	1277	}
	1278	/* Go up while we have a node that is reached from the right. */
	1279	while (node->right == prev \|\| node->right == NULL);
	1280	node = node->right;
	1281	}
	1282	}
	1283
	1284	static reg_errcode_t
	1285	preorder (bin_tree_t root, reg_errcode_t (fn (void , bin_tree_t *)),
	1286	void *extra)
	1287	{
	1288	bin_tree_t *node;
	1289
	1290	for (node = root; ; )
	1291	{
	1292	reg_errcode_t err = fn (extra, node);
	1293	if (BE (err != REG_NOERROR, 0))
	1294	return err;
	1295
	1296	/* Go to the left node, or up and to the right. */
	1297	if (node->left)
	1298	node = node->left;
	1299	else
	1300	{
	1301	bin_tree_t *prev = NULL;
	1302	while (node->right == prev \|\| node->right == NULL)
	1303	{
	1304	prev = node;
	1305	node = node->parent;
	1306	if (!node)
1.2.3 by Colin Watson Import upstream version 2.2	1307	return REG_NOERROR;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	1308	}
	1309	node = node->right;
	1310	}
	1311	}
	1312	}
	1313
	1314	/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell
	1315	re_search_internal to map the inner one's opr.idx to this one's. Adjust
	1316	backreferences as well. Requires a preorder visit. */
	1317	static reg_errcode_t
	1318	optimize_subexps (void extra, bin_tree_t node)
	1319	{
	1320	re_dfa_t dfa = (re_dfa_t ) extra;
	1321
	1322	if (node->token.type == OP_BACK_REF && dfa->subexp_map)
	1323	{
	1324	int idx = node->token.opr.idx;
	1325	node->token.opr.idx = dfa->subexp_map[idx];
	1326	dfa->used_bkref_map \|= 1 << node->token.opr.idx;
	1327	}
	1328
	1329	else if (node->token.type == SUBEXP
1.2.3 by Colin Watson Import upstream version 2.2	1330	&& node->left && node->left->token.type == SUBEXP)
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	1331	{
	1332	Idx other_idx = node->left->token.opr.idx;
	1333
	1334	node->left = node->left->left;
	1335	if (node->left)
1.2.3 by Colin Watson Import upstream version 2.2	1336	node->left->parent = node;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	1337
	1338	dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
	1339	if (other_idx < BITSET_WORD_BITS)
	1340	dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);
	1341	}
	1342
	1343	return REG_NOERROR;
	1344	}
	1345
	1346	/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation
	1347	of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. */
	1348	static reg_errcode_t
	1349	lower_subexps (void extra, bin_tree_t node)
	1350	{
	1351	regex_t preg = (regex_t ) extra;
	1352	reg_errcode_t err = REG_NOERROR;
	1353
	1354	if (node->left && node->left->token.type == SUBEXP)
	1355	{
	1356	node->left = lower_subexp (&err, preg, node->left);
	1357	if (node->left)
	1358	node->left->parent = node;
	1359	}
	1360	if (node->right && node->right->token.type == SUBEXP)
	1361	{
	1362	node->right = lower_subexp (&err, preg, node->right);
	1363	if (node->right)
	1364	node->right->parent = node;
	1365	}
	1366
	1367	return err;
	1368	}
	1369
	1370	static bin_tree_t *
	1371	lower_subexp (reg_errcode_t err, regex_t preg, bin_tree_t *node)
	1372	{
1.3.2 by Colin Watson Import upstream version 3.2	1373	re_dfa_t *dfa = preg->buffer;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	1374	bin_tree_t *body = node->left;
	1375	bin_tree_t op, cls, tree1, tree;
	1376
	1377	if (preg->no_sub
	1378	/* We do not optimize empty subexpressions, because otherwise we may
	1379	have bad CONCAT nodes with NULL children. This is obviously not
	1380	very common, so we do not lose much. An example that triggers
	1381	this case is the sed "script" //x. */
	1382	&& node->left != NULL
	1383	&& (node->token.opr.idx >= BITSET_WORD_BITS
	1384	\|\| !(dfa->used_bkref_map
	1385	& ((bitset_word_t) 1 << node->token.opr.idx))))
	1386	return node->left;
	1387
	1388	/* Convert the SUBEXP node to the concatenation of an
	1389	OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. */
	1390	op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP);
	1391	cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
	1392	tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
	1393	tree = create_tree (dfa, op, tree1, CONCAT);
	1394	if (BE (tree == NULL \|\| tree1 == NULL \|\| op == NULL \|\| cls == NULL, 0))
	1395	{
	1396	*err = REG_ESPACE;
	1397	return NULL;
	1398	}
	1399
	1400	op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx;
	1401	op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp;
	1402	return tree;
	1403	}
	1404
	1405	/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton
	1406	nodes. Requires a postorder visit. */
	1407	static reg_errcode_t
	1408	calc_first (void extra, bin_tree_t node)
	1409	{
	1410	re_dfa_t dfa = (re_dfa_t ) extra;
	1411	if (node->token.type == CONCAT)
	1412	{
	1413	node->first = node->left->first;
	1414	node->node_idx = node->left->node_idx;
	1415	}
	1416	else
	1417	{
	1418	node->first = node;
	1419	node->node_idx = re_dfa_add_node (dfa, node->token);
	1420	if (BE (node->node_idx == REG_MISSING, 0))
1.2.3 by Colin Watson Import upstream version 2.2	1421	return REG_ESPACE;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	1422	if (node->token.type == ANCHOR)
1.2.3 by Colin Watson Import upstream version 2.2	1423	dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	1424	}
	1425	return REG_NOERROR;
	1426	}
	1427
	1428	/* Pass 2: compute NEXT on the tree. Preorder visit. */
	1429	static reg_errcode_t
	1430	calc_next (void extra, bin_tree_t node)
	1431	{
	1432	switch (node->token.type)
	1433	{
	1434	case OP_DUP_ASTERISK:
	1435	node->left->next = node;
	1436	break;
	1437	case CONCAT:
	1438	node->left->next = node->right->first;
	1439	node->right->next = node->next;
	1440	break;
	1441	default:
	1442	if (node->left)
	1443	node->left->next = node->next;
	1444	if (node->right)
1.2.3 by Colin Watson Import upstream version 2.2	1445	node->right->next = node->next;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	1446	break;
	1447	}
	1448	return REG_NOERROR;
	1449	}
	1450
	1451	/* Pass 3: link all DFA nodes to their NEXT node (any order will do). */
	1452	static reg_errcode_t
	1453	link_nfa_nodes (void extra, bin_tree_t node)
	1454	{
	1455	re_dfa_t dfa = (re_dfa_t ) extra;
	1456	Idx idx = node->node_idx;
	1457	reg_errcode_t err = REG_NOERROR;
	1458
	1459	switch (node->token.type)
	1460	{
	1461	case CONCAT:
	1462	break;
	1463
	1464	case END_OF_RE:
	1465	assert (node->next == NULL);
	1466	break;
	1467
	1468	case OP_DUP_ASTERISK:
	1469	case OP_ALT:
	1470	{
	1471	Idx left, right;
	1472	dfa->has_plural_match = 1;
	1473	if (node->left != NULL)
	1474	left = node->left->first->node_idx;
	1475	else
	1476	left = node->next->node_idx;
	1477	if (node->right != NULL)
	1478	right = node->right->first->node_idx;
	1479	else
	1480	right = node->next->node_idx;
	1481	assert (REG_VALID_INDEX (left));
	1482	assert (REG_VALID_INDEX (right));
	1483	err = re_node_set_init_2 (dfa->edests + idx, left, right);
	1484	}
	1485	break;
	1486
	1487	case ANCHOR:
	1488	case OP_OPEN_SUBEXP:
	1489	case OP_CLOSE_SUBEXP:
	1490	err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
	1491	break;
	1492
	1493	case OP_BACK_REF:
	1494	dfa->nexts[idx] = node->next->node_idx;
	1495	if (node->token.type == OP_BACK_REF)
1.2.3 by Colin Watson Import upstream version 2.2	1496	err = re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	1497	break;
	1498
	1499	default:
	1500	assert (!IS_EPSILON_NODE (node->token.type));
	1501	dfa->nexts[idx] = node->next->node_idx;
	1502	break;
	1503	}
	1504
	1505	return err;
	1506	}
	1507
	1508	/* Duplicate the epsilon closure of the node ROOT_NODE.
	1509	Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
	1510	to their own constraint. */
	1511
	1512	static reg_errcode_t
	1513	internal_function
	1514	duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node,
	1515	Idx root_node, unsigned int init_constraint)
	1516	{
	1517	Idx org_node, clone_node;
	1518	bool ok;
	1519	unsigned int constraint = init_constraint;
	1520	for (org_node = top_org_node, clone_node = top_clone_node;;)
	1521	{
	1522	Idx org_dest, clone_dest;
	1523	if (dfa->nodes[org_node].type == OP_BACK_REF)
	1524	{
	1525	/* If the back reference epsilon-transit, its destination must
	1526	also have the constraint. Then duplicate the epsilon closure
	1527	of the destination of the back reference, and store it in
	1528	edests of the back reference. */
	1529	org_dest = dfa->nexts[org_node];
	1530	re_node_set_empty (dfa->edests + clone_node);
	1531	clone_dest = duplicate_node (dfa, org_dest, constraint);
	1532	if (BE (clone_dest == REG_MISSING, 0))
	1533	return REG_ESPACE;
	1534	dfa->nexts[clone_node] = dfa->nexts[org_node];
	1535	ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
	1536	if (BE (! ok, 0))
	1537	return REG_ESPACE;
	1538	}
	1539	else if (dfa->edests[org_node].nelem == 0)
	1540	{
	1541	/* In case of the node can't epsilon-transit, don't duplicate the
	1542	destination and store the original destination as the
	1543	destination of the node. */
	1544	dfa->nexts[clone_node] = dfa->nexts[org_node];
	1545	break;
	1546	}
	1547	else if (dfa->edests[org_node].nelem == 1)
	1548	{
	1549	/* In case of the node can epsilon-transit, and it has only one
	1550	destination. */
	1551	org_dest = dfa->edests[org_node].elems[0];
	1552	re_node_set_empty (dfa->edests + clone_node);
	1553	/* If the node is root_node itself, it means the epsilon closure
	1554	has a loop. Then tie it to the destination of the root_node. */
	1555	if (org_node == root_node && clone_node != org_node)
	1556	{
	1557	ok = re_node_set_insert (dfa->edests + clone_node, org_dest);
	1558	if (BE (! ok, 0))
	1559	return REG_ESPACE;
	1560	break;
1561	}
1562	/* In case the node has another constraint, append it. */
1563	constraint \|= dfa->nodes[org_node].constraint;
1564	clone_dest = duplicate_node (dfa, org_dest, constraint);
1565	if (BE (clone_dest == REG_MISSING, 0))
1566	return REG_ESPACE;
1567	ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1568	if (BE (! ok, 0))
1569	return REG_ESPACE;
1570	}
1571	else /* dfa->edests[org_node].nelem == 2 */
1572	{
1573	/* In case of the node can epsilon-transit, and it has two
1574	destinations. In the bin_tree_t and DFA, that's '\|' and ''. /
1575	org_dest = dfa->edests[org_node].elems[0];
1576	re_node_set_empty (dfa->edests + clone_node);
1577	/* Search for a duplicated node which satisfies the constraint. */
1578	clone_dest = search_duplicated_node (dfa, org_dest, constraint);
1579	if (clone_dest == REG_MISSING)
1580	{
1581	/* There is no such duplicated node, create a new one. */
1582	reg_errcode_t err;
1583	clone_dest = duplicate_node (dfa, org_dest, constraint);
1584	if (BE (clone_dest == REG_MISSING, 0))
1585	return REG_ESPACE;
1586	ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1587	if (BE (! ok, 0))
1588	return REG_ESPACE;
1589	err = duplicate_node_closure (dfa, org_dest, clone_dest,
1590	root_node, constraint);
1591	if (BE (err != REG_NOERROR, 0))
1592	return err;
1593	}
1594	else
1595	{
1.2.3 by Colin Watson Import upstream version 2.2	1596	/* There is a duplicated node which satisfies the constraint,
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	1597	use it to avoid infinite loop. */
	1598	ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
	1599	if (BE (! ok, 0))
	1600	return REG_ESPACE;
	1601	}
	1602
	1603	org_dest = dfa->edests[org_node].elems[1];
	1604	clone_dest = duplicate_node (dfa, org_dest, constraint);
	1605	if (BE (clone_dest == REG_MISSING, 0))
	1606	return REG_ESPACE;
	1607	ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
	1608	if (BE (! ok, 0))
	1609	return REG_ESPACE;
	1610	}
	1611	org_node = org_dest;
	1612	clone_node = clone_dest;
	1613	}
	1614	return REG_NOERROR;
	1615	}
	1616
	1617	/* Search for a node which is duplicated from the node ORG_NODE, and
	1618	satisfies the constraint CONSTRAINT. */
	1619
	1620	static Idx
	1621	search_duplicated_node (const re_dfa_t *dfa, Idx org_node,
	1622	unsigned int constraint)
	1623	{
	1624	Idx idx;
	1625	for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx)
	1626	{
	1627	if (org_node == dfa->org_indices[idx]
	1628	&& constraint == dfa->nodes[idx].constraint)
	1629	return idx; /* Found. */
	1630	}
	1631	return REG_MISSING; /* Not found. */
	1632	}
	1633
	1634	/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
	1635	Return the index of the new node, or REG_MISSING if insufficient storage is
	1636	available. */
	1637
	1638	static Idx
	1639	duplicate_node (re_dfa_t *dfa, Idx org_idx, unsigned int constraint)
	1640	{
	1641	Idx dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
	1642	if (BE (dup_idx != REG_MISSING, 1))
	1643	{
	1644	dfa->nodes[dup_idx].constraint = constraint;
	1645	dfa->nodes[dup_idx].constraint \|= dfa->nodes[org_idx].constraint;
	1646	dfa->nodes[dup_idx].duplicated = 1;
	1647
	1648	/* Store the index of the original node. */
	1649	dfa->org_indices[dup_idx] = org_idx;
	1650	}
	1651	return dup_idx;
	1652	}
	1653
	1654	static reg_errcode_t
	1655	calc_inveclosure (re_dfa_t *dfa)
	1656	{
	1657	Idx src, idx;
	1658	bool ok;
	1659	for (idx = 0; idx < dfa->nodes_len; ++idx)
	1660	re_node_set_init_empty (dfa->inveclosures + idx);
1661
1662	for (src = 0; src < dfa->nodes_len; ++src)
1663	{
1664	Idx *elems = dfa->eclosures[src].elems;
1665	for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
1666	{
1667	ok = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);
1668	if (BE (! ok, 0))
1669	return REG_ESPACE;
1670	}
1671	}
1672
1673	return REG_NOERROR;
1674	}
1675
1676	/* Calculate "eclosure" for all the node in DFA. */
1677
1678	static reg_errcode_t
1679	calc_eclosure (re_dfa_t *dfa)
1680	{
1681	Idx node_idx;
1682	bool incomplete;
1683	#ifdef DEBUG
1684	assert (dfa->nodes_len > 0);
1685	#endif
1686	incomplete = false;
1687	/* For each nodes, calculate epsilon closure. */
1688	for (node_idx = 0; ; ++node_idx)
1689	{
1690	reg_errcode_t err;
1691	re_node_set eclosure_elem;
1692	if (node_idx == dfa->nodes_len)
1693	{
1694	if (!incomplete)
1695	break;
1696	incomplete = false;
1697	node_idx = 0;
1698	}
1699
1700	#ifdef DEBUG
1701	assert (dfa->eclosures[node_idx].nelem != REG_MISSING);
1702	#endif
1703
1704	/* If we have already calculated, skip it. */
1705	if (dfa->eclosures[node_idx].nelem != 0)
1706	continue;
1.3.1 by Colin Watson Import upstream version 3.1	1707	/* Calculate epsilon closure of 'node_idx'. */
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	1708	err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, true);
	1709	if (BE (err != REG_NOERROR, 0))
	1710	return err;
	1711
	1712	if (dfa->eclosures[node_idx].nelem == 0)
	1713	{
	1714	incomplete = true;
	1715	re_node_set_free (&eclosure_elem);
	1716	}
	1717	}
	1718	return REG_NOERROR;
	1719	}
	1720
	1721	/* Calculate epsilon closure of NODE. */
	1722
	1723	static reg_errcode_t
	1724	calc_eclosure_iter (re_node_set new_set, re_dfa_t dfa, Idx node, bool root)
	1725	{
	1726	reg_errcode_t err;
	1727	Idx i;
	1728	re_node_set eclosure;
1.2.3 by Colin Watson Import upstream version 2.2	1729	bool ok;
1.2.3 by Colin Watson Import upstream version 2.2	1730	bool incomplete = false;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	1731	err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
	1732	if (BE (err != REG_NOERROR, 0))
	1733	return err;
	1734
	1735	/* This indicates that we are calculating this node now.
	1736	We reference this value to avoid infinite loop. */
	1737	dfa->eclosures[node].nelem = REG_MISSING;
	1738
	1739	/* If the current node has constraints, duplicate all nodes
	1740	since they must inherit the constraints. */
	1741	if (dfa->nodes[node].constraint
	1742	&& dfa->edests[node].nelem
	1743	&& !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
	1744	{
	1745	err = duplicate_node_closure (dfa, node, node, node,
	1746	dfa->nodes[node].constraint);
	1747	if (BE (err != REG_NOERROR, 0))
	1748	return err;
	1749	}
	1750
	1751	/* Expand each epsilon destination nodes. */
	1752	if (IS_EPSILON_NODE(dfa->nodes[node].type))
	1753	for (i = 0; i < dfa->edests[node].nelem; ++i)
	1754	{
	1755	re_node_set eclosure_elem;
	1756	Idx edest = dfa->edests[node].elems[i];
1.3.1 by Colin Watson Import upstream version 3.1	1757	/* If calculating the epsilon closure of 'edest' is in progress,
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	1758	return intermediate result. */
	1759	if (dfa->eclosures[edest].nelem == REG_MISSING)
	1760	{
	1761	incomplete = true;
	1762	continue;
	1763	}
1.3.1 by Colin Watson Import upstream version 3.1	1764	/* If we haven't calculated the epsilon closure of 'edest' yet,
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	1765	calculate now. Otherwise use calculated epsilon closure. */
	1766	if (dfa->eclosures[edest].nelem == 0)
	1767	{
	1768	err = calc_eclosure_iter (&eclosure_elem, dfa, edest, false);
	1769	if (BE (err != REG_NOERROR, 0))
	1770	return err;
	1771	}
	1772	else
	1773	eclosure_elem = dfa->eclosures[edest];
1.3.1 by Colin Watson Import upstream version 3.1	1774	/* Merge the epsilon closure of 'edest'. */
1.2.3 by Colin Watson Import upstream version 2.2	1775	err = re_node_set_merge (&eclosure, &eclosure_elem);
	1776	if (BE (err != REG_NOERROR, 0))
	1777	return err;
1.3.1 by Colin Watson Import upstream version 3.1	1778	/* If the epsilon closure of 'edest' is incomplete,
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	1779	the epsilon closure of this node is also incomplete. */
	1780	if (dfa->eclosures[edest].nelem == 0)
	1781	{
	1782	incomplete = true;
	1783	re_node_set_free (&eclosure_elem);
	1784	}
	1785	}
	1786
1.2.3 by Colin Watson Import upstream version 2.2	1787	/* An epsilon closure includes itself. */
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	1788	ok = re_node_set_insert (&eclosure, node);
	1789	if (BE (! ok, 0))
	1790	return REG_ESPACE;
	1791	if (incomplete && !root)
	1792	dfa->eclosures[node].nelem = 0;
	1793	else
	1794	dfa->eclosures[node] = eclosure;
	1795	*new_set = eclosure;
	1796	return REG_NOERROR;
	1797	}
	1798
	1799	/* Functions for token which are used in the parser. */
	1800
	1801	/* Fetch a token from INPUT.
	1802	We must not use this function inside bracket expressions. */
	1803
	1804	static void
	1805	internal_function
	1806	fetch_token (re_token_t result, re_string_t input, reg_syntax_t syntax)
	1807	{
	1808	re_string_skip_bytes (input, peek_token (result, input, syntax));
	1809	}
	1810
	1811	/* Peek a token from INPUT, and return the length of the token.
	1812	We must not use this function inside bracket expressions. */
	1813
	1814	static int
	1815	internal_function
	1816	peek_token (re_token_t token, re_string_t input, reg_syntax_t syntax)
	1817	{
	1818	unsigned char c;
	1819
	1820	if (re_string_eoi (input))
	1821	{
	1822	token->type = END_OF_RE;
	1823	return 0;
	1824	}
	1825
	1826	c = re_string_peek_byte (input, 0);
	1827	token->opr.c = c;
	1828
	1829	token->word_char = 0;
	1830	#ifdef RE_ENABLE_I18N
	1831	token->mb_partial = 0;
	1832	if (input->mb_cur_max > 1 &&
	1833	!re_string_first_byte (input, re_string_cur_idx (input)))
	1834	{
	1835	token->type = CHARACTER;
	1836	token->mb_partial = 1;
	1837	return 1;
	1838	}
	1839	#endif
	1840	if (c == '\\')
	1841	{
	1842	unsigned char c2;
	1843	if (re_string_cur_idx (input) + 1 >= re_string_length (input))
	1844	{
	1845	token->type = BACK_SLASH;
	1846	return 1;
	1847	}
	1848
	1849	c2 = re_string_peek_byte_case (input, 1);
	1850	token->opr.c = c2;
	1851	token->type = CHARACTER;
1852	#ifdef RE_ENABLE_I18N
1853	if (input->mb_cur_max > 1)
1854	{
1855	wint_t wc = re_string_wchar_at (input,
1856	re_string_cur_idx (input) + 1);
1857	token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
1858	}
1859	else
1860	#endif
1861	token->word_char = IS_WORD_CHAR (c2) != 0;
1862
1863	switch (c2)
1864	{
1865	case '\|':
1866	if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
1867	token->type = OP_ALT;
1868	break;
1869	case '1': case '2': case '3': case '4': case '5':
1870	case '6': case '7': case '8': case '9':
1871	if (!(syntax & RE_NO_BK_REFS))
1872	{
1873	token->type = OP_BACK_REF;
1874	token->opr.idx = c2 - '1';
1875	}
1876	break;
1877	case '<':
1878	if (!(syntax & RE_NO_GNU_OPS))
1879	{
1880	token->type = ANCHOR;
1881	token->opr.ctx_type = WORD_FIRST;
1882	}
1883	break;
1884	case '>':
1885	if (!(syntax & RE_NO_GNU_OPS))
1886	{
1887	token->type = ANCHOR;
1888	token->opr.ctx_type = WORD_LAST;
1889	}
1890	break;
1891	case 'b':
1892	if (!(syntax & RE_NO_GNU_OPS))
1893	{
1894	token->type = ANCHOR;
1895	token->opr.ctx_type = WORD_DELIM;
1896	}
1897	break;
1898	case 'B':
1899	if (!(syntax & RE_NO_GNU_OPS))
1900	{
1901	token->type = ANCHOR;
1902	token->opr.ctx_type = NOT_WORD_DELIM;
1903	}
1904	break;
1905	case 'w':
1906	if (!(syntax & RE_NO_GNU_OPS))
1907	token->type = OP_WORD;
1908	break;
1909	case 'W':
1910	if (!(syntax & RE_NO_GNU_OPS))
1911	token->type = OP_NOTWORD;
1912	break;
1913	case 's':
1914	if (!(syntax & RE_NO_GNU_OPS))
1915	token->type = OP_SPACE;
1916	break;
1917	case 'S':
1918	if (!(syntax & RE_NO_GNU_OPS))
1919	token->type = OP_NOTSPACE;
1920	break;
1921	case '`':
1922	if (!(syntax & RE_NO_GNU_OPS))
1923	{
1924	token->type = ANCHOR;
1925	token->opr.ctx_type = BUF_FIRST;
1926	}
1927	break;
1928	case '\'':
1929	if (!(syntax & RE_NO_GNU_OPS))
1930	{
1931	token->type = ANCHOR;
1932	token->opr.ctx_type = BUF_LAST;
1933	}
1934	break;
1935	case '(':
1936	if (!(syntax & RE_NO_BK_PARENS))
1937	token->type = OP_OPEN_SUBEXP;
1938	break;
1939	case ')':
1940	if (!(syntax & RE_NO_BK_PARENS))
1941	token->type = OP_CLOSE_SUBEXP;
1942	break;
1943	case '+':
1944	if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
1945	token->type = OP_DUP_PLUS;
1946	break;
1947	case '?':
1948	if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
1949	token->type = OP_DUP_QUESTION;
1950	break;
1951	case '{':
1952	if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
1953	token->type = OP_OPEN_DUP_NUM;
1954	break;
1955	case '}':
1956	if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
1957	token->type = OP_CLOSE_DUP_NUM;
1958	break;
1959	default:
1960	break;
1961	}
1962	return 2;
1963	}
1964
1965	token->type = CHARACTER;
1966	#ifdef RE_ENABLE_I18N
1967	if (input->mb_cur_max > 1)
1968	{
1969	wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
1970	token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
1971	}
1972	else
1973	#endif
1974	token->word_char = IS_WORD_CHAR (token->opr.c);
1975
1976	switch (c)
1977	{
1978	case '\n':
1979	if (syntax & RE_NEWLINE_ALT)
1980	token->type = OP_ALT;
1981	break;
1982	case '\|':
1983	if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
1984	token->type = OP_ALT;
1985	break;
1986	case '*':
1987	token->type = OP_DUP_ASTERISK;
1988	break;
1989	case '+':
1990	if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
1991	token->type = OP_DUP_PLUS;
1992	break;
1993	case '?':
1994	if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
1995	token->type = OP_DUP_QUESTION;
1996	break;
1997	case '{':
1998	if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1999	token->type = OP_OPEN_DUP_NUM;
2000	break;
2001	case '}':
2002	if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
2003	token->type = OP_CLOSE_DUP_NUM;
2004	break;
2005	case '(':
2006	if (syntax & RE_NO_BK_PARENS)
2007	token->type = OP_OPEN_SUBEXP;
2008	break;
2009	case ')':
2010	if (syntax & RE_NO_BK_PARENS)
2011	token->type = OP_CLOSE_SUBEXP;
2012	break;
2013	case '[':
2014	token->type = OP_OPEN_BRACKET;
2015	break;
2016	case '.':
2017	token->type = OP_PERIOD;
2018	break;
2019	case '^':
2020	if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS \| RE_CARET_ANCHORS_HERE)) &&
2021	re_string_cur_idx (input) != 0)
2022	{
2023	char prev = re_string_peek_byte (input, -1);
2024	if (!(syntax & RE_NEWLINE_ALT) \|\| prev != '\n')
2025	break;
2026	}
2027	token->type = ANCHOR;
2028	token->opr.ctx_type = LINE_FIRST;
2029	break;
2030	case '$':
2031	if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
2032	re_string_cur_idx (input) + 1 != re_string_length (input))
2033	{
2034	re_token_t next;
2035	re_string_skip_bytes (input, 1);
2036	peek_token (&next, input, syntax);
2037	re_string_skip_bytes (input, -1);
2038	if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
2039	break;
2040	}
2041	token->type = ANCHOR;
2042	token->opr.ctx_type = LINE_LAST;
2043	break;
2044	default:
2045	break;
2046	}
2047	return 1;
2048	}
2049
2050	/* Peek a token from INPUT, and return the length of the token.
2051	We must not use this function out of bracket expressions. */
2052
2053	static int
2054	internal_function
2055	peek_token_bracket (re_token_t token, re_string_t input, reg_syntax_t syntax)
2056	{
2057	unsigned char c;
2058	if (re_string_eoi (input))
2059	{
2060	token->type = END_OF_RE;
2061	return 0;
2062	}
2063	c = re_string_peek_byte (input, 0);
2064	token->opr.c = c;
2065
2066	#ifdef RE_ENABLE_I18N
2067	if (input->mb_cur_max > 1 &&
2068	!re_string_first_byte (input, re_string_cur_idx (input)))
2069	{
2070	token->type = CHARACTER;
2071	return 1;
2072	}
2073	#endif /* RE_ENABLE_I18N */
2074
2075	if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
2076	&& re_string_cur_idx (input) + 1 < re_string_length (input))
2077	{
2078	/* In this case, '\' escape a character. */
2079	unsigned char c2;
2080	re_string_skip_bytes (input, 1);
2081	c2 = re_string_peek_byte (input, 0);
2082	token->opr.c = c2;
2083	token->type = CHARACTER;
2084	return 1;
2085	}
2086	if (c == '[') /* '[' is a special char in a bracket exps. */
2087	{
2088	unsigned char c2;
2089	int token_len;
2090	if (re_string_cur_idx (input) + 1 < re_string_length (input))
2091	c2 = re_string_peek_byte (input, 1);
2092	else
2093	c2 = 0;
2094	token->opr.c = c2;
2095	token_len = 2;
2096	switch (c2)
2097	{
2098	case '.':
2099	token->type = OP_OPEN_COLL_ELEM;
2100	break;
2101	case '=':
2102	token->type = OP_OPEN_EQUIV_CLASS;
2103	break;
2104	case ':':
2105	if (syntax & RE_CHAR_CLASSES)
2106	{
2107	token->type = OP_OPEN_CHAR_CLASS;
2108	break;
2109	}
2110	/* else fall through. */
2111	default:
2112	token->type = CHARACTER;
2113	token->opr.c = c;
2114	token_len = 1;
2115	break;
2116	}
2117	return token_len;
2118	}
2119	switch (c)
2120	{
2121	case '-':
2122	token->type = OP_CHARSET_RANGE;
2123	break;
2124	case ']':
2125	token->type = OP_CLOSE_BRACKET;
2126	break;
2127	case '^':
2128	token->type = OP_NON_MATCH_LIST;
2129	break;
2130	default:
2131	token->type = CHARACTER;
2132	}
2133	return 1;
2134	}
2135
2136	/* Functions for parser. */
2137
2138	/* Entry point of the parser.
2139	Parse the regular expression REGEXP and return the structure tree.
1.3.1 by Colin Watson Import upstream version 3.1	2140	If an error occurs, ERR is set by error code, and return NULL.
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2141	This function build the following tree, from regular expression <reg_exp>:
	2142	CAT
	2143	/ \
	2144	/ \
	2145	<reg_exp> EOR
	2146
	2147	CAT means concatenation.
	2148	EOR means end of regular expression. */
	2149
	2150	static bin_tree_t *
	2151	parse (re_string_t regexp, regex_t preg, reg_syntax_t syntax,
	2152	reg_errcode_t *err)
	2153	{
1.3.2 by Colin Watson Import upstream version 3.2	2154	re_dfa_t *dfa = preg->buffer;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2155	bin_tree_t tree, eor, *root;
	2156	re_token_t current_token;
	2157	dfa->syntax = syntax;
	2158	fetch_token (&current_token, regexp, syntax \| RE_CARET_ANCHORS_HERE);
	2159	tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err);
	2160	if (BE (*err != REG_NOERROR && tree == NULL, 0))
	2161	return NULL;
	2162	eor = create_tree (dfa, NULL, NULL, END_OF_RE);
	2163	if (tree != NULL)
	2164	root = create_tree (dfa, tree, eor, CONCAT);
	2165	else
	2166	root = eor;
	2167	if (BE (eor == NULL \|\| root == NULL, 0))
	2168	{
	2169	*err = REG_ESPACE;
	2170	return NULL;
	2171	}
	2172	return root;
	2173	}
	2174
	2175	/* This function build the following tree, from regular expression
	2176	<branch1>\|<branch2>:
	2177	ALT
	2178	/ \
	2179	/ \
	2180	<branch1> <branch2>
	2181
1.3.1 by Colin Watson Import upstream version 3.1	2182	ALT means alternative, which represents the operator '\|'. */
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2183
	2184	static bin_tree_t *
	2185	parse_reg_exp (re_string_t regexp, regex_t preg, re_token_t *token,
	2186	reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
	2187	{
1.3.2 by Colin Watson Import upstream version 3.2	2188	re_dfa_t *dfa = preg->buffer;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2189	bin_tree_t tree, branch = NULL;
	2190	tree = parse_branch (regexp, preg, token, syntax, nest, err);
	2191	if (BE (*err != REG_NOERROR && tree == NULL, 0))
	2192	return NULL;
	2193
	2194	while (token->type == OP_ALT)
	2195	{
	2196	fetch_token (token, regexp, syntax \| RE_CARET_ANCHORS_HERE);
	2197	if (token->type != OP_ALT && token->type != END_OF_RE
	2198	&& (nest == 0 \|\| token->type != OP_CLOSE_SUBEXP))
	2199	{
	2200	branch = parse_branch (regexp, preg, token, syntax, nest, err);
	2201	if (BE (*err != REG_NOERROR && branch == NULL, 0))
	2202	return NULL;
	2203	}
	2204	else
	2205	branch = NULL;
	2206	tree = create_tree (dfa, tree, branch, OP_ALT);
	2207	if (BE (tree == NULL, 0))
	2208	{
	2209	*err = REG_ESPACE;
	2210	return NULL;
	2211	}
	2212	}
	2213	return tree;
	2214	}
	2215
	2216	/* This function build the following tree, from regular expression
	2217	<exp1><exp2>:
	2218	CAT
	2219	/ \
	2220	/ \
	2221	<exp1> <exp2>
	2222
	2223	CAT means concatenation. */
	2224
	2225	static bin_tree_t *
	2226	parse_branch (re_string_t regexp, regex_t preg, re_token_t *token,
	2227	reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
	2228	{
	2229	bin_tree_t tree, expr;
1.3.2 by Colin Watson Import upstream version 3.2	2230	re_dfa_t *dfa = preg->buffer;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2231	tree = parse_expression (regexp, preg, token, syntax, nest, err);
	2232	if (BE (*err != REG_NOERROR && tree == NULL, 0))
	2233	return NULL;
	2234
	2235	while (token->type != OP_ALT && token->type != END_OF_RE
	2236	&& (nest == 0 \|\| token->type != OP_CLOSE_SUBEXP))
	2237	{
	2238	expr = parse_expression (regexp, preg, token, syntax, nest, err);
	2239	if (BE (*err != REG_NOERROR && expr == NULL, 0))
	2240	{
1.3.1 by Colin Watson Import upstream version 3.1	2241	if (tree != NULL)
1.3.1 by Colin Watson Import upstream version 3.1	2242	postorder (tree, free_tree, NULL);
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2243	return NULL;
	2244	}
	2245	if (tree != NULL && expr != NULL)
	2246	{
1.3.1 by Colin Watson Import upstream version 3.1	2247	bin_tree_t *newtree = create_tree (dfa, tree, expr, CONCAT);
1.3.1 by Colin Watson Import upstream version 3.1	2248	if (newtree == NULL)
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2249	{
1.3.1 by Colin Watson Import upstream version 3.1	2250	postorder (expr, free_tree, NULL);
1.3.1 by Colin Watson Import upstream version 3.1	2251	postorder (tree, free_tree, NULL);
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2252	*err = REG_ESPACE;
	2253	return NULL;
	2254	}
1.3.1 by Colin Watson Import upstream version 3.1	2255	tree = newtree;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2256	}
	2257	else if (tree == NULL)
	2258	tree = expr;
	2259	/* Otherwise expr == NULL, we don't need to create new tree. */
	2260	}
	2261	return tree;
	2262	}
	2263
	2264	/* This function build the following tree, from regular expression a*:
	2265	*
	2266	\|
	2267	a
	2268	*/
	2269
	2270	static bin_tree_t *
	2271	parse_expression (re_string_t regexp, regex_t preg, re_token_t *token,
	2272	reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
	2273	{
1.3.2 by Colin Watson Import upstream version 3.2	2274	re_dfa_t *dfa = preg->buffer;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2275	bin_tree_t *tree;
	2276	switch (token->type)
	2277	{
	2278	case CHARACTER:
	2279	tree = create_token_tree (dfa, NULL, NULL, token);
	2280	if (BE (tree == NULL, 0))
	2281	{
	2282	*err = REG_ESPACE;
	2283	return NULL;
	2284	}
	2285	#ifdef RE_ENABLE_I18N
	2286	if (dfa->mb_cur_max > 1)
	2287	{
	2288	while (!re_string_eoi (regexp)
	2289	&& !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
	2290	{
	2291	bin_tree_t *mbc_remain;
	2292	fetch_token (token, regexp, syntax);
	2293	mbc_remain = create_token_tree (dfa, NULL, NULL, token);
	2294	tree = create_tree (dfa, tree, mbc_remain, CONCAT);
	2295	if (BE (mbc_remain == NULL \|\| tree == NULL, 0))
	2296	{
	2297	*err = REG_ESPACE;
	2298	return NULL;
	2299	}
	2300	}
	2301	}
	2302	#endif
	2303	break;
	2304	case OP_OPEN_SUBEXP:
	2305	tree = parse_sub_exp (regexp, preg, token, syntax, nest + 1, err);
	2306	if (BE (*err != REG_NOERROR && tree == NULL, 0))
	2307	return NULL;
	2308	break;
	2309	case OP_OPEN_BRACKET:
	2310	tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
	2311	if (BE (*err != REG_NOERROR && tree == NULL, 0))
	2312	return NULL;
	2313	break;
	2314	case OP_BACK_REF:
	2315	if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1))
	2316	{
	2317	*err = REG_ESUBREG;
	2318	return NULL;
	2319	}
	2320	dfa->used_bkref_map \|= 1 << token->opr.idx;
	2321	tree = create_token_tree (dfa, NULL, NULL, token);
	2322	if (BE (tree == NULL, 0))
	2323	{
	2324	*err = REG_ESPACE;
	2325	return NULL;
	2326	}
	2327	++dfa->nbackref;
	2328	dfa->has_mb_node = 1;
	2329	break;
	2330	case OP_OPEN_DUP_NUM:
	2331	if (syntax & RE_CONTEXT_INVALID_DUP)
	2332	{
	2333	*err = REG_BADRPT;
	2334	return NULL;
	2335	}
	2336	/* FALLTHROUGH */
	2337	case OP_DUP_ASTERISK:
	2338	case OP_DUP_PLUS:
2339	case OP_DUP_QUESTION:
2340	if (syntax & RE_CONTEXT_INVALID_OPS)
2341	{
2342	*err = REG_BADRPT;
2343	return NULL;
2344	}
2345	else if (syntax & RE_CONTEXT_INDEP_OPS)
2346	{
2347	fetch_token (token, regexp, syntax);
2348	return parse_expression (regexp, preg, token, syntax, nest, err);
2349	}
2350	/* else fall through */
2351	case OP_CLOSE_SUBEXP:
2352	if ((token->type == OP_CLOSE_SUBEXP) &&
2353	!(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
2354	{
2355	*err = REG_ERPAREN;
2356	return NULL;
2357	}
2358	/* else fall through */
2359	case OP_CLOSE_DUP_NUM:
2360	/* We treat it as a normal character. */
2361
2362	/* Then we can these characters as normal characters. */
2363	token->type = CHARACTER;
2364	/* mb_partial and word_char bits should be initialized already
2365	by peek_token. */
2366	tree = create_token_tree (dfa, NULL, NULL, token);
2367	if (BE (tree == NULL, 0))
2368	{
2369	*err = REG_ESPACE;
2370	return NULL;
2371	}
2372	break;
2373	case ANCHOR:
2374	if ((token->opr.ctx_type
2375	& (WORD_DELIM \| NOT_WORD_DELIM \| WORD_FIRST \| WORD_LAST))
2376	&& dfa->word_ops_used == 0)
2377	init_word_char (dfa);
2378	if (token->opr.ctx_type == WORD_DELIM
1.2.3 by Colin Watson Import upstream version 2.2	2379	\|\| token->opr.ctx_type == NOT_WORD_DELIM)
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2380	{
	2381	bin_tree_t tree_first, tree_last;
	2382	if (token->opr.ctx_type == WORD_DELIM)
	2383	{
	2384	token->opr.ctx_type = WORD_FIRST;
	2385	tree_first = create_token_tree (dfa, NULL, NULL, token);
	2386	token->opr.ctx_type = WORD_LAST;
1.2.3 by Colin Watson Import upstream version 2.2	2387	}
	2388	else
	2389	{
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2390	token->opr.ctx_type = INSIDE_WORD;
	2391	tree_first = create_token_tree (dfa, NULL, NULL, token);
	2392	token->opr.ctx_type = INSIDE_NOTWORD;
1.2.3 by Colin Watson Import upstream version 2.2	2393	}
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2394	tree_last = create_token_tree (dfa, NULL, NULL, token);
	2395	tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
	2396	if (BE (tree_first == NULL \|\| tree_last == NULL \|\| tree == NULL, 0))
	2397	{
	2398	*err = REG_ESPACE;
	2399	return NULL;
	2400	}
	2401	}
	2402	else
	2403	{
	2404	tree = create_token_tree (dfa, NULL, NULL, token);
	2405	if (BE (tree == NULL, 0))
	2406	{
	2407	*err = REG_ESPACE;
	2408	return NULL;
	2409	}
	2410	}
	2411	/* We must return here, since ANCHORs can't be followed
	2412	by repetition operators.
	2413	eg. RE"^" is invalid or "<ANCHOR(^)><CHAR()>",
	2414	it must not be "<ANCHOR(^)><REPEAT()>". /
	2415	fetch_token (token, regexp, syntax);
	2416	return tree;
	2417	case OP_PERIOD:
	2418	tree = create_token_tree (dfa, NULL, NULL, token);
	2419	if (BE (tree == NULL, 0))
	2420	{
	2421	*err = REG_ESPACE;
	2422	return NULL;
	2423	}
	2424	if (dfa->mb_cur_max > 1)
	2425	dfa->has_mb_node = 1;
	2426	break;
	2427	case OP_WORD:
	2428	case OP_NOTWORD:
	2429	tree = build_charclass_op (dfa, regexp->trans,
1.3.2 by Colin Watson Import upstream version 3.2	2430	"alnum",
1.3.2 by Colin Watson Import upstream version 3.2	2431	"_",
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2432	token->type == OP_NOTWORD, err);
	2433	if (BE (*err != REG_NOERROR && tree == NULL, 0))
	2434	return NULL;
	2435	break;
	2436	case OP_SPACE:
	2437	case OP_NOTSPACE:
	2438	tree = build_charclass_op (dfa, regexp->trans,
1.3.2 by Colin Watson Import upstream version 3.2	2439	"space",
1.3.2 by Colin Watson Import upstream version 3.2	2440	"",
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2441	token->type == OP_NOTSPACE, err);
	2442	if (BE (*err != REG_NOERROR && tree == NULL, 0))
	2443	return NULL;
	2444	break;
	2445	case OP_ALT:
	2446	case END_OF_RE:
	2447	return NULL;
	2448	case BACK_SLASH:
	2449	*err = REG_EESCAPE;
	2450	return NULL;
	2451	default:
	2452	/* Must not happen? */
	2453	#ifdef DEBUG
	2454	assert (0);
	2455	#endif
	2456	return NULL;
	2457	}
	2458	fetch_token (token, regexp, syntax);
	2459
	2460	while (token->type == OP_DUP_ASTERISK \|\| token->type == OP_DUP_PLUS
	2461	\|\| token->type == OP_DUP_QUESTION \|\| token->type == OP_OPEN_DUP_NUM)
	2462	{
	2463	tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
	2464	if (BE (*err != REG_NOERROR && tree == NULL, 0))
	2465	return NULL;
	2466	/* In BRE consecutive duplications are not allowed. */
	2467	if ((syntax & RE_CONTEXT_INVALID_DUP)
	2468	&& (token->type == OP_DUP_ASTERISK
	2469	\|\| token->type == OP_OPEN_DUP_NUM))
	2470	{
	2471	*err = REG_BADRPT;
	2472	return NULL;
	2473	}
	2474	}
	2475
	2476	return tree;
	2477	}
	2478
	2479	/* This function build the following tree, from regular expression
	2480	(<reg_exp>):
	2481	SUBEXP
	2482	\|
	2483	<reg_exp>
	2484	*/
	2485
	2486	static bin_tree_t *
	2487	parse_sub_exp (re_string_t regexp, regex_t preg, re_token_t *token,
	2488	reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
	2489	{
1.3.2 by Colin Watson Import upstream version 3.2	2490	re_dfa_t *dfa = preg->buffer;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2491	bin_tree_t *tree;
	2492	size_t cur_nsub;
	2493	cur_nsub = preg->re_nsub++;
	2494
	2495	fetch_token (token, regexp, syntax \| RE_CARET_ANCHORS_HERE);
	2496
	2497	/* The subexpression may be a null string. */
	2498	if (token->type == OP_CLOSE_SUBEXP)
	2499	tree = NULL;
	2500	else
	2501	{
	2502	tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
	2503	if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
1.3.1 by Colin Watson Import upstream version 3.1	2504	{
	2505	if (tree != NULL)
	2506	postorder (tree, free_tree, NULL);
	2507	*err = REG_EPAREN;
	2508	}
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2509	if (BE (*err != REG_NOERROR, 0))
	2510	return NULL;
	2511	}
	2512
	2513	if (cur_nsub <= '9' - '1')
	2514	dfa->completed_bkref_map \|= 1 << cur_nsub;
	2515
	2516	tree = create_tree (dfa, tree, NULL, SUBEXP);
	2517	if (BE (tree == NULL, 0))
	2518	{
	2519	*err = REG_ESPACE;
	2520	return NULL;
	2521	}
	2522	tree->token.opr.idx = cur_nsub;
	2523	return tree;
	2524	}
	2525
	2526	/* This function parse repetition operators like "", "+", "{1,3}" etc. /
	2527
	2528	static bin_tree_t *
	2529	parse_dup_op (bin_tree_t elem, re_string_t regexp, re_dfa_t *dfa,
	2530	re_token_t token, reg_syntax_t syntax, reg_errcode_t err)
	2531	{
	2532	bin_tree_t tree = NULL, old_tree = NULL;
	2533	Idx i, start, end, start_idx = re_string_cur_idx (regexp);
	2534	re_token_t start_token = *token;
	2535
	2536	if (token->type == OP_OPEN_DUP_NUM)
	2537	{
	2538	end = 0;
	2539	start = fetch_number (regexp, token, syntax);
	2540	if (start == REG_MISSING)
	2541	{
	2542	if (token->type == CHARACTER && token->opr.c == ',')
	2543	start = 0; /* We treat "{,m}" as "{0,m}". */
	2544	else
	2545	{
	2546	err = REG_BADBR; / <re>{} is invalid. */
	2547	return NULL;
	2548	}
	2549	}
	2550	if (BE (start != REG_ERROR, 1))
	2551	{
	2552	/* We treat "{n}" as "{n,n}". */
	2553	end = ((token->type == OP_CLOSE_DUP_NUM) ? start
	2554	: ((token->type == CHARACTER && token->opr.c == ',')
	2555	? fetch_number (regexp, token, syntax) : REG_ERROR));
	2556	}
	2557	if (BE (start == REG_ERROR \|\| end == REG_ERROR, 0))
	2558	{
	2559	/* Invalid sequence. */
	2560	if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))
	2561	{
	2562	if (token->type == END_OF_RE)
	2563	*err = REG_EBRACE;
	2564	else
	2565	*err = REG_BADBR;
	2566
	2567	return NULL;
	2568	}
	2569
	2570	/* If the syntax bit is set, rollback. */
	2571	re_string_set_index (regexp, start_idx);
	2572	*token = start_token;
2573	token->type = CHARACTER;
2574	/* mb_partial and word_char bits should be already initialized by
2575	peek_token. */
2576	return elem;
2577	}
2578
1.2.3 by Colin Watson Import upstream version 2.2	2579	if (BE ((end != REG_MISSING && start > end)
1.2.3 by Colin Watson Import upstream version 2.2	2580	\|\| token->type != OP_CLOSE_DUP_NUM, 0))
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2581	{
	2582	/* First number greater than second. */
	2583	*err = REG_BADBR;
	2584	return NULL;
	2585	}
1.3.2 by Colin Watson Import upstream version 3.2	2586
	2587	if (BE (RE_DUP_MAX < (end == REG_MISSING ? start : end), 0))
	2588	{
	2589	*err = REG_ESIZE;
	2590	return NULL;
	2591	}
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2592	}
	2593	else
	2594	{
	2595	start = (token->type == OP_DUP_PLUS) ? 1 : 0;
	2596	end = (token->type == OP_DUP_QUESTION) ? 1 : REG_MISSING;
	2597	}
	2598
	2599	fetch_token (token, regexp, syntax);
	2600
	2601	if (BE (elem == NULL, 0))
	2602	return NULL;
	2603	if (BE (start == 0 && end == 0, 0))
	2604	{
	2605	postorder (elem, free_tree, NULL);
	2606	return NULL;
	2607	}
	2608
	2609	/* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */
	2610	if (BE (start > 0, 0))
	2611	{
	2612	tree = elem;
	2613	for (i = 2; i <= start; ++i)
	2614	{
	2615	elem = duplicate_tree (elem, dfa);
	2616	tree = create_tree (dfa, tree, elem, CONCAT);
	2617	if (BE (elem == NULL \|\| tree == NULL, 0))
	2618	goto parse_dup_op_espace;
	2619	}
	2620
	2621	if (start == end)
	2622	return tree;
	2623
	2624	/* Duplicate ELEM before it is marked optional. */
	2625	elem = duplicate_tree (elem, dfa);
	2626	old_tree = tree;
	2627	}
	2628	else
	2629	old_tree = NULL;
	2630
	2631	if (elem->token.type == SUBEXP)
1.3.2 by Colin Watson Import upstream version 3.2	2632	{
	2633	uintptr_t subidx = elem->token.opr.idx;
	2634	postorder (elem, mark_opt_subexp, (void *) subidx);
	2635	}
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2636
	2637	tree = create_tree (dfa, elem, NULL,
	2638	(end == REG_MISSING ? OP_DUP_ASTERISK : OP_ALT));
	2639	if (BE (tree == NULL, 0))
	2640	goto parse_dup_op_espace;
	2641
1.2.3 by Colin Watson Import upstream version 2.2	2642	/* From gnulib's "intprops.h":
	2643	True if the arithmetic type T is signed. */
	2644	#define TYPE_SIGNED(t) (! ((t) 0 < (t) -1))
	2645
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2646	/* This loop is actually executed only when end != REG_MISSING,
	2647	to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have
	2648	already created the start+1-th copy. */
1.2.3 by Colin Watson Import upstream version 2.2	2649	if (TYPE_SIGNED (Idx) \|\| end != REG_MISSING)
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2650	for (i = start + 2; i <= end; ++i)
	2651	{
	2652	elem = duplicate_tree (elem, dfa);
	2653	tree = create_tree (dfa, tree, elem, CONCAT);
	2654	if (BE (elem == NULL \|\| tree == NULL, 0))
	2655	goto parse_dup_op_espace;
	2656
	2657	tree = create_tree (dfa, tree, NULL, OP_ALT);
	2658	if (BE (tree == NULL, 0))
	2659	goto parse_dup_op_espace;
	2660	}
	2661
	2662	if (old_tree)
	2663	tree = create_tree (dfa, old_tree, tree, CONCAT);
	2664
	2665	return tree;
	2666
	2667	parse_dup_op_espace:
	2668	*err = REG_ESPACE;
	2669	return NULL;
	2670	}
	2671
	2672	/* Size of the names for collating symbol/equivalence_class/character_class.
	2673	I'm not sure, but maybe enough. */
	2674	#define BRACKET_NAME_BUF_SIZE 32
	2675
	2676	#ifndef _LIBC
	2677	/* Local function for parse_bracket_exp only used in case of NOT _LIBC.
	2678	Build the range expression which starts from START_ELEM, and ends
	2679	at END_ELEM. The result are written to MBCSET and SBCSET.
	2680	RANGE_ALLOC is the allocated size of mbcset->range_starts, and
1.3.1 by Colin Watson Import upstream version 3.1	2681	mbcset->range_ends, is a pointer argument since we may
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2682	update it. */
	2683
	2684	static reg_errcode_t
	2685	internal_function
	2686	# ifdef RE_ENABLE_I18N
1.2.4 by Otavio Salvador Import upstream version 2.3	2687	build_range_exp (const reg_syntax_t syntax,
	2688	bitset_t sbcset,
	2689	re_charset_t *mbcset,
	2690	Idx *range_alloc,
	2691	const bracket_elem_t *start_elem,
	2692	const bracket_elem_t *end_elem)
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2693	# else /* not RE_ENABLE_I18N */
1.2.4 by Otavio Salvador Import upstream version 2.3	2694	build_range_exp (const reg_syntax_t syntax,
	2695	bitset_t sbcset,
	2696	const bracket_elem_t *start_elem,
	2697	const bracket_elem_t *end_elem)
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2698	# endif /* not RE_ENABLE_I18N */
	2699	{
	2700	unsigned int start_ch, end_ch;
	2701	/* Equivalence Classes and Character Classes can't be a range start/end. */
	2702	if (BE (start_elem->type == EQUIV_CLASS \|\| start_elem->type == CHAR_CLASS
	2703	\|\| end_elem->type == EQUIV_CLASS \|\| end_elem->type == CHAR_CLASS,
	2704	0))
	2705	return REG_ERANGE;
	2706
	2707	/* We can handle no multi character collating elements without libc
	2708	support. */
	2709	if (BE ((start_elem->type == COLL_SYM
	2710	&& strlen ((char *) start_elem->opr.name) > 1)
	2711	\|\| (end_elem->type == COLL_SYM
	2712	&& strlen ((char *) end_elem->opr.name) > 1), 0))
	2713	return REG_ECOLLATE;
	2714
	2715	# ifdef RE_ENABLE_I18N
	2716	{
	2717	wchar_t wc;
	2718	wint_t start_wc;
	2719	wint_t end_wc;
	2720
	2721	start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
	2722	: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
	2723	: 0));
	2724	end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
	2725	: ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
	2726	: 0));
	2727	start_wc = ((start_elem->type == SB_CHAR \|\| start_elem->type == COLL_SYM)
	2728	? __btowc (start_ch) : start_elem->opr.wch);
	2729	end_wc = ((end_elem->type == SB_CHAR \|\| end_elem->type == COLL_SYM)
	2730	? __btowc (end_ch) : end_elem->opr.wch);
	2731	if (start_wc == WEOF \|\| end_wc == WEOF)
	2732	return REG_ECOLLATE;
1.3.2 by Colin Watson Import upstream version 3.2	2733	else if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_wc > end_wc, 0))
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2734	return REG_ERANGE;
	2735
	2736	/* Got valid collation sequence values, add them as a new entry.
	2737	However, for !_LIBC we have no collation elements: if the
	2738	character set is single byte, the single byte character set
	2739	that we build below suffices. parse_bracket_exp passes
	2740	no MBCSET if dfa->mb_cur_max == 1. */
	2741	if (mbcset)
	2742	{
1.2.3 by Colin Watson Import upstream version 2.2	2743	/* Check the space of the arrays. */
	2744	if (BE (*range_alloc == mbcset->nranges, 0))
	2745	{
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2746	/* There is not enough space, need realloc. */
	2747	wchar_t new_array_start, new_array_end;
	2748	Idx new_nranges;
	2749
	2750	/* +1 in case of mbcset->nranges is 0. */
	2751	new_nranges = 2 * mbcset->nranges + 1;
	2752	/* Use realloc since mbcset->range_starts and mbcset->range_ends
	2753	are NULL if range_alloc == 0. /
	2754	new_array_start = re_realloc (mbcset->range_starts, wchar_t,
1.2.3 by Colin Watson Import upstream version 2.2	2755	new_nranges);
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2756	new_array_end = re_realloc (mbcset->range_ends, wchar_t,
1.2.3 by Colin Watson Import upstream version 2.2	2757	new_nranges);
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2758
	2759	if (BE (new_array_start == NULL \|\| new_array_end == NULL, 0))
	2760	return REG_ESPACE;
	2761
	2762	mbcset->range_starts = new_array_start;
	2763	mbcset->range_ends = new_array_end;
	2764	*range_alloc = new_nranges;
1.2.3 by Colin Watson Import upstream version 2.2	2765	}
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2766
1.2.3 by Colin Watson Import upstream version 2.2	2767	mbcset->range_starts[mbcset->nranges] = start_wc;
1.2.3 by Colin Watson Import upstream version 2.2	2768	mbcset->range_ends[mbcset->nranges++] = end_wc;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2769	}
	2770
	2771	/* Build the table for single byte characters. */
	2772	for (wc = 0; wc < SBC_MAX; ++wc)
	2773	{
1.3.2 by Colin Watson Import upstream version 3.2	2774	if (start_wc <= wc && wc <= end_wc)
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2775	bitset_set (sbcset, wc);
	2776	}
	2777	}
	2778	# else /* not RE_ENABLE_I18N */
	2779	{
	2780	unsigned int ch;
	2781	start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
	2782	: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]
	2783	: 0));
	2784	end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
	2785	: ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]
	2786	: 0));
	2787	if (start_ch > end_ch)
	2788	return REG_ERANGE;
	2789	/* Build the table for single byte characters. */
	2790	for (ch = 0; ch < SBC_MAX; ++ch)
	2791	if (start_ch <= ch && ch <= end_ch)
	2792	bitset_set (sbcset, ch);
	2793	}
	2794	# endif /* not RE_ENABLE_I18N */
	2795	return REG_NOERROR;
	2796	}
	2797	#endif /* not _LIBC */
	2798
	2799	#ifndef _LIBC
	2800	/* Helper function for parse_bracket_exp only used in case of NOT _LIBC..
	2801	Build the collating element which is represented by NAME.
	2802	The result are written to MBCSET and SBCSET.
	2803	COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
	2804	pointer argument since we may update it. */
	2805
	2806	static reg_errcode_t
	2807	internal_function
	2808	# ifdef RE_ENABLE_I18N
1.3.1 by Colin Watson Import upstream version 3.1	2809	build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
	2810	Idx coll_sym_alloc, const unsigned char name)
	2811	# else /* not RE_ENABLE_I18N */
	2812	build_collating_symbol (bitset_t sbcset, const unsigned char *name)
	2813	# endif /* not RE_ENABLE_I18N */
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2814	{
	2815	size_t name_len = strlen ((const char *) name);
	2816	if (BE (name_len != 1, 0))
	2817	return REG_ECOLLATE;
	2818	else
	2819	{
	2820	bitset_set (sbcset, name[0]);
	2821	return REG_NOERROR;
	2822	}
	2823	}
	2824	#endif /* not _LIBC */
	2825
	2826	/* This function parse bracket expression like "[abc]", "[a-c]",
	2827	"[[.a-a.]]" etc. */
	2828
	2829	static bin_tree_t *
	2830	parse_bracket_exp (re_string_t regexp, re_dfa_t dfa, re_token_t *token,
	2831	reg_syntax_t syntax, reg_errcode_t *err)
	2832	{
	2833	#ifdef _LIBC
	2834	const unsigned char *collseqmb;
	2835	const char *collseqwc;
	2836	uint32_t nrules;
	2837	int32_t table_size;
	2838	const int32_t *symb_table;
	2839	const unsigned char *extra;
	2840
1.3.1 by Colin Watson Import upstream version 3.1	2841	/* Local function for parse_bracket_exp used in _LIBC environment.
1.3.1 by Colin Watson Import upstream version 3.1	2842	Seek the collating symbol entry corresponding to NAME.
1.3.2 by Colin Watson Import upstream version 3.2	2843	Return the index of the symbol in the SYMB_TABLE,
1.3.2 by Colin Watson Import upstream version 3.2	2844	or -1 if not found. */
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2845
	2846	auto inline int32_t
1.3.2 by Colin Watson Import upstream version 3.2	2847	__attribute__ ((always_inline))
1.3.2 by Colin Watson Import upstream version 3.2	2848	seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2849	{
1.3.2 by Colin Watson Import upstream version 3.2	2850	int32_t elem;
	2851
	2852	for (elem = 0; elem < table_size; elem++)
	2853	if (symb_table[2 * elem] != 0)
	2854	{
	2855	int32_t idx = symb_table[2 * elem + 1];
	2856	/* Skip the name of collating element name. */
	2857	idx += 1 + extra[idx];
	2858	if (/* Compare the length of the name. */
	2859	name_len == extra[idx]
	2860	/* Compare the name. */
	2861	&& memcmp (name, &extra[idx + 1], name_len) == 0)
	2862	/* Yep, this is the entry. */
	2863	return elem;
	2864	}
	2865	return -1;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2866	}
	2867
1.2.3 by Colin Watson Import upstream version 2.2	2868	/* Local function for parse_bracket_exp used in _LIBC environment.
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2869	Look up the collation sequence value of BR_ELEM.
	2870	Return the value if succeeded, UINT_MAX otherwise. */
	2871
	2872	auto inline unsigned int
1.3.2 by Colin Watson Import upstream version 3.2	2873	__attribute__ ((always_inline))
1.3.2 by Colin Watson Import upstream version 3.2	2874	lookup_collation_sequence_value (bracket_elem_t *br_elem)
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2875	{
	2876	if (br_elem->type == SB_CHAR)
	2877	{
	2878	/*
	2879	if (MB_CUR_MAX == 1)
	2880	*/
	2881	if (nrules == 0)
	2882	return collseqmb[br_elem->opr.ch];
	2883	else
	2884	{
	2885	wint_t wc = __btowc (br_elem->opr.ch);
	2886	return __collseq_table_lookup (collseqwc, wc);
	2887	}
	2888	}
	2889	else if (br_elem->type == MB_CHAR)
	2890	{
1.2.3 by Colin Watson Import upstream version 2.2	2891	if (nrules != 0)
1.2.3 by Colin Watson Import upstream version 2.2	2892	return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2893	}
	2894	else if (br_elem->type == COLL_SYM)
	2895	{
	2896	size_t sym_name_len = strlen ((char *) br_elem->opr.name);
	2897	if (nrules != 0)
	2898	{
	2899	int32_t elem, idx;
	2900	elem = seek_collating_symbol_entry (br_elem->opr.name,
	2901	sym_name_len);
1.3.2 by Colin Watson Import upstream version 3.2	2902	if (elem != -1)
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2903	{
	2904	/* We found the entry. */
	2905	idx = symb_table[2 * elem + 1];
	2906	/* Skip the name of collating element name. */
	2907	idx += 1 + extra[idx];
	2908	/* Skip the byte sequence of the collating element. */
	2909	idx += 1 + extra[idx];
	2910	/* Adjust for the alignment. */
	2911	idx = (idx + 3) & ~3;
	2912	/* Skip the multibyte collation sequence value. */
	2913	idx += sizeof (unsigned int);
	2914	/* Skip the wide char sequence of the collating element. */
	2915	idx += sizeof (unsigned int) *
	2916	(1 + (unsigned int ) (extra + idx));
	2917	/* Return the collation sequence value. */
	2918	return (unsigned int ) (extra + idx);
	2919	}
1.3.2 by Colin Watson Import upstream version 3.2	2920	else if (sym_name_len == 1)
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2921	{
	2922	/* No valid character. Match it as a single byte
	2923	character. */
	2924	return collseqmb[br_elem->opr.name[0]];
	2925	}
	2926	}
	2927	else if (sym_name_len == 1)
	2928	return collseqmb[br_elem->opr.name[0]];
	2929	}
	2930	return UINT_MAX;
	2931	}
	2932
1.3.1 by Colin Watson Import upstream version 3.1	2933	/* Local function for parse_bracket_exp used in _LIBC environment.
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2934	Build the range expression which starts from START_ELEM, and ends
	2935	at END_ELEM. The result are written to MBCSET and SBCSET.
	2936	RANGE_ALLOC is the allocated size of mbcset->range_starts, and
1.3.1 by Colin Watson Import upstream version 3.1	2937	mbcset->range_ends, is a pointer argument since we may
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2938	update it. */
	2939
	2940	auto inline reg_errcode_t
1.3.2 by Colin Watson Import upstream version 3.2	2941	__attribute__ ((always_inline))
	2942	build_range_exp (bitset_t sbcset, re_charset_t mbcset, int range_alloc,
	2943	bracket_elem_t start_elem, bracket_elem_t end_elem)
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2944	{
	2945	unsigned int ch;
	2946	uint32_t start_collseq;
	2947	uint32_t end_collseq;
	2948
	2949	/* Equivalence Classes and Character Classes can't be a range
	2950	start/end. */
	2951	if (BE (start_elem->type == EQUIV_CLASS \|\| start_elem->type == CHAR_CLASS
	2952	\|\| end_elem->type == EQUIV_CLASS \|\| end_elem->type == CHAR_CLASS,
	2953	0))
	2954	return REG_ERANGE;
	2955
1.3.2 by Colin Watson Import upstream version 3.2	2956	/* FIXME: Implement rational ranges here, too. */
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2957	start_collseq = lookup_collation_sequence_value (start_elem);
	2958	end_collseq = lookup_collation_sequence_value (end_elem);
	2959	/* Check start/end collation sequence values. */
	2960	if (BE (start_collseq == UINT_MAX \|\| end_collseq == UINT_MAX, 0))
	2961	return REG_ECOLLATE;
	2962	if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))
	2963	return REG_ERANGE;
	2964
	2965	/* Got valid collation sequence values, add them as a new entry.
	2966	However, if we have no collation elements, and the character set
	2967	is single byte, the single byte character set that we
	2968	build below suffices. */
	2969	if (nrules > 0 \|\| dfa->mb_cur_max > 1)
	2970	{
1.2.3 by Colin Watson Import upstream version 2.2	2971	/* Check the space of the arrays. */
1.2.3 by Colin Watson Import upstream version 2.2	2972	if (BE (*range_alloc == mbcset->nranges, 0))
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2973	{
	2974	/* There is not enough space, need realloc. */
	2975	uint32_t *new_array_start;
	2976	uint32_t *new_array_end;
	2977	Idx new_nranges;
	2978
	2979	/* +1 in case of mbcset->nranges is 0. */
	2980	new_nranges = 2 * mbcset->nranges + 1;
	2981	new_array_start = re_realloc (mbcset->range_starts, uint32_t,
	2982	new_nranges);
	2983	new_array_end = re_realloc (mbcset->range_ends, uint32_t,
1.2.3 by Colin Watson Import upstream version 2.2	2984	new_nranges);
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2985
	2986	if (BE (new_array_start == NULL \|\| new_array_end == NULL, 0))
1.2.3 by Colin Watson Import upstream version 2.2	2987	return REG_ESPACE;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2988
	2989	mbcset->range_starts = new_array_start;
	2990	mbcset->range_ends = new_array_end;
	2991	*range_alloc = new_nranges;
	2992	}
	2993
1.2.3 by Colin Watson Import upstream version 2.2	2994	mbcset->range_starts[mbcset->nranges] = start_collseq;
1.2.3 by Colin Watson Import upstream version 2.2	2995	mbcset->range_ends[mbcset->nranges++] = end_collseq;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	2996	}
	2997
	2998	/* Build the table for single byte characters. */
	2999	for (ch = 0; ch < SBC_MAX; ch++)
	3000	{
	3001	uint32_t ch_collseq;
	3002	/*
	3003	if (MB_CUR_MAX == 1)
	3004	*/
	3005	if (nrules == 0)
	3006	ch_collseq = collseqmb[ch];
	3007	else
	3008	ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
	3009	if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
	3010	bitset_set (sbcset, ch);
	3011	}
	3012	return REG_NOERROR;
	3013	}
	3014
1.3.1 by Colin Watson Import upstream version 3.1	3015	/* Local function for parse_bracket_exp used in _LIBC environment.
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3016	Build the collating element which is represented by NAME.
	3017	The result are written to MBCSET and SBCSET.
	3018	COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
1.3.1 by Colin Watson Import upstream version 3.1	3019	pointer argument since we may update it. */
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3020
	3021	auto inline reg_errcode_t
1.3.2 by Colin Watson Import upstream version 3.2	3022	__attribute__ ((always_inline))
	3023	build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
	3024	Idx coll_sym_alloc, const unsigned char name)
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3025	{
	3026	int32_t elem, idx;
	3027	size_t name_len = strlen ((const char *) name);
	3028	if (nrules != 0)
	3029	{
	3030	elem = seek_collating_symbol_entry (name, name_len);
1.3.2 by Colin Watson Import upstream version 3.2	3031	if (elem != -1)
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3032	{
	3033	/* We found the entry. */
	3034	idx = symb_table[2 * elem + 1];
	3035	/* Skip the name of collating element name. */
	3036	idx += 1 + extra[idx];
	3037	}
1.3.2 by Colin Watson Import upstream version 3.2	3038	else if (name_len == 1)
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3039	{
	3040	/* No valid character, treat it as a normal
	3041	character. */
	3042	bitset_set (sbcset, name[0]);
	3043	return REG_NOERROR;
	3044	}
	3045	else
	3046	return REG_ECOLLATE;
	3047
	3048	/* Got valid collation sequence, add it as a new entry. */
	3049	/* Check the space of the arrays. */
	3050	if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))
	3051	{
	3052	/* Not enough, realloc it. */
	3053	/* +1 in case of mbcset->ncoll_syms is 0. */
	3054	Idx new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
	3055	/* Use realloc since mbcset->coll_syms is NULL
	3056	if alloc == 0. /
	3057	int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
	3058	new_coll_sym_alloc);
	3059	if (BE (new_coll_syms == NULL, 0))
	3060	return REG_ESPACE;
	3061	mbcset->coll_syms = new_coll_syms;
	3062	*coll_sym_alloc = new_coll_sym_alloc;
	3063	}
	3064	mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
	3065	return REG_NOERROR;
	3066	}
	3067	else
	3068	{
	3069	if (BE (name_len != 1, 0))
	3070	return REG_ECOLLATE;
	3071	else
	3072	{
	3073	bitset_set (sbcset, name[0]);
	3074	return REG_NOERROR;
	3075	}
	3076	}
	3077	}
	3078	#endif
	3079
	3080	re_token_t br_token;
	3081	re_bitset_ptr_t sbcset;
	3082	#ifdef RE_ENABLE_I18N
	3083	re_charset_t *mbcset;
	3084	Idx coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;
	3085	Idx equiv_class_alloc = 0, char_class_alloc = 0;
	3086	#endif /* not RE_ENABLE_I18N */
	3087	bool non_match = false;
	3088	bin_tree_t *work_tree;
	3089	int token_len;
	3090	bool first_round = true;
	3091	#ifdef _LIBC
	3092	collseqmb = (const unsigned char *)
	3093	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
	3094	nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
	3095	if (nrules)
	3096	{
	3097	/*
	3098	if (MB_CUR_MAX > 1)
	3099	*/
	3100	collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
	3101	table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
	3102	symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
3103	_NL_COLLATE_SYMB_TABLEMB);
3104	extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3105	_NL_COLLATE_SYMB_EXTRAMB);
3106	}
3107	#endif
3108	sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
3109	#ifdef RE_ENABLE_I18N
3110	mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
3111	#endif /* RE_ENABLE_I18N */
3112	#ifdef RE_ENABLE_I18N
3113	if (BE (sbcset == NULL \|\| mbcset == NULL, 0))
3114	#else
3115	if (BE (sbcset == NULL, 0))
3116	#endif /* RE_ENABLE_I18N */
3117	{
1.3.1 by Colin Watson Import upstream version 3.1	3118	re_free (sbcset);
	3119	#ifdef RE_ENABLE_I18N
	3120	re_free (mbcset);
	3121	#endif
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3122	*err = REG_ESPACE;
	3123	return NULL;
	3124	}
	3125
	3126	token_len = peek_token_bracket (token, regexp, syntax);
	3127	if (BE (token->type == END_OF_RE, 0))
	3128	{
	3129	*err = REG_BADPAT;
	3130	goto parse_bracket_exp_free_return;
	3131	}
	3132	if (token->type == OP_NON_MATCH_LIST)
	3133	{
	3134	#ifdef RE_ENABLE_I18N
	3135	mbcset->non_match = 1;
	3136	#endif /* not RE_ENABLE_I18N */
	3137	non_match = true;
	3138	if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
	3139	bitset_set (sbcset, '\n');
	3140	re_string_skip_bytes (regexp, token_len); /* Skip a token. */
	3141	token_len = peek_token_bracket (token, regexp, syntax);
	3142	if (BE (token->type == END_OF_RE, 0))
	3143	{
	3144	*err = REG_BADPAT;
	3145	goto parse_bracket_exp_free_return;
	3146	}
	3147	}
	3148
	3149	/* We treat the first ']' as a normal character. */
	3150	if (token->type == OP_CLOSE_BRACKET)
	3151	token->type = CHARACTER;
	3152
	3153	while (1)
	3154	{
	3155	bracket_elem_t start_elem, end_elem;
	3156	unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
	3157	unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
	3158	reg_errcode_t ret;
	3159	int token_len2 = 0;
	3160	bool is_range_exp = false;
	3161	re_token_t token2;
	3162
	3163	start_elem.opr.name = start_name_buf;
	3164	ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
	3165	syntax, first_round);
	3166	if (BE (ret != REG_NOERROR, 0))
	3167	{
	3168	*err = ret;
	3169	goto parse_bracket_exp_free_return;
	3170	}
	3171	first_round = false;
	3172
	3173	/* Get information about the next token. We need it in any case. */
	3174	token_len = peek_token_bracket (token, regexp, syntax);
	3175
	3176	/* Do not check for ranges if we know they are not allowed. */
	3177	if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
	3178	{
	3179	if (BE (token->type == END_OF_RE, 0))
	3180	{
	3181	*err = REG_EBRACK;
	3182	goto parse_bracket_exp_free_return;
	3183	}
	3184	if (token->type == OP_CHARSET_RANGE)
	3185	{
3186	re_string_skip_bytes (regexp, token_len); /* Skip '-'. */
3187	token_len2 = peek_token_bracket (&token2, regexp, syntax);
3188	if (BE (token2.type == END_OF_RE, 0))
3189	{
3190	*err = REG_EBRACK;
3191	goto parse_bracket_exp_free_return;
3192	}
3193	if (token2.type == OP_CLOSE_BRACKET)
3194	{
3195	/* We treat the last '-' as a normal character. */
3196	re_string_skip_bytes (regexp, -token_len);
3197	token->type = CHARACTER;
3198	}
3199	else
3200	is_range_exp = true;
3201	}
3202	}
3203
3204	if (is_range_exp == true)
3205	{
3206	end_elem.opr.name = end_name_buf;
3207	ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
3208	dfa, syntax, true);
3209	if (BE (ret != REG_NOERROR, 0))
3210	{
3211	*err = ret;
3212	goto parse_bracket_exp_free_return;
3213	}
3214
3215	token_len = peek_token_bracket (token, regexp, syntax);
3216
3217	#ifdef _LIBC
3218	*err = build_range_exp (sbcset, mbcset, &range_alloc,
3219	&start_elem, &end_elem);
3220	#else
3221	# ifdef RE_ENABLE_I18N
1.2.4 by Otavio Salvador Import upstream version 2.3	3222	*err = build_range_exp (syntax, sbcset,
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3223	dfa->mb_cur_max > 1 ? mbcset : NULL,
	3224	&range_alloc, &start_elem, &end_elem);
	3225	# else
1.2.4 by Otavio Salvador Import upstream version 2.3	3226	*err = build_range_exp (syntax, sbcset, &start_elem, &end_elem);
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3227	# endif
	3228	#endif /* RE_ENABLE_I18N */
	3229	if (BE (*err != REG_NOERROR, 0))
	3230	goto parse_bracket_exp_free_return;
	3231	}
	3232	else
	3233	{
	3234	switch (start_elem.type)
	3235	{
	3236	case SB_CHAR:
	3237	bitset_set (sbcset, start_elem.opr.ch);
	3238	break;
	3239	#ifdef RE_ENABLE_I18N
	3240	case MB_CHAR:
	3241	/* Check whether the array has enough space. */
	3242	if (BE (mbchar_alloc == mbcset->nmbchars, 0))
	3243	{
	3244	wchar_t *new_mbchars;
	3245	/* Not enough, realloc it. */
	3246	/* +1 in case of mbcset->nmbchars is 0. */
	3247	mbchar_alloc = 2 * mbcset->nmbchars + 1;
	3248	/* Use realloc since array is NULL if alloc == 0. /
	3249	new_mbchars = re_realloc (mbcset->mbchars, wchar_t,
	3250	mbchar_alloc);
	3251	if (BE (new_mbchars == NULL, 0))
	3252	goto parse_bracket_exp_espace;
	3253	mbcset->mbchars = new_mbchars;
	3254	}
	3255	mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
	3256	break;
	3257	#endif /* RE_ENABLE_I18N */
	3258	case EQUIV_CLASS:
	3259	*err = build_equiv_class (sbcset,
	3260	#ifdef RE_ENABLE_I18N
	3261	mbcset, &equiv_class_alloc,
	3262	#endif /* RE_ENABLE_I18N */
	3263	start_elem.opr.name);
	3264	if (BE (*err != REG_NOERROR, 0))
	3265	goto parse_bracket_exp_free_return;
	3266	break;
	3267	case COLL_SYM:
	3268	*err = build_collating_symbol (sbcset,
	3269	#ifdef RE_ENABLE_I18N
	3270	mbcset, &coll_sym_alloc,
	3271	#endif /* RE_ENABLE_I18N */
	3272	start_elem.opr.name);
	3273	if (BE (*err != REG_NOERROR, 0))
	3274	goto parse_bracket_exp_free_return;
	3275	break;
	3276	case CHAR_CLASS:
	3277	*err = build_charclass (regexp->trans, sbcset,
	3278	#ifdef RE_ENABLE_I18N
	3279	mbcset, &char_class_alloc,
	3280	#endif /* RE_ENABLE_I18N */
1.3.2 by Colin Watson Import upstream version 3.2	3281	(const char *) start_elem.opr.name,
1.3.2 by Colin Watson Import upstream version 3.2	3282	syntax);
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3283	if (BE (*err != REG_NOERROR, 0))
	3284	goto parse_bracket_exp_free_return;
	3285	break;
	3286	default:
	3287	assert (0);
	3288	break;
	3289	}
	3290	}
	3291	if (BE (token->type == END_OF_RE, 0))
	3292	{
	3293	*err = REG_EBRACK;
	3294	goto parse_bracket_exp_free_return;
	3295	}
	3296	if (token->type == OP_CLOSE_BRACKET)
	3297	break;
	3298	}
	3299
	3300	re_string_skip_bytes (regexp, token_len); /* Skip a token. */
	3301
	3302	/* If it is non-matching list. */
	3303	if (non_match)
	3304	bitset_not (sbcset);
	3305
	3306	#ifdef RE_ENABLE_I18N
	3307	/* Ensure only single byte characters are set. */
	3308	if (dfa->mb_cur_max > 1)
	3309	bitset_mask (sbcset, dfa->sb_char);
	3310
	3311	if (mbcset->nmbchars \|\| mbcset->ncoll_syms \|\| mbcset->nequiv_classes
	3312	\|\| mbcset->nranges \|\| (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
	3313	\|\| mbcset->non_match)))
	3314	{
	3315	bin_tree_t *mbc_tree;
	3316	int sbc_idx;
	3317	/* Build a tree for complex bracket. */
	3318	dfa->has_mb_node = 1;
	3319	br_token.type = COMPLEX_BRACKET;
	3320	br_token.opr.mbcset = mbcset;
	3321	mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
	3322	if (BE (mbc_tree == NULL, 0))
	3323	goto parse_bracket_exp_espace;
	3324	for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)
	3325	if (sbcset[sbc_idx])
	3326	break;
	3327	/* If there are no bits set in sbcset, there is no point
	3328	of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */
	3329	if (sbc_idx < BITSET_WORDS)
	3330	{
1.2.3 by Colin Watson Import upstream version 2.2	3331	/* Build a tree for simple bracket. */
	3332	br_token.type = SIMPLE_BRACKET;
	3333	br_token.opr.sbcset = sbcset;
	3334	work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
	3335	if (BE (work_tree == NULL, 0))
	3336	goto parse_bracket_exp_espace;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3337
1.2.3 by Colin Watson Import upstream version 2.2	3338	/* Then join them by ALT node. */
	3339	work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
	3340	if (BE (work_tree == NULL, 0))
	3341	goto parse_bracket_exp_espace;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3342	}
	3343	else
	3344	{
	3345	re_free (sbcset);
	3346	work_tree = mbc_tree;
	3347	}
	3348	}
	3349	else
	3350	#endif /* not RE_ENABLE_I18N */
	3351	{
	3352	#ifdef RE_ENABLE_I18N
	3353	free_charset (mbcset);
	3354	#endif
	3355	/* Build a tree for simple bracket. */
	3356	br_token.type = SIMPLE_BRACKET;
	3357	br_token.opr.sbcset = sbcset;
	3358	work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
	3359	if (BE (work_tree == NULL, 0))
1.2.3 by Colin Watson Import upstream version 2.2	3360	goto parse_bracket_exp_espace;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3361	}
	3362	return work_tree;
	3363
	3364	parse_bracket_exp_espace:
	3365	*err = REG_ESPACE;
	3366	parse_bracket_exp_free_return:
	3367	re_free (sbcset);
	3368	#ifdef RE_ENABLE_I18N
	3369	free_charset (mbcset);
	3370	#endif /* RE_ENABLE_I18N */
	3371	return NULL;
	3372	}
	3373
	3374	/* Parse an element in the bracket expression. */
	3375
	3376	static reg_errcode_t
	3377	parse_bracket_element (bracket_elem_t elem, re_string_t regexp,
	3378	re_token_t token, int token_len, re_dfa_t dfa,
	3379	reg_syntax_t syntax, bool accept_hyphen)
	3380	{
	3381	#ifdef RE_ENABLE_I18N
	3382	int cur_char_size;
	3383	cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
	3384	if (cur_char_size > 1)
	3385	{
	3386	elem->type = MB_CHAR;
	3387	elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
	3388	re_string_skip_bytes (regexp, cur_char_size);
	3389	return REG_NOERROR;
	3390	}
	3391	#endif /* RE_ENABLE_I18N */
	3392	re_string_skip_bytes (regexp, token_len); /* Skip a token. */
	3393	if (token->type == OP_OPEN_COLL_ELEM \|\| token->type == OP_OPEN_CHAR_CLASS
	3394	\|\| token->type == OP_OPEN_EQUIV_CLASS)
	3395	return parse_bracket_symbol (elem, regexp, token);
	3396	if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen)
	3397	{
	3398	/* A '-' must only appear as anything but a range indicator before
	3399	the closing bracket. Everything else is an error. */
	3400	re_token_t token2;
	3401	(void) peek_token_bracket (&token2, regexp, syntax);
	3402	if (token2.type != OP_CLOSE_BRACKET)
	3403	/* The actual error value is not standardized since this whole
	3404	case is undefined. But ERANGE makes good sense. */
	3405	return REG_ERANGE;
	3406	}
	3407	elem->type = SB_CHAR;
	3408	elem->opr.ch = token->opr.c;
	3409	return REG_NOERROR;
	3410	}
	3411
	3412	/* Parse a bracket symbol in the bracket expression. Bracket symbols are
	3413	such as [:<character_class>:], [.<collating_element>.], and
	3414	[=<equivalent_class>=]. */
	3415
	3416	static reg_errcode_t
	3417	parse_bracket_symbol (bracket_elem_t elem, re_string_t regexp,
	3418	re_token_t *token)
	3419	{
	3420	unsigned char ch, delim = token->opr.c;
	3421	int i = 0;
	3422	if (re_string_eoi(regexp))
	3423	return REG_EBRACK;
	3424	for (;; ++i)
3425	{
3426	if (i >= BRACKET_NAME_BUF_SIZE)
3427	return REG_EBRACK;
3428	if (token->type == OP_OPEN_CHAR_CLASS)
3429	ch = re_string_fetch_byte_case (regexp);
3430	else
3431	ch = re_string_fetch_byte (regexp);
3432	if (re_string_eoi(regexp))
3433	return REG_EBRACK;
3434	if (ch == delim && re_string_peek_byte (regexp, 0) == ']')
3435	break;
3436	elem->opr.name[i] = ch;
3437	}
3438	re_string_skip_bytes (regexp, 1);
3439	elem->opr.name[i] = '\0';
3440	switch (token->type)
3441	{
3442	case OP_OPEN_COLL_ELEM:
3443	elem->type = COLL_SYM;
3444	break;
3445	case OP_OPEN_EQUIV_CLASS:
3446	elem->type = EQUIV_CLASS;
3447	break;
3448	case OP_OPEN_CHAR_CLASS:
3449	elem->type = CHAR_CLASS;
3450	break;
3451	default:
3452	break;
3453	}
3454	return REG_NOERROR;
3455	}
3456
3457	/* Helper function for parse_bracket_exp.
3458	Build the equivalence class which is represented by NAME.
3459	The result are written to MBCSET and SBCSET.
3460	EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes,
1.3.1 by Colin Watson Import upstream version 3.1	3461	is a pointer argument since we may update it. */
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3462
	3463	static reg_errcode_t
	3464	#ifdef RE_ENABLE_I18N
	3465	build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
	3466	Idx equiv_class_alloc, const unsigned char name)
	3467	#else /* not RE_ENABLE_I18N */
	3468	build_equiv_class (bitset_t sbcset, const unsigned char *name)
	3469	#endif /* not RE_ENABLE_I18N */
	3470	{
	3471	#ifdef _LIBC
	3472	uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
	3473	if (nrules != 0)
	3474	{
	3475	const int32_t table, indirect;
	3476	const unsigned char weights, extra, *cp;
	3477	unsigned char char_buf[2];
	3478	int32_t idx1, idx2;
	3479	unsigned int ch;
	3480	size_t len;
	3481	/* This #include defines a local function! */
	3482	# include <locale/weight.h>
	3483	/* Calculate the index for equivalence class. */
	3484	cp = name;
	3485	table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
	3486	weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
	3487	_NL_COLLATE_WEIGHTMB);
	3488	extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
	3489	_NL_COLLATE_EXTRAMB);
	3490	indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
	3491	_NL_COLLATE_INDIRECTMB);
1.3.1 by Colin Watson Import upstream version 3.1	3492	idx1 = findidx (&cp, -1);
1.3.1 by Colin Watson Import upstream version 3.1	3493	if (BE (idx1 == 0 \|\| *cp != '\0', 0))
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3494	/* This isn't a valid character. */
	3495	return REG_ECOLLATE;
	3496
1.3.1 by Colin Watson Import upstream version 3.1	3497	/* Build single byte matching table for this equivalence class. */
1.2.3 by Colin Watson Import upstream version 2.2	3498	len = weights[idx1 & 0xffffff];
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3499	for (ch = 0; ch < SBC_MAX; ++ch)
	3500	{
	3501	char_buf[0] = ch;
	3502	cp = char_buf;
1.3.1 by Colin Watson Import upstream version 3.1	3503	idx2 = findidx (&cp, 1);
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3504	/*
	3505	idx2 = table[ch];
	3506	*/
	3507	if (idx2 == 0)
	3508	/* This isn't a valid character. */
	3509	continue;
1.2.3 by Colin Watson Import upstream version 2.2	3510	/* Compare only if the length matches and the collation rule
	3511	index is the same. */
	3512	if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24))
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3513	{
	3514	int cnt = 0;
1.2.3 by Colin Watson Import upstream version 2.2	3515
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3516	while (cnt <= len &&
1.2.3 by Colin Watson Import upstream version 2.2	3517	weights[(idx1 & 0xffffff) + 1 + cnt]
1.2.3 by Colin Watson Import upstream version 2.2	3518	== weights[(idx2 & 0xffffff) + 1 + cnt])
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3519	++cnt;
	3520
	3521	if (cnt > len)
	3522	bitset_set (sbcset, ch);
	3523	}
	3524	}
	3525	/* Check whether the array has enough space. */
	3526	if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0))
	3527	{
	3528	/* Not enough, realloc it. */
	3529	/* +1 in case of mbcset->nequiv_classes is 0. */
	3530	Idx new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
	3531	/* Use realloc since the array is NULL if alloc == 0. /
	3532	int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
	3533	int32_t,
	3534	new_equiv_class_alloc);
	3535	if (BE (new_equiv_classes == NULL, 0))
	3536	return REG_ESPACE;
	3537	mbcset->equiv_classes = new_equiv_classes;
	3538	*equiv_class_alloc = new_equiv_class_alloc;
	3539	}
	3540	mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
	3541	}
	3542	else
	3543	#endif /* _LIBC */
	3544	{
	3545	if (BE (strlen ((const char *) name) != 1, 0))
	3546	return REG_ECOLLATE;
	3547	bitset_set (sbcset, *name);
	3548	}
	3549	return REG_NOERROR;
	3550	}
	3551
	3552	/* Helper function for parse_bracket_exp.
	3553	Build the character class which is represented by NAME.
	3554	The result are written to MBCSET and SBCSET.
	3555	CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes,
1.3.1 by Colin Watson Import upstream version 3.1	3556	is a pointer argument since we may update it. */
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3557
	3558	static reg_errcode_t
	3559	#ifdef RE_ENABLE_I18N
	3560	build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
	3561	re_charset_t mbcset, Idx char_class_alloc,
1.3.2 by Colin Watson Import upstream version 3.2	3562	const char *class_name, reg_syntax_t syntax)
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3563	#else /* not RE_ENABLE_I18N */
	3564	build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
1.3.2 by Colin Watson Import upstream version 3.2	3565	const char *class_name, reg_syntax_t syntax)
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3566	#endif /* not RE_ENABLE_I18N */
	3567	{
	3568	int i;
1.3.2 by Colin Watson Import upstream version 3.2	3569	const char *name = class_name;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3570
	3571	/* In case of REG_ICASE "upper" and "lower" match the both of
	3572	upper and lower cases. */
	3573	if ((syntax & RE_ICASE)
	3574	&& (strcmp (name, "upper") == 0 \|\| strcmp (name, "lower") == 0))
	3575	name = "alpha";
	3576
	3577	#ifdef RE_ENABLE_I18N
	3578	/* Check the space of the arrays. */
	3579	if (BE (*char_class_alloc == mbcset->nchar_classes, 0))
	3580	{
	3581	/* Not enough, realloc it. */
	3582	/* +1 in case of mbcset->nchar_classes is 0. */
	3583	Idx new_char_class_alloc = 2 * mbcset->nchar_classes + 1;
	3584	/* Use realloc since array is NULL if alloc == 0. /
	3585	wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,
	3586	new_char_class_alloc);
	3587	if (BE (new_char_classes == NULL, 0))
	3588	return REG_ESPACE;
	3589	mbcset->char_classes = new_char_classes;
	3590	*char_class_alloc = new_char_class_alloc;
	3591	}
	3592	mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
	3593	#endif /* RE_ENABLE_I18N */
	3594
	3595	#define BUILD_CHARCLASS_LOOP(ctype_func) \
	3596	do { \
	3597	if (BE (trans != NULL, 0)) \
	3598	{ \
	3599	for (i = 0; i < SBC_MAX; ++i) \
	3600	if (ctype_func (i)) \
	3601	bitset_set (sbcset, trans[i]); \
	3602	} \
	3603	else \
	3604	{ \
	3605	for (i = 0; i < SBC_MAX; ++i) \
	3606	if (ctype_func (i)) \
	3607	bitset_set (sbcset, i); \
	3608	} \
	3609	} while (0)
	3610
	3611	if (strcmp (name, "alnum") == 0)
	3612	BUILD_CHARCLASS_LOOP (isalnum);
	3613	else if (strcmp (name, "cntrl") == 0)
	3614	BUILD_CHARCLASS_LOOP (iscntrl);
	3615	else if (strcmp (name, "lower") == 0)
	3616	BUILD_CHARCLASS_LOOP (islower);
	3617	else if (strcmp (name, "space") == 0)
	3618	BUILD_CHARCLASS_LOOP (isspace);
	3619	else if (strcmp (name, "alpha") == 0)
	3620	BUILD_CHARCLASS_LOOP (isalpha);
	3621	else if (strcmp (name, "digit") == 0)
	3622	BUILD_CHARCLASS_LOOP (isdigit);
	3623	else if (strcmp (name, "print") == 0)
	3624	BUILD_CHARCLASS_LOOP (isprint);
	3625	else if (strcmp (name, "upper") == 0)
	3626	BUILD_CHARCLASS_LOOP (isupper);
	3627	else if (strcmp (name, "blank") == 0)
	3628	BUILD_CHARCLASS_LOOP (isblank);
	3629	else if (strcmp (name, "graph") == 0)
	3630	BUILD_CHARCLASS_LOOP (isgraph);
	3631	else if (strcmp (name, "punct") == 0)
	3632	BUILD_CHARCLASS_LOOP (ispunct);
	3633	else if (strcmp (name, "xdigit") == 0)
3634	BUILD_CHARCLASS_LOOP (isxdigit);
3635	else
3636	return REG_ECTYPE;
3637
3638	return REG_NOERROR;
3639	}
3640
3641	static bin_tree_t *
3642	build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
1.3.2 by Colin Watson Import upstream version 3.2	3643	const char *class_name,
1.3.2 by Colin Watson Import upstream version 3.2	3644	const char *extra, bool non_match,
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3645	reg_errcode_t *err)
	3646	{
	3647	re_bitset_ptr_t sbcset;
	3648	#ifdef RE_ENABLE_I18N
	3649	re_charset_t *mbcset;
	3650	Idx alloc = 0;
	3651	#endif /* not RE_ENABLE_I18N */
	3652	reg_errcode_t ret;
	3653	re_token_t br_token;
	3654	bin_tree_t *tree;
	3655
	3656	sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1);
	3657	#ifdef RE_ENABLE_I18N
	3658	mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1);
	3659	#endif /* RE_ENABLE_I18N */
	3660
	3661	#ifdef RE_ENABLE_I18N
	3662	if (BE (sbcset == NULL \|\| mbcset == NULL, 0))
	3663	#else /* not RE_ENABLE_I18N */
	3664	if (BE (sbcset == NULL, 0))
	3665	#endif /* not RE_ENABLE_I18N */
	3666	{
	3667	*err = REG_ESPACE;
	3668	return NULL;
	3669	}
	3670
	3671	if (non_match)
	3672	{
	3673	#ifdef RE_ENABLE_I18N
	3674	mbcset->non_match = 1;
	3675	#endif /* not RE_ENABLE_I18N */
	3676	}
	3677
	3678	/* We don't care the syntax in this case. */
	3679	ret = build_charclass (trans, sbcset,
	3680	#ifdef RE_ENABLE_I18N
	3681	mbcset, &alloc,
	3682	#endif /* RE_ENABLE_I18N */
	3683	class_name, 0);
	3684
	3685	if (BE (ret != REG_NOERROR, 0))
	3686	{
	3687	re_free (sbcset);
	3688	#ifdef RE_ENABLE_I18N
	3689	free_charset (mbcset);
	3690	#endif /* RE_ENABLE_I18N */
	3691	*err = ret;
	3692	return NULL;
	3693	}
	3694	/* \w match '_' also. */
	3695	for (; *extra; extra++)
	3696	bitset_set (sbcset, *extra);
	3697
	3698	/* If it is non-matching list. */
	3699	if (non_match)
	3700	bitset_not (sbcset);
	3701
	3702	#ifdef RE_ENABLE_I18N
	3703	/* Ensure only single byte characters are set. */
	3704	if (dfa->mb_cur_max > 1)
	3705	bitset_mask (sbcset, dfa->sb_char);
	3706	#endif
	3707
	3708	/* Build a tree for simple bracket. */
3709	br_token.type = SIMPLE_BRACKET;
3710	br_token.opr.sbcset = sbcset;
3711	tree = create_token_tree (dfa, NULL, NULL, &br_token);
3712	if (BE (tree == NULL, 0))
3713	goto build_word_op_espace;
3714
3715	#ifdef RE_ENABLE_I18N
3716	if (dfa->mb_cur_max > 1)
3717	{
3718	bin_tree_t *mbc_tree;
3719	/* Build a tree for complex bracket. */
3720	br_token.type = COMPLEX_BRACKET;
3721	br_token.opr.mbcset = mbcset;
3722	dfa->has_mb_node = 1;
3723	mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3724	if (BE (mbc_tree == NULL, 0))
3725	goto build_word_op_espace;
3726	/* Then join them by ALT node. */
3727	tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
3728	if (BE (mbc_tree != NULL, 1))
3729	return tree;
3730	}
3731	else
3732	{
3733	free_charset (mbcset);
3734	return tree;
3735	}
3736	#else /* not RE_ENABLE_I18N */
3737	return tree;
3738	#endif /* not RE_ENABLE_I18N */
3739
3740	build_word_op_espace:
3741	re_free (sbcset);
3742	#ifdef RE_ENABLE_I18N
3743	free_charset (mbcset);
3744	#endif /* RE_ENABLE_I18N */
3745	*err = REG_ESPACE;
3746	return NULL;
3747	}
3748
3749	/* This is intended for the expressions like "a{1,3}".
1.3.1 by Colin Watson Import upstream version 3.1	3750	Fetch a number from 'input', and return the number.
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3751	Return REG_MISSING if the number field is empty like "{,1}".
1.3.2 by Colin Watson Import upstream version 3.2	3752	Return RE_DUP_MAX + 1 if the number field is too large.
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3753	Return REG_ERROR if an error occurred. */
	3754
	3755	static Idx
	3756	fetch_number (re_string_t input, re_token_t token, reg_syntax_t syntax)
	3757	{
	3758	Idx num = REG_MISSING;
	3759	unsigned char c;
	3760	while (1)
	3761	{
	3762	fetch_token (token, input, syntax);
	3763	c = token->opr.c;
	3764	if (BE (token->type == END_OF_RE, 0))
	3765	return REG_ERROR;
	3766	if (token->type == OP_CLOSE_DUP_NUM \|\| c == ',')
	3767	break;
	3768	num = ((token->type != CHARACTER \|\| c < '0' \|\| '9' < c
	3769	\|\| num == REG_ERROR)
	3770	? REG_ERROR
1.3.2 by Colin Watson Import upstream version 3.2	3771	: num == REG_MISSING
	3772	? c - '0'
	3773	: MIN (RE_DUP_MAX + 1, num * 10 + c - '0'));
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3774	}
	3775	return num;
	3776	}
	3777
	3778	#ifdef RE_ENABLE_I18N
	3779	static void
	3780	free_charset (re_charset_t *cset)
	3781	{
	3782	re_free (cset->mbchars);
	3783	# ifdef _LIBC
	3784	re_free (cset->coll_syms);
	3785	re_free (cset->equiv_classes);
	3786	re_free (cset->range_starts);
	3787	re_free (cset->range_ends);
	3788	# endif
	3789	re_free (cset->char_classes);
	3790	re_free (cset);
	3791	}
	3792	#endif /* RE_ENABLE_I18N */
	3793
	3794	/* Functions for binary tree operation. */
	3795
	3796	/* Create a tree node. */
	3797
	3798	static bin_tree_t *
	3799	create_tree (re_dfa_t dfa, bin_tree_t left, bin_tree_t *right,
	3800	re_token_type_t type)
	3801	{
	3802	re_token_t t;
	3803	t.type = type;
	3804	return create_token_tree (dfa, left, right, &t);
	3805	}
	3806
	3807	static bin_tree_t *
	3808	create_token_tree (re_dfa_t dfa, bin_tree_t left, bin_tree_t *right,
	3809	const re_token_t *token)
	3810	{
	3811	bin_tree_t *tree;
	3812	if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
	3813	{
	3814	bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, 1);
	3815
	3816	if (storage == NULL)
	3817	return NULL;
	3818	storage->next = dfa->str_tree_storage;
	3819	dfa->str_tree_storage = storage;
	3820	dfa->str_tree_storage_idx = 0;
	3821	}
	3822	tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++];
	3823
	3824	tree->parent = NULL;
	3825	tree->left = left;
	3826	tree->right = right;
	3827	tree->token = *token;
	3828	tree->token.duplicated = 0;
	3829	tree->token.opt_subexp = 0;
	3830	tree->first = NULL;
	3831	tree->next = NULL;
	3832	tree->node_idx = REG_MISSING;
	3833
	3834	if (left != NULL)
	3835	left->parent = tree;
	3836	if (right != NULL)
	3837	right->parent = tree;
3838	return tree;
3839	}
3840
3841	/* Mark the tree SRC as an optional subexpression.
3842	To be called from preorder or postorder. */
3843
3844	static reg_errcode_t
3845	mark_opt_subexp (void extra, bin_tree_t node)
3846	{
1.3.2 by Colin Watson Import upstream version 3.2	3847	Idx idx = (uintptr_t) extra;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3848	if (node->token.type == SUBEXP && node->token.opr.idx == idx)
	3849	node->token.opt_subexp = 1;
	3850
	3851	return REG_NOERROR;
	3852	}
	3853
	3854	/* Free the allocated memory inside NODE. */
	3855
	3856	static void
	3857	free_token (re_token_t *node)
	3858	{
	3859	#ifdef RE_ENABLE_I18N
	3860	if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
	3861	free_charset (node->opr.mbcset);
	3862	else
	3863	#endif /* RE_ENABLE_I18N */
	3864	if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
	3865	re_free (node->opr.sbcset);
	3866	}
	3867
	3868	/* Worker function for tree walking. Free the allocated memory inside NODE
	3869	and its children. */
	3870
	3871	static reg_errcode_t
	3872	free_tree (void extra, bin_tree_t node)
	3873	{
	3874	free_token (&node->token);
	3875	return REG_NOERROR;
	3876	}
	3877
	3878
	3879	/* Duplicate the node SRC, and return new node. This is a preorder
	3880	visit similar to the one implemented by the generic visitor, but
	3881	we need more infrastructure to maintain two parallel trees --- so,
	3882	it's easier to duplicate. */
	3883
	3884	static bin_tree_t *
	3885	duplicate_tree (const bin_tree_t root, re_dfa_t dfa)
	3886	{
	3887	const bin_tree_t *node;
	3888	bin_tree_t *dup_root;
	3889	bin_tree_t *p_new = &dup_root, dup_node = root->parent;
	3890
	3891	for (node = root; ; )
	3892	{
	3893	/* Create a new tree and link it back to the current parent. */
	3894	*p_new = create_token_tree (dfa, NULL, NULL, &node->token);
	3895	if (*p_new == NULL)
	3896	return NULL;
	3897	(*p_new)->parent = dup_node;
	3898	(*p_new)->token.duplicated = 1;
	3899	dup_node = *p_new;
	3900
	3901	/* Go to the left node, or up and to the right. */
	3902	if (node->left)
	3903	{
	3904	node = node->left;
	3905	p_new = &dup_node->left;
	3906	}
	3907	else
	3908	{
	3909	const bin_tree_t *prev = NULL;
	3910	while (node->right == prev \|\| node->right == NULL)
	3911	{
3912	prev = node;
3913	node = node->parent;
3914	dup_node = dup_node->parent;
3915	if (!node)
1.2.3 by Colin Watson Import upstream version 2.2	3916	return dup_root;
1.1.4 by Otavio Salvador Import upstream version 1.8.8.git.2009.06.03	3917	}
	3918	node = node->right;
	3919	p_new = &dup_node->right;
	3920	}
	3921	}
	3922	}