1
/* Extended regular expression matching and search library.
2
Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free
3
Software Foundation, Inc.
4
This file is part of the GNU C Library.
5
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
7
This program is free software; you can redistribute it and/or modify
8
it under the terms of the GNU General Public License as published by
9
the Free Software Foundation; either version 3, or (at your option)
12
This program is distributed in the hope that it will be useful,
13
but WITHOUT ANY WARRANTY; without even the implied warranty of
14
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
GNU General Public License for more details.
17
You should have received a copy of the GNU General Public License along
18
with this program; if not, write to the Free Software Foundation,
19
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
21
static void re_string_construct_common (const char *str, Idx len,
23
RE_TRANSLATE_TYPE trans, bool icase,
24
const re_dfa_t *dfa) internal_function;
25
static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
26
const re_node_set *nodes,
27
re_hashval_t hash) internal_function;
28
static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
29
const re_node_set *nodes,
31
re_hashval_t hash) internal_function;
33
/* Functions for string operation. */
35
/* This function allocate the buffers. It is necessary to call
36
re_string_reconstruct before using the object. */
39
internal_function __attribute_warn_unused_result__
40
re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
41
RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
46
/* Ensure at least one character fits into the buffers. */
47
if (init_len < dfa->mb_cur_max)
48
init_len = dfa->mb_cur_max;
49
init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
50
re_string_construct_common (str, len, pstr, trans, icase, dfa);
52
ret = re_string_realloc_buffers (pstr, init_buf_len);
53
if (BE (ret != REG_NOERROR, 0))
56
pstr->word_char = dfa->word_char;
57
pstr->word_ops_used = dfa->word_ops_used;
58
pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
59
pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
60
pstr->valid_raw_len = pstr->valid_len;
64
/* This function allocate the buffers, and initialize them. */
67
internal_function __attribute_warn_unused_result__
68
re_string_construct (re_string_t *pstr, const char *str, Idx len,
69
RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
72
memset (pstr, '\0', sizeof (re_string_t));
73
re_string_construct_common (str, len, pstr, trans, icase, dfa);
77
ret = re_string_realloc_buffers (pstr, len + 1);
78
if (BE (ret != REG_NOERROR, 0))
81
pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
86
if (dfa->mb_cur_max > 1)
90
ret = build_wcs_upper_buffer (pstr);
91
if (BE (ret != REG_NOERROR, 0))
93
if (pstr->valid_raw_len >= len)
95
if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
97
ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
98
if (BE (ret != REG_NOERROR, 0))
103
#endif /* RE_ENABLE_I18N */
104
build_upper_buffer (pstr);
108
#ifdef RE_ENABLE_I18N
109
if (dfa->mb_cur_max > 1)
110
build_wcs_buffer (pstr);
112
#endif /* RE_ENABLE_I18N */
115
re_string_translate_buffer (pstr);
118
pstr->valid_len = pstr->bufs_len;
119
pstr->valid_raw_len = pstr->bufs_len;
127
/* Helper functions for re_string_allocate, and re_string_construct. */
130
internal_function __attribute_warn_unused_result__
131
re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
133
#ifdef RE_ENABLE_I18N
134
if (pstr->mb_cur_max > 1)
138
/* Avoid overflow. */
139
size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx));
140
if (BE (SIZE_MAX / max_object_size < new_buf_len, 0))
143
new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
144
if (BE (new_wcs == NULL, 0))
147
if (pstr->offsets != NULL)
149
Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len);
150
if (BE (new_offsets == NULL, 0))
152
pstr->offsets = new_offsets;
155
#endif /* RE_ENABLE_I18N */
156
if (pstr->mbs_allocated)
158
unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
160
if (BE (new_mbs == NULL, 0))
164
pstr->bufs_len = new_buf_len;
171
re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
172
RE_TRANSLATE_TYPE trans, bool icase,
175
pstr->raw_mbs = (const unsigned char *) str;
180
pstr->mbs_allocated = (trans != NULL || icase);
181
pstr->mb_cur_max = dfa->mb_cur_max;
182
pstr->is_utf8 = dfa->is_utf8;
183
pstr->map_notascii = dfa->map_notascii;
184
pstr->stop = pstr->len;
185
pstr->raw_stop = pstr->stop;
188
#ifdef RE_ENABLE_I18N
190
/* Build wide character buffer PSTR->WCS.
191
If the byte sequence of the string are:
192
<mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
193
Then wide character buffer will be:
194
<wc1> , WEOF , <wc2> , WEOF , <wc3>
195
We use WEOF for padding, they indicate that the position isn't
196
a first byte of a multibyte character.
198
Note that this function assumes PSTR->VALID_LEN elements are already
199
built and starts from PSTR->VALID_LEN. */
203
build_wcs_buffer (re_string_t *pstr)
206
unsigned char buf[MB_LEN_MAX];
207
assert (MB_LEN_MAX >= pstr->mb_cur_max);
209
unsigned char buf[64];
212
Idx byte_idx, end_idx, remain_len;
215
/* Build the buffers from pstr->valid_len to either pstr->len or
217
end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
218
for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
223
remain_len = end_idx - byte_idx;
224
prev_st = pstr->cur_state;
225
/* Apply the translation if we need. */
226
if (BE (pstr->trans != NULL, 0))
230
for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
232
ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
233
buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
235
p = (const char *) buf;
238
p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
239
mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
240
if (BE (mbclen == (size_t) -2, 0))
242
/* The buffer doesn't have enough space, finish to build. */
243
pstr->cur_state = prev_st;
246
else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
248
/* We treat these cases as a singlebyte character. */
250
wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
251
if (BE (pstr->trans != NULL, 0))
252
wc = pstr->trans[wc];
253
pstr->cur_state = prev_st;
256
/* Write wide character and padding. */
257
pstr->wcs[byte_idx++] = wc;
258
/* Write paddings. */
259
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
260
pstr->wcs[byte_idx++] = WEOF;
262
pstr->valid_len = byte_idx;
263
pstr->valid_raw_len = byte_idx;
266
/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
267
but for REG_ICASE. */
270
internal_function __attribute_warn_unused_result__
271
build_wcs_upper_buffer (re_string_t *pstr)
274
Idx src_idx, byte_idx, end_idx, remain_len;
277
char buf[MB_LEN_MAX];
278
assert (MB_LEN_MAX >= pstr->mb_cur_max);
283
byte_idx = pstr->valid_len;
284
end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
286
/* The following optimization assumes that ASCII characters can be
287
mapped to wide characters with a simple cast. */
288
if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
290
while (byte_idx < end_idx)
294
if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
295
&& mbsinit (&pstr->cur_state))
297
/* In case of a singlebyte character. */
299
= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
300
/* The next step uses the assumption that wchar_t is encoded
301
ASCII-safe: all ASCII values can be converted like this. */
302
pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
307
remain_len = end_idx - byte_idx;
308
prev_st = pstr->cur_state;
309
mbclen = __mbrtowc (&wc,
310
((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
311
+ byte_idx), remain_len, &pstr->cur_state);
312
if (BE (mbclen < (size_t) -2, 1))
320
mbcdlen = wcrtomb (buf, wcu, &prev_st);
321
if (BE (mbclen == mbcdlen, 1))
322
memcpy (pstr->mbs + byte_idx, buf, mbclen);
330
memcpy (pstr->mbs + byte_idx,
331
pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
332
pstr->wcs[byte_idx++] = wcu;
333
/* Write paddings. */
334
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
335
pstr->wcs[byte_idx++] = WEOF;
337
else if (mbclen == (size_t) -1 || mbclen == 0)
339
/* It is an invalid character or '\0'. Just use the byte. */
340
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
341
pstr->mbs[byte_idx] = ch;
342
/* And also cast it to wide char. */
343
pstr->wcs[byte_idx++] = (wchar_t) ch;
344
if (BE (mbclen == (size_t) -1, 0))
345
pstr->cur_state = prev_st;
349
/* The buffer doesn't have enough space, finish to build. */
350
pstr->cur_state = prev_st;
354
pstr->valid_len = byte_idx;
355
pstr->valid_raw_len = byte_idx;
359
for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
364
remain_len = end_idx - byte_idx;
365
prev_st = pstr->cur_state;
366
if (BE (pstr->trans != NULL, 0))
370
for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
372
ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
373
buf[i] = pstr->trans[ch];
375
p = (const char *) buf;
378
p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
379
mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
380
if (BE (mbclen < (size_t) -2, 1))
388
mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
389
if (BE (mbclen == mbcdlen, 1))
390
memcpy (pstr->mbs + byte_idx, buf, mbclen);
391
else if (mbcdlen != (size_t) -1)
395
if (byte_idx + mbcdlen > pstr->bufs_len)
397
pstr->cur_state = prev_st;
401
if (pstr->offsets == NULL)
403
pstr->offsets = re_malloc (Idx, pstr->bufs_len);
405
if (pstr->offsets == NULL)
408
if (!pstr->offsets_needed)
410
for (i = 0; i < (size_t) byte_idx; ++i)
411
pstr->offsets[i] = i;
412
pstr->offsets_needed = 1;
415
memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
416
pstr->wcs[byte_idx] = wcu;
417
pstr->offsets[byte_idx] = src_idx;
418
for (i = 1; i < mbcdlen; ++i)
420
pstr->offsets[byte_idx + i]
421
= src_idx + (i < mbclen ? i : mbclen - 1);
422
pstr->wcs[byte_idx + i] = WEOF;
424
pstr->len += mbcdlen - mbclen;
425
if (pstr->raw_stop > src_idx)
426
pstr->stop += mbcdlen - mbclen;
427
end_idx = (pstr->bufs_len > pstr->len)
428
? pstr->len : pstr->bufs_len;
434
memcpy (pstr->mbs + byte_idx, p, mbclen);
437
memcpy (pstr->mbs + byte_idx, p, mbclen);
439
if (BE (pstr->offsets_needed != 0, 0))
442
for (i = 0; i < mbclen; ++i)
443
pstr->offsets[byte_idx + i] = src_idx + i;
447
pstr->wcs[byte_idx++] = wcu;
448
/* Write paddings. */
449
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
450
pstr->wcs[byte_idx++] = WEOF;
452
else if (mbclen == (size_t) -1 || mbclen == 0)
454
/* It is an invalid character or '\0'. Just use the byte. */
455
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
457
if (BE (pstr->trans != NULL, 0))
458
ch = pstr->trans [ch];
459
pstr->mbs[byte_idx] = ch;
461
if (BE (pstr->offsets_needed != 0, 0))
462
pstr->offsets[byte_idx] = src_idx;
465
/* And also cast it to wide char. */
466
pstr->wcs[byte_idx++] = (wchar_t) ch;
467
if (BE (mbclen == (size_t) -1, 0))
468
pstr->cur_state = prev_st;
472
/* The buffer doesn't have enough space, finish to build. */
473
pstr->cur_state = prev_st;
477
pstr->valid_len = byte_idx;
478
pstr->valid_raw_len = src_idx;
482
/* Skip characters until the index becomes greater than NEW_RAW_IDX.
487
re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
494
/* Skip the characters which are not necessary to check. */
495
for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
496
rawbuf_idx < new_raw_idx;)
500
remain_len = pstr->len - rawbuf_idx;
501
prev_st = pstr->cur_state;
502
mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
503
remain_len, &pstr->cur_state);
504
if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
506
/* We treat these cases as a single byte character. */
507
if (mbclen == 0 || remain_len == 0)
510
wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
512
pstr->cur_state = prev_st;
516
/* Then proceed the next character. */
517
rawbuf_idx += mbclen;
522
#endif /* RE_ENABLE_I18N */
524
/* Build the buffer PSTR->MBS, and apply the translation if we need.
525
This function is used in case of REG_ICASE. */
529
build_upper_buffer (re_string_t *pstr)
531
Idx char_idx, end_idx;
532
end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
534
for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
536
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
537
if (BE (pstr->trans != NULL, 0))
538
ch = pstr->trans[ch];
540
pstr->mbs[char_idx] = toupper (ch);
542
pstr->mbs[char_idx] = ch;
544
pstr->valid_len = char_idx;
545
pstr->valid_raw_len = char_idx;
548
/* Apply TRANS to the buffer in PSTR. */
552
re_string_translate_buffer (re_string_t *pstr)
554
Idx buf_idx, end_idx;
555
end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
557
for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
559
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
560
pstr->mbs[buf_idx] = pstr->trans[ch];
563
pstr->valid_len = buf_idx;
564
pstr->valid_raw_len = buf_idx;
567
/* This function re-construct the buffers.
568
Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
569
convert to upper case in case of REG_ICASE, apply translation. */
572
internal_function __attribute_warn_unused_result__
573
re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
577
if (BE (pstr->raw_mbs_idx <= idx, 0))
578
offset = idx - pstr->raw_mbs_idx;
582
#ifdef RE_ENABLE_I18N
583
if (pstr->mb_cur_max > 1)
584
memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
585
#endif /* RE_ENABLE_I18N */
586
pstr->len = pstr->raw_len;
587
pstr->stop = pstr->raw_stop;
589
pstr->raw_mbs_idx = 0;
590
pstr->valid_raw_len = 0;
591
pstr->offsets_needed = 0;
592
pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
593
: CONTEXT_NEWLINE | CONTEXT_BEGBUF);
594
if (!pstr->mbs_allocated)
595
pstr->mbs = (unsigned char *) pstr->raw_mbs;
599
if (BE (offset != 0, 1))
601
/* Should the already checked characters be kept? */
602
if (BE (offset < pstr->valid_raw_len, 1))
604
/* Yes, move them to the front of the buffer. */
605
#ifdef RE_ENABLE_I18N
606
if (BE (pstr->offsets_needed, 0))
608
Idx low = 0, high = pstr->valid_len, mid;
611
mid = (high + low) / 2;
612
if (pstr->offsets[mid] > offset)
614
else if (pstr->offsets[mid] < offset)
620
if (pstr->offsets[mid] < offset)
622
pstr->tip_context = re_string_context_at (pstr, mid - 1,
624
/* This can be quite complicated, so handle specially
625
only the common and easy case where the character with
626
different length representation of lower and upper
627
case is present at or after offset. */
628
if (pstr->valid_len > offset
629
&& mid == offset && pstr->offsets[mid] == offset)
631
memmove (pstr->wcs, pstr->wcs + offset,
632
(pstr->valid_len - offset) * sizeof (wint_t));
633
memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
634
pstr->valid_len -= offset;
635
pstr->valid_raw_len -= offset;
636
for (low = 0; low < pstr->valid_len; low++)
637
pstr->offsets[low] = pstr->offsets[low + offset] - offset;
641
/* Otherwise, just find out how long the partial multibyte
642
character at offset is and fill it with WEOF/255. */
643
pstr->len = pstr->raw_len - idx + offset;
644
pstr->stop = pstr->raw_stop - idx + offset;
645
pstr->offsets_needed = 0;
646
while (mid > 0 && pstr->offsets[mid - 1] == offset)
648
while (mid < pstr->valid_len)
649
if (pstr->wcs[mid] != WEOF)
653
if (mid == pstr->valid_len)
657
pstr->valid_len = pstr->offsets[mid] - offset;
660
for (low = 0; low < pstr->valid_len; ++low)
661
pstr->wcs[low] = WEOF;
662
memset (pstr->mbs, 255, pstr->valid_len);
665
pstr->valid_raw_len = pstr->valid_len;
671
pstr->tip_context = re_string_context_at (pstr, offset - 1,
673
#ifdef RE_ENABLE_I18N
674
if (pstr->mb_cur_max > 1)
675
memmove (pstr->wcs, pstr->wcs + offset,
676
(pstr->valid_len - offset) * sizeof (wint_t));
677
#endif /* RE_ENABLE_I18N */
678
if (BE (pstr->mbs_allocated, 0))
679
memmove (pstr->mbs, pstr->mbs + offset,
680
pstr->valid_len - offset);
681
pstr->valid_len -= offset;
682
pstr->valid_raw_len -= offset;
684
assert (pstr->valid_len > 0);
690
#ifdef RE_ENABLE_I18N
691
/* No, skip all characters until IDX. */
692
Idx prev_valid_len = pstr->valid_len;
694
if (BE (pstr->offsets_needed, 0))
696
pstr->len = pstr->raw_len - idx + offset;
697
pstr->stop = pstr->raw_stop - idx + offset;
698
pstr->offsets_needed = 0;
702
#ifdef RE_ENABLE_I18N
703
if (pstr->mb_cur_max > 1)
710
const unsigned char *raw, *p, *end;
712
/* Special case UTF-8. Multi-byte chars start with any
713
byte other than 0x80 - 0xbf. */
714
raw = pstr->raw_mbs + pstr->raw_mbs_idx;
715
end = raw + (offset - pstr->mb_cur_max);
716
if (end < pstr->raw_mbs)
718
p = raw + offset - 1;
720
/* We know the wchar_t encoding is UCS4, so for the simple
721
case, ASCII characters, skip the conversion step. */
722
if (isascii (*p) && BE (pstr->trans == NULL, 1))
724
memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
725
/* pstr->valid_len = 0; */
730
for (; p >= end; --p)
731
if ((*p & 0xc0) != 0x80)
735
Idx mlen = raw + pstr->len - p;
738
#if 0 /* dead code: buf is set but never used */
739
unsigned char buf[6];
740
if (BE (pstr->trans != NULL, 0))
742
int i = mlen < 6 ? mlen : 6;
744
buf[i] = pstr->trans[p[i]];
747
/* XXX Don't use mbrtowc, we know which conversion
748
to use (UTF-8 -> UCS4). */
749
memset (&cur_state, 0, sizeof (cur_state));
750
mbclen = __mbrtowc (&wc2, (const char *) p, mlen,
752
if (raw + offset - p <= mbclen
753
&& mbclen < (size_t) -2)
755
memset (&pstr->cur_state, '\0',
757
pstr->valid_len = mbclen - (raw + offset - p);
765
pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
768
= re_string_context_at (pstr, prev_valid_len - 1, eflags);
770
pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
771
&& IS_WIDE_WORD_CHAR (wc))
773
: ((IS_WIDE_NEWLINE (wc)
774
&& pstr->newline_anchor)
775
? CONTEXT_NEWLINE : 0));
776
if (BE (pstr->valid_len, 0))
778
for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
779
pstr->wcs[wcs_idx] = WEOF;
780
if (pstr->mbs_allocated)
781
memset (pstr->mbs, 255, pstr->valid_len);
783
pstr->valid_raw_len = pstr->valid_len;
786
#endif /* RE_ENABLE_I18N */
788
int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
789
pstr->valid_raw_len = 0;
792
pstr->tip_context = (bitset_contain (pstr->word_char, c)
794
: ((IS_NEWLINE (c) && pstr->newline_anchor)
795
? CONTEXT_NEWLINE : 0));
798
if (!BE (pstr->mbs_allocated, 0))
801
pstr->raw_mbs_idx = idx;
803
pstr->stop -= offset;
805
/* Then build the buffers. */
806
#ifdef RE_ENABLE_I18N
807
if (pstr->mb_cur_max > 1)
811
reg_errcode_t ret = build_wcs_upper_buffer (pstr);
812
if (BE (ret != REG_NOERROR, 0))
816
build_wcs_buffer (pstr);
819
#endif /* RE_ENABLE_I18N */
820
if (BE (pstr->mbs_allocated, 0))
823
build_upper_buffer (pstr);
824
else if (pstr->trans != NULL)
825
re_string_translate_buffer (pstr);
828
pstr->valid_len = pstr->len;
835
internal_function __attribute ((pure))
836
re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
841
/* Handle the common (easiest) cases first. */
842
if (BE (!pstr->mbs_allocated, 1))
843
return re_string_peek_byte (pstr, idx);
845
#ifdef RE_ENABLE_I18N
846
if (pstr->mb_cur_max > 1
847
&& ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
848
return re_string_peek_byte (pstr, idx);
851
off = pstr->cur_idx + idx;
852
#ifdef RE_ENABLE_I18N
853
if (pstr->offsets_needed)
854
off = pstr->offsets[off];
857
ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
859
#ifdef RE_ENABLE_I18N
860
/* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
861
this function returns CAPITAL LETTER I instead of first byte of
862
DOTLESS SMALL LETTER I. The latter would confuse the parser,
863
since peek_byte_case doesn't advance cur_idx in any way. */
864
if (pstr->offsets_needed && !isascii (ch))
865
return re_string_peek_byte (pstr, idx);
872
internal_function __attribute ((pure))
873
re_string_fetch_byte_case (re_string_t *pstr)
875
if (BE (!pstr->mbs_allocated, 1))
876
return re_string_fetch_byte (pstr);
878
#ifdef RE_ENABLE_I18N
879
if (pstr->offsets_needed)
884
/* For tr_TR.UTF-8 [[:islower:]] there is
885
[[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
886
in that case the whole multi-byte character and return
887
the original letter. On the other side, with
888
[[: DOTLESS SMALL LETTER I return [[:I, as doing
889
anything else would complicate things too much. */
891
if (!re_string_first_byte (pstr, pstr->cur_idx))
892
return re_string_fetch_byte (pstr);
894
off = pstr->offsets[pstr->cur_idx];
895
ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
898
return re_string_fetch_byte (pstr);
900
re_string_skip_bytes (pstr,
901
re_string_char_size_at (pstr, pstr->cur_idx));
906
return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
911
re_string_destruct (re_string_t *pstr)
913
#ifdef RE_ENABLE_I18N
915
re_free (pstr->offsets);
916
#endif /* RE_ENABLE_I18N */
917
if (pstr->mbs_allocated)
921
/* Return the context at IDX in INPUT. */
925
re_string_context_at (const re_string_t *input, Idx idx, int eflags)
928
if (BE (! REG_VALID_INDEX (idx), 0))
929
/* In this case, we use the value stored in input->tip_context,
930
since we can't know the character in input->mbs[-1] here. */
931
return input->tip_context;
932
if (BE (idx == input->len, 0))
933
return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
934
: CONTEXT_NEWLINE | CONTEXT_ENDBUF);
935
#ifdef RE_ENABLE_I18N
936
if (input->mb_cur_max > 1)
940
while(input->wcs[wc_idx] == WEOF)
943
/* It must not happen. */
944
assert (REG_VALID_INDEX (wc_idx));
947
if (! REG_VALID_INDEX (wc_idx))
948
return input->tip_context;
950
wc = input->wcs[wc_idx];
951
if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
953
return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
954
? CONTEXT_NEWLINE : 0);
959
c = re_string_byte_at (input, idx);
960
if (bitset_contain (input->word_char, c))
962
return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
966
/* Functions for set operation. */
969
internal_function __attribute_warn_unused_result__
970
re_node_set_alloc (re_node_set *set, Idx size)
974
set->elems = re_malloc (Idx, size);
975
if (BE (set->elems == NULL, 0))
981
internal_function __attribute_warn_unused_result__
982
re_node_set_init_1 (re_node_set *set, Idx elem)
986
set->elems = re_malloc (Idx, 1);
987
if (BE (set->elems == NULL, 0))
989
set->alloc = set->nelem = 0;
992
set->elems[0] = elem;
997
internal_function __attribute_warn_unused_result__
998
re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
1001
set->elems = re_malloc (Idx, 2);
1002
if (BE (set->elems == NULL, 0))
1007
set->elems[0] = elem1;
1014
set->elems[0] = elem1;
1015
set->elems[1] = elem2;
1019
set->elems[0] = elem2;
1020
set->elems[1] = elem1;
1026
static reg_errcode_t
1027
internal_function __attribute_warn_unused_result__
1028
re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
1030
dest->nelem = src->nelem;
1033
dest->alloc = dest->nelem;
1034
dest->elems = re_malloc (Idx, dest->alloc);
1035
if (BE (dest->elems == NULL, 0))
1037
dest->alloc = dest->nelem = 0;
1040
memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1043
re_node_set_init_empty (dest);
1047
/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
1048
DEST. Return value indicate the error code or REG_NOERROR if succeeded.
1049
Note: We assume dest->elems is NULL, when dest->alloc is 0. */
1051
static reg_errcode_t
1052
internal_function __attribute_warn_unused_result__
1053
re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
1054
const re_node_set *src2)
1056
Idx i1, i2, is, id, delta, sbase;
1057
if (src1->nelem == 0 || src2->nelem == 0)
1060
/* We need dest->nelem + 2 * elems_in_intersection; this is a
1061
conservative estimate. */
1062
if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
1064
Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
1065
Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc);
1066
if (BE (new_elems == NULL, 0))
1068
dest->elems = new_elems;
1069
dest->alloc = new_alloc;
1072
/* Find the items in the intersection of SRC1 and SRC2, and copy
1073
into the top of DEST those that are not already in DEST itself. */
1074
sbase = dest->nelem + src1->nelem + src2->nelem;
1075
i1 = src1->nelem - 1;
1076
i2 = src2->nelem - 1;
1077
id = dest->nelem - 1;
1080
if (src1->elems[i1] == src2->elems[i2])
1082
/* Try to find the item in DEST. Maybe we could binary search? */
1083
while (REG_VALID_INDEX (id) && dest->elems[id] > src1->elems[i1])
1086
if (! REG_VALID_INDEX (id) || dest->elems[id] != src1->elems[i1])
1087
dest->elems[--sbase] = src1->elems[i1];
1089
if (! REG_VALID_INDEX (--i1) || ! REG_VALID_INDEX (--i2))
1093
/* Lower the highest of the two items. */
1094
else if (src1->elems[i1] < src2->elems[i2])
1096
if (! REG_VALID_INDEX (--i2))
1101
if (! REG_VALID_INDEX (--i1))
1106
id = dest->nelem - 1;
1107
is = dest->nelem + src1->nelem + src2->nelem - 1;
1108
delta = is - sbase + 1;
1110
/* Now copy. When DELTA becomes zero, the remaining
1111
DEST elements are already in place; this is more or
1112
less the same loop that is in re_node_set_merge. */
1113
dest->nelem += delta;
1114
if (delta > 0 && REG_VALID_INDEX (id))
1117
if (dest->elems[is] > dest->elems[id])
1119
/* Copy from the top. */
1120
dest->elems[id + delta--] = dest->elems[is--];
1126
/* Slide from the bottom. */
1127
dest->elems[id + delta] = dest->elems[id];
1128
if (! REG_VALID_INDEX (--id))
1133
/* Copy remaining SRC elements. */
1134
memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx));
1139
/* Calculate the union set of the sets SRC1 and SRC2. And store it to
1140
DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1142
static reg_errcode_t
1143
internal_function __attribute_warn_unused_result__
1144
re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
1145
const re_node_set *src2)
1148
if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
1150
dest->alloc = src1->nelem + src2->nelem;
1151
dest->elems = re_malloc (Idx, dest->alloc);
1152
if (BE (dest->elems == NULL, 0))
1157
if (src1 != NULL && src1->nelem > 0)
1158
return re_node_set_init_copy (dest, src1);
1159
else if (src2 != NULL && src2->nelem > 0)
1160
return re_node_set_init_copy (dest, src2);
1162
re_node_set_init_empty (dest);
1165
for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
1167
if (src1->elems[i1] > src2->elems[i2])
1169
dest->elems[id++] = src2->elems[i2++];
1172
if (src1->elems[i1] == src2->elems[i2])
1174
dest->elems[id++] = src1->elems[i1++];
1176
if (i1 < src1->nelem)
1178
memcpy (dest->elems + id, src1->elems + i1,
1179
(src1->nelem - i1) * sizeof (Idx));
1180
id += src1->nelem - i1;
1182
else if (i2 < src2->nelem)
1184
memcpy (dest->elems + id, src2->elems + i2,
1185
(src2->nelem - i2) * sizeof (Idx));
1186
id += src2->nelem - i2;
1192
/* Calculate the union set of the sets DEST and SRC. And store it to
1193
DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1195
static reg_errcode_t
1196
internal_function __attribute_warn_unused_result__
1197
re_node_set_merge (re_node_set *dest, const re_node_set *src)
1199
Idx is, id, sbase, delta;
1200
if (src == NULL || src->nelem == 0)
1202
if (dest->alloc < 2 * src->nelem + dest->nelem)
1204
Idx new_alloc = 2 * (src->nelem + dest->alloc);
1205
Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc);
1206
if (BE (new_buffer == NULL, 0))
1208
dest->elems = new_buffer;
1209
dest->alloc = new_alloc;
1212
if (BE (dest->nelem == 0, 0))
1214
dest->nelem = src->nelem;
1215
memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1219
/* Copy into the top of DEST the items of SRC that are not
1220
found in DEST. Maybe we could binary search in DEST? */
1221
for (sbase = dest->nelem + 2 * src->nelem,
1222
is = src->nelem - 1, id = dest->nelem - 1;
1223
REG_VALID_INDEX (is) && REG_VALID_INDEX (id); )
1225
if (dest->elems[id] == src->elems[is])
1227
else if (dest->elems[id] < src->elems[is])
1228
dest->elems[--sbase] = src->elems[is--];
1229
else /* if (dest->elems[id] > src->elems[is]) */
1233
if (REG_VALID_INDEX (is))
1235
/* If DEST is exhausted, the remaining items of SRC must be unique. */
1237
memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx));
1240
id = dest->nelem - 1;
1241
is = dest->nelem + 2 * src->nelem - 1;
1242
delta = is - sbase + 1;
1246
/* Now copy. When DELTA becomes zero, the remaining
1247
DEST elements are already in place. */
1248
dest->nelem += delta;
1251
if (dest->elems[is] > dest->elems[id])
1253
/* Copy from the top. */
1254
dest->elems[id + delta--] = dest->elems[is--];
1260
/* Slide from the bottom. */
1261
dest->elems[id + delta] = dest->elems[id];
1262
if (! REG_VALID_INDEX (--id))
1264
/* Copy remaining SRC elements. */
1265
memcpy (dest->elems, dest->elems + sbase,
1266
delta * sizeof (Idx));
1275
/* Insert the new element ELEM to the re_node_set* SET.
1276
SET should not already have ELEM.
1277
Return true if successful. */
1280
internal_function __attribute_warn_unused_result__
1281
re_node_set_insert (re_node_set *set, Idx elem)
1284
/* In case the set is empty. */
1285
if (set->alloc == 0)
1286
return BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1);
1288
if (BE (set->nelem, 0) == 0)
1290
/* We already guaranteed above that set->alloc != 0. */
1291
set->elems[0] = elem;
1296
/* Realloc if we need. */
1297
if (set->alloc == set->nelem)
1300
set->alloc = set->alloc * 2;
1301
new_elems = re_realloc (set->elems, Idx, set->alloc);
1302
if (BE (new_elems == NULL, 0))
1304
set->elems = new_elems;
1307
/* Move the elements which follows the new element. Test the
1308
first element separately to skip a check in the inner loop. */
1309
if (elem < set->elems[0])
1312
for (idx = set->nelem; idx > 0; idx--)
1313
set->elems[idx] = set->elems[idx - 1];
1317
for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
1318
set->elems[idx] = set->elems[idx - 1];
1321
/* Insert the new element. */
1322
set->elems[idx] = elem;
1327
/* Insert the new element ELEM to the re_node_set* SET.
1328
SET should not already have any element greater than or equal to ELEM.
1329
Return true if successful. */
1332
internal_function __attribute_warn_unused_result__
1333
re_node_set_insert_last (re_node_set *set, Idx elem)
1335
/* Realloc if we need. */
1336
if (set->alloc == set->nelem)
1339
set->alloc = (set->alloc + 1) * 2;
1340
new_elems = re_realloc (set->elems, Idx, set->alloc);
1341
if (BE (new_elems == NULL, 0))
1343
set->elems = new_elems;
1346
/* Insert the new element. */
1347
set->elems[set->nelem++] = elem;
1351
/* Compare two node sets SET1 and SET2.
1352
Return true if SET1 and SET2 are equivalent. */
1355
internal_function __attribute ((pure))
1356
re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
1359
if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
1361
for (i = set1->nelem ; REG_VALID_INDEX (--i) ; )
1362
if (set1->elems[i] != set2->elems[i])
1367
/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
1370
internal_function __attribute ((pure))
1371
re_node_set_contains (const re_node_set *set, Idx elem)
1373
__re_size_t idx, right, mid;
1374
if (! REG_VALID_NONZERO_INDEX (set->nelem))
1377
/* Binary search the element. */
1379
right = set->nelem - 1;
1382
mid = (idx + right) / 2;
1383
if (set->elems[mid] < elem)
1388
return set->elems[idx] == elem ? idx + 1 : 0;
1393
re_node_set_remove_at (re_node_set *set, Idx idx)
1395
if (idx < 0 || idx >= set->nelem)
1398
for (; idx < set->nelem; idx++)
1399
set->elems[idx] = set->elems[idx + 1];
1403
/* Add the token TOKEN to dfa->nodes, and return the index of the token.
1404
Or return REG_MISSING if an error occurred. */
1408
re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1410
if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
1412
size_t new_nodes_alloc = dfa->nodes_alloc * 2;
1413
Idx *new_nexts, *new_indices;
1414
re_node_set *new_edests, *new_eclosures;
1415
re_token_t *new_nodes;
1416
size_t max_object_size =
1417
MAX (sizeof (re_token_t),
1418
MAX (sizeof (re_node_set),
1421
/* Avoid overflows. */
1422
if (BE (SIZE_MAX / 2 / max_object_size < dfa->nodes_alloc, 0))
1425
new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
1426
if (BE (new_nodes == NULL, 0))
1428
dfa->nodes = new_nodes;
1429
new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
1430
new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
1431
new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
1432
new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
1433
if (BE (new_nexts == NULL || new_indices == NULL
1434
|| new_edests == NULL || new_eclosures == NULL, 0))
1436
dfa->nexts = new_nexts;
1437
dfa->org_indices = new_indices;
1438
dfa->edests = new_edests;
1439
dfa->eclosures = new_eclosures;
1440
dfa->nodes_alloc = new_nodes_alloc;
1442
dfa->nodes[dfa->nodes_len] = token;
1443
dfa->nodes[dfa->nodes_len].constraint = 0;
1444
#ifdef RE_ENABLE_I18N
1446
int type = token.type;
1447
dfa->nodes[dfa->nodes_len].accept_mb =
1448
(type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
1451
dfa->nexts[dfa->nodes_len] = REG_MISSING;
1452
re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1453
re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
1454
return dfa->nodes_len++;
1457
static inline re_hashval_t
1459
calc_state_hash (const re_node_set *nodes, unsigned int context)
1461
re_hashval_t hash = nodes->nelem + context;
1463
for (i = 0 ; i < nodes->nelem ; i++)
1464
hash += nodes->elems[i];
1468
/* Search for the state whose node_set is equivalent to NODES.
1469
Return the pointer to the state, if we found it in the DFA.
1470
Otherwise create the new one and return it. In case of an error
1471
return NULL and set the error code in ERR.
1472
Note: - We assume NULL as the invalid state, then it is possible that
1473
return value is NULL and ERR is REG_NOERROR.
1474
- We never return non-NULL value in case of any errors, it is for
1477
static re_dfastate_t *
1478
internal_function __attribute_warn_unused_result__
1479
re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
1480
const re_node_set *nodes)
1483
re_dfastate_t *new_state;
1484
struct re_state_table_entry *spot;
1487
/* Suppress bogus uninitialized-variable warnings. */
1490
if (BE (nodes->nelem == 0, 0))
1495
hash = calc_state_hash (nodes, 0);
1496
spot = dfa->state_table + (hash & dfa->state_hash_mask);
1498
for (i = 0 ; i < spot->num ; i++)
1500
re_dfastate_t *state = spot->array[i];
1501
if (hash != state->hash)
1503
if (re_node_set_compare (&state->nodes, nodes))
1507
/* There are no appropriate state in the dfa, create the new one. */
1508
new_state = create_ci_newstate (dfa, nodes, hash);
1509
if (BE (new_state == NULL, 0))
1515
/* Search for the state whose node_set is equivalent to NODES and
1516
whose context is equivalent to CONTEXT.
1517
Return the pointer to the state, if we found it in the DFA.
1518
Otherwise create the new one and return it. In case of an error
1519
return NULL and set the error code in ERR.
1520
Note: - We assume NULL as the invalid state, then it is possible that
1521
return value is NULL and ERR is REG_NOERROR.
1522
- We never return non-NULL value in case of any errors, it is for
1525
static re_dfastate_t *
1526
internal_function __attribute_warn_unused_result__
1527
re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
1528
const re_node_set *nodes, unsigned int context)
1531
re_dfastate_t *new_state;
1532
struct re_state_table_entry *spot;
1535
/* Suppress bogus uninitialized-variable warnings. */
1538
if (nodes->nelem == 0)
1543
hash = calc_state_hash (nodes, context);
1544
spot = dfa->state_table + (hash & dfa->state_hash_mask);
1546
for (i = 0 ; i < spot->num ; i++)
1548
re_dfastate_t *state = spot->array[i];
1549
if (state->hash == hash
1550
&& state->context == context
1551
&& re_node_set_compare (state->entrance_nodes, nodes))
1554
/* There are no appropriate state in `dfa', create the new one. */
1555
new_state = create_cd_newstate (dfa, nodes, context, hash);
1556
if (BE (new_state == NULL, 0))
1562
/* Finish initialization of the new state NEWSTATE, and using its hash value
1563
HASH put in the appropriate bucket of DFA's state table. Return value
1564
indicates the error code if failed. */
1566
static reg_errcode_t
1567
__attribute_warn_unused_result__
1568
register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
1571
struct re_state_table_entry *spot;
1575
newstate->hash = hash;
1576
err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
1577
if (BE (err != REG_NOERROR, 0))
1579
for (i = 0; i < newstate->nodes.nelem; i++)
1581
Idx elem = newstate->nodes.elems[i];
1582
if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1583
if (BE (! re_node_set_insert_last (&newstate->non_eps_nodes, elem), 0))
1587
spot = dfa->state_table + (hash & dfa->state_hash_mask);
1588
if (BE (spot->alloc <= spot->num, 0))
1590
Idx new_alloc = 2 * spot->num + 2;
1591
re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
1593
if (BE (new_array == NULL, 0))
1595
spot->array = new_array;
1596
spot->alloc = new_alloc;
1598
spot->array[spot->num++] = newstate;
1603
free_state (re_dfastate_t *state)
1605
re_node_set_free (&state->non_eps_nodes);
1606
re_node_set_free (&state->inveclosure);
1607
if (state->entrance_nodes != &state->nodes)
1609
re_node_set_free (state->entrance_nodes);
1610
re_free (state->entrance_nodes);
1612
re_node_set_free (&state->nodes);
1613
re_free (state->word_trtable);
1614
re_free (state->trtable);
1618
/* Create the new state which is independ of contexts.
1619
Return the new state if succeeded, otherwise return NULL. */
1621
static re_dfastate_t *
1622
internal_function __attribute_warn_unused_result__
1623
create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1628
re_dfastate_t *newstate;
1630
newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1631
if (BE (newstate == NULL, 0))
1633
err = re_node_set_init_copy (&newstate->nodes, nodes);
1634
if (BE (err != REG_NOERROR, 0))
1640
newstate->entrance_nodes = &newstate->nodes;
1641
for (i = 0 ; i < nodes->nelem ; i++)
1643
re_token_t *node = dfa->nodes + nodes->elems[i];
1644
re_token_type_t type = node->type;
1645
if (type == CHARACTER && !node->constraint)
1647
#ifdef RE_ENABLE_I18N
1648
newstate->accept_mb |= node->accept_mb;
1649
#endif /* RE_ENABLE_I18N */
1651
/* If the state has the halt node, the state is a halt state. */
1652
if (type == END_OF_RE)
1654
else if (type == OP_BACK_REF)
1655
newstate->has_backref = 1;
1656
else if (type == ANCHOR || node->constraint)
1657
newstate->has_constraint = 1;
1659
err = register_state (dfa, newstate, hash);
1660
if (BE (err != REG_NOERROR, 0))
1662
free_state (newstate);
1668
/* Create the new state which is depend on the context CONTEXT.
1669
Return the new state if succeeded, otherwise return NULL. */
1671
static re_dfastate_t *
1672
internal_function __attribute_warn_unused_result__
1673
create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1674
unsigned int context, re_hashval_t hash)
1676
Idx i, nctx_nodes = 0;
1678
re_dfastate_t *newstate;
1680
newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1681
if (BE (newstate == NULL, 0))
1683
err = re_node_set_init_copy (&newstate->nodes, nodes);
1684
if (BE (err != REG_NOERROR, 0))
1690
newstate->context = context;
1691
newstate->entrance_nodes = &newstate->nodes;
1693
for (i = 0 ; i < nodes->nelem ; i++)
1695
re_token_t *node = dfa->nodes + nodes->elems[i];
1696
re_token_type_t type = node->type;
1697
unsigned int constraint = node->constraint;
1699
if (type == CHARACTER && !constraint)
1701
#ifdef RE_ENABLE_I18N
1702
newstate->accept_mb |= node->accept_mb;
1703
#endif /* RE_ENABLE_I18N */
1705
/* If the state has the halt node, the state is a halt state. */
1706
if (type == END_OF_RE)
1708
else if (type == OP_BACK_REF)
1709
newstate->has_backref = 1;
1713
if (newstate->entrance_nodes == &newstate->nodes)
1715
newstate->entrance_nodes = re_malloc (re_node_set, 1);
1716
if (BE (newstate->entrance_nodes == NULL, 0))
1718
free_state (newstate);
1721
if (re_node_set_init_copy (newstate->entrance_nodes, nodes)
1725
newstate->has_constraint = 1;
1728
if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1730
re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1735
err = register_state (dfa, newstate, hash);
1736
if (BE (err != REG_NOERROR, 0))
1738
free_state (newstate);