1
/* Extended regular expression matching and search library.
2
Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
3
Free Software Foundation, Inc.
4
This file is part of the GNU C Library.
5
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
7
This program is free software; you can redistribute it and/or modify
8
it under the terms of the GNU General Public License as published by
9
the Free Software Foundation; either version 2, or (at your option)
12
This program is distributed in the hope that it will be useful,
13
but WITHOUT ANY WARRANTY; without even the implied warranty of
14
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
GNU General Public License for more details.
17
You should have received a copy of the GNU General Public License along
18
with this program; if not, write to the Free Software Foundation,
19
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
21
static void re_string_construct_common (const char *str, Idx len,
23
RE_TRANSLATE_TYPE trans, bool icase,
24
const re_dfa_t *dfa) internal_function;
25
static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
26
const re_node_set *nodes,
27
re_hashval_t hash) internal_function;
28
static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
29
const re_node_set *nodes,
31
re_hashval_t hash) internal_function;
33
/* Functions for string operation. */
35
/* This function allocate the buffers. It is necessary to call
36
re_string_reconstruct before using the object. */
40
re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
41
RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
46
/* Ensure at least one character fits into the buffers. */
47
if (init_len < dfa->mb_cur_max)
48
init_len = dfa->mb_cur_max;
49
init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
50
re_string_construct_common (str, len, pstr, trans, icase, dfa);
52
ret = re_string_realloc_buffers (pstr, init_buf_len);
53
if (BE (ret != REG_NOERROR, 0))
56
pstr->word_char = dfa->word_char;
57
pstr->word_ops_used = dfa->word_ops_used;
58
pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
59
pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
60
pstr->valid_raw_len = pstr->valid_len;
64
/* This function allocate the buffers, and initialize them. */
68
re_string_construct (re_string_t *pstr, const char *str, Idx len,
69
RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
72
memset (pstr, '\0', sizeof (re_string_t));
73
re_string_construct_common (str, len, pstr, trans, icase, dfa);
77
ret = re_string_realloc_buffers (pstr, len + 1);
78
if (BE (ret != REG_NOERROR, 0))
81
pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
86
if (dfa->mb_cur_max > 1)
90
ret = build_wcs_upper_buffer (pstr);
91
if (BE (ret != REG_NOERROR, 0))
93
if (pstr->valid_raw_len >= len)
95
if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
97
ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
98
if (BE (ret != REG_NOERROR, 0))
103
#endif /* RE_ENABLE_I18N */
104
build_upper_buffer (pstr);
108
#ifdef RE_ENABLE_I18N
109
if (dfa->mb_cur_max > 1)
110
build_wcs_buffer (pstr);
112
#endif /* RE_ENABLE_I18N */
115
re_string_translate_buffer (pstr);
118
pstr->valid_len = pstr->bufs_len;
119
pstr->valid_raw_len = pstr->bufs_len;
127
/* Helper functions for re_string_allocate, and re_string_construct. */
131
re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
133
#ifdef RE_ENABLE_I18N
134
if (pstr->mb_cur_max > 1)
138
/* Avoid overflow. */
139
size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx));
140
if (BE (SIZE_MAX / max_object_size < new_buf_len, 0))
143
new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
144
if (BE (new_wcs == NULL, 0))
147
if (pstr->offsets != NULL)
149
Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len);
150
if (BE (new_offsets == NULL, 0))
152
pstr->offsets = new_offsets;
155
#endif /* RE_ENABLE_I18N */
156
if (pstr->mbs_allocated)
158
unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
160
if (BE (new_mbs == NULL, 0))
164
pstr->bufs_len = new_buf_len;
171
re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
172
RE_TRANSLATE_TYPE trans, bool icase,
175
pstr->raw_mbs = (const unsigned char *) str;
180
pstr->mbs_allocated = (trans != NULL || icase);
181
pstr->mb_cur_max = dfa->mb_cur_max;
182
pstr->is_utf8 = dfa->is_utf8;
183
pstr->map_notascii = dfa->map_notascii;
184
pstr->stop = pstr->len;
185
pstr->raw_stop = pstr->stop;
188
#ifdef RE_ENABLE_I18N
190
/* Build wide character buffer PSTR->WCS.
191
If the byte sequence of the string are:
192
<mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
193
Then wide character buffer will be:
194
<wc1> , WEOF , <wc2> , WEOF , <wc3>
195
We use WEOF for padding, they indicate that the position isn't
196
a first byte of a multibyte character.
198
Note that this function assumes PSTR->VALID_LEN elements are already
199
built and starts from PSTR->VALID_LEN. */
203
build_wcs_buffer (re_string_t *pstr)
206
unsigned char buf[MB_LEN_MAX];
207
assert (MB_LEN_MAX >= pstr->mb_cur_max);
209
unsigned char buf[64];
212
Idx byte_idx, end_idx, remain_len;
215
/* Build the buffers from pstr->valid_len to either pstr->len or
217
end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
218
for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
223
remain_len = end_idx - byte_idx;
224
prev_st = pstr->cur_state;
225
/* Apply the translation if we need. */
226
if (BE (pstr->trans != NULL, 0))
230
for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
232
ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
233
buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
235
p = (const char *) buf;
238
p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
239
mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
240
if (BE (mbclen == (size_t) -2, 0))
242
/* The buffer doesn't have enough space, finish to build. */
243
pstr->cur_state = prev_st;
246
else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
248
/* We treat these cases as a singlebyte character. */
250
wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
251
if (BE (pstr->trans != NULL, 0))
252
wc = pstr->trans[wc];
253
pstr->cur_state = prev_st;
256
/* Write wide character and padding. */
257
pstr->wcs[byte_idx++] = wc;
258
/* Write paddings. */
259
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
260
pstr->wcs[byte_idx++] = WEOF;
262
pstr->valid_len = byte_idx;
263
pstr->valid_raw_len = byte_idx;
266
/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
267
but for REG_ICASE. */
271
build_wcs_upper_buffer (re_string_t *pstr)
274
Idx src_idx, byte_idx, end_idx, remain_len;
277
char buf[MB_LEN_MAX];
278
assert (MB_LEN_MAX >= pstr->mb_cur_max);
283
byte_idx = pstr->valid_len;
284
end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
286
/* The following optimization assumes that ASCII characters can be
287
mapped to wide characters with a simple cast. */
288
if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
290
while (byte_idx < end_idx)
294
if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
295
&& mbsinit (&pstr->cur_state))
297
/* In case of a singlebyte character. */
299
= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
300
/* The next step uses the assumption that wchar_t is encoded
301
ASCII-safe: all ASCII values can be converted like this. */
302
pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
307
remain_len = end_idx - byte_idx;
308
prev_st = pstr->cur_state;
309
mbclen = __mbrtowc (&wc,
310
((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
311
+ byte_idx), remain_len, &pstr->cur_state);
312
if (BE (mbclen < (size_t) -2, 1))
320
mbcdlen = wcrtomb (buf, wcu, &prev_st);
321
if (BE (mbclen == mbcdlen, 1))
322
memcpy (pstr->mbs + byte_idx, buf, mbclen);
330
memcpy (pstr->mbs + byte_idx,
331
pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
332
pstr->wcs[byte_idx++] = wcu;
333
/* Write paddings. */
334
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
335
pstr->wcs[byte_idx++] = WEOF;
337
else if (mbclen == (size_t) -1 || mbclen == 0)
339
/* It is an invalid character or '\0'. Just use the byte. */
340
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
341
pstr->mbs[byte_idx] = ch;
342
/* And also cast it to wide char. */
343
pstr->wcs[byte_idx++] = (wchar_t) ch;
344
if (BE (mbclen == (size_t) -1, 0))
345
pstr->cur_state = prev_st;
349
/* The buffer doesn't have enough space, finish to build. */
350
pstr->cur_state = prev_st;
354
pstr->valid_len = byte_idx;
355
pstr->valid_raw_len = byte_idx;
359
for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
364
remain_len = end_idx - byte_idx;
365
prev_st = pstr->cur_state;
366
if (BE (pstr->trans != NULL, 0))
370
for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
372
ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
373
buf[i] = pstr->trans[ch];
375
p = (const char *) buf;
378
p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
379
mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
380
if (BE (mbclen < (size_t) -2, 1))
388
mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
389
if (BE (mbclen == mbcdlen, 1))
390
memcpy (pstr->mbs + byte_idx, buf, mbclen);
391
else if (mbcdlen != (size_t) -1)
395
if (byte_idx + mbcdlen > pstr->bufs_len)
397
pstr->cur_state = prev_st;
401
if (pstr->offsets == NULL)
403
pstr->offsets = re_malloc (Idx, pstr->bufs_len);
405
if (pstr->offsets == NULL)
408
if (!pstr->offsets_needed)
410
for (i = 0; i < (size_t) byte_idx; ++i)
411
pstr->offsets[i] = i;
412
pstr->offsets_needed = 1;
415
memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
416
pstr->wcs[byte_idx] = wcu;
417
pstr->offsets[byte_idx] = src_idx;
418
for (i = 1; i < mbcdlen; ++i)
420
pstr->offsets[byte_idx + i]
421
= src_idx + (i < mbclen ? i : mbclen - 1);
422
pstr->wcs[byte_idx + i] = WEOF;
424
pstr->len += mbcdlen - mbclen;
425
if (pstr->raw_stop > src_idx)
426
pstr->stop += mbcdlen - mbclen;
427
end_idx = (pstr->bufs_len > pstr->len)
428
? pstr->len : pstr->bufs_len;
434
memcpy (pstr->mbs + byte_idx, p, mbclen);
437
memcpy (pstr->mbs + byte_idx, p, mbclen);
439
if (BE (pstr->offsets_needed != 0, 0))
442
for (i = 0; i < mbclen; ++i)
443
pstr->offsets[byte_idx + i] = src_idx + i;
447
pstr->wcs[byte_idx++] = wcu;
448
/* Write paddings. */
449
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
450
pstr->wcs[byte_idx++] = WEOF;
452
else if (mbclen == (size_t) -1 || mbclen == 0)
454
/* It is an invalid character or '\0'. Just use the byte. */
455
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
457
if (BE (pstr->trans != NULL, 0))
458
ch = pstr->trans [ch];
459
pstr->mbs[byte_idx] = ch;
461
if (BE (pstr->offsets_needed != 0, 0))
462
pstr->offsets[byte_idx] = src_idx;
465
/* And also cast it to wide char. */
466
pstr->wcs[byte_idx++] = (wchar_t) ch;
467
if (BE (mbclen == (size_t) -1, 0))
468
pstr->cur_state = prev_st;
472
/* The buffer doesn't have enough space, finish to build. */
473
pstr->cur_state = prev_st;
477
pstr->valid_len = byte_idx;
478
pstr->valid_raw_len = src_idx;
482
/* Skip characters until the index becomes greater than NEW_RAW_IDX.
487
re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
494
/* Skip the characters which are not necessary to check. */
495
for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
496
rawbuf_idx < new_raw_idx;)
500
remain_len = pstr->len - rawbuf_idx;
501
prev_st = pstr->cur_state;
502
mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
503
remain_len, &pstr->cur_state);
504
if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
506
/* We treat these cases as a single byte character. */
507
if (mbclen == 0 || remain_len == 0)
510
wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
512
pstr->cur_state = prev_st;
516
/* Then proceed the next character. */
517
rawbuf_idx += mbclen;
522
#endif /* RE_ENABLE_I18N */
524
/* Build the buffer PSTR->MBS, and apply the translation if we need.
525
This function is used in case of REG_ICASE. */
529
build_upper_buffer (re_string_t *pstr)
531
Idx char_idx, end_idx;
532
end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
534
for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
536
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
537
if (BE (pstr->trans != NULL, 0))
538
ch = pstr->trans[ch];
540
pstr->mbs[char_idx] = toupper (ch);
542
pstr->mbs[char_idx] = ch;
544
pstr->valid_len = char_idx;
545
pstr->valid_raw_len = char_idx;
548
/* Apply TRANS to the buffer in PSTR. */
552
re_string_translate_buffer (re_string_t *pstr)
554
Idx buf_idx, end_idx;
555
end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
557
for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
559
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
560
pstr->mbs[buf_idx] = pstr->trans[ch];
563
pstr->valid_len = buf_idx;
564
pstr->valid_raw_len = buf_idx;
567
/* This function re-construct the buffers.
568
Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
569
convert to upper case in case of REG_ICASE, apply translation. */
573
re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
577
if (BE (pstr->raw_mbs_idx <= idx, 0))
578
offset = idx - pstr->raw_mbs_idx;
582
#ifdef RE_ENABLE_I18N
583
if (pstr->mb_cur_max > 1)
584
memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
585
#endif /* RE_ENABLE_I18N */
586
pstr->len = pstr->raw_len;
587
pstr->stop = pstr->raw_stop;
589
pstr->raw_mbs_idx = 0;
590
pstr->valid_raw_len = 0;
591
pstr->offsets_needed = 0;
592
pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
593
: CONTEXT_NEWLINE | CONTEXT_BEGBUF);
594
if (!pstr->mbs_allocated)
595
pstr->mbs = (unsigned char *) pstr->raw_mbs;
599
if (BE (offset != 0, 1))
601
/* Should the already checked characters be kept? */
602
if (BE (offset < pstr->valid_raw_len, 1))
604
/* Yes, move them to the front of the buffer. */
605
#ifdef RE_ENABLE_I18N
606
if (BE (pstr->offsets_needed, 0))
608
Idx low = 0, high = pstr->valid_len, mid;
611
mid = (high + low) / 2;
612
if (pstr->offsets[mid] > offset)
614
else if (pstr->offsets[mid] < offset)
620
if (pstr->offsets[mid] < offset)
622
pstr->tip_context = re_string_context_at (pstr, mid - 1,
624
/* This can be quite complicated, so handle specially
625
only the common and easy case where the character with
626
different length representation of lower and upper
627
case is present at or after offset. */
628
if (pstr->valid_len > offset
629
&& mid == offset && pstr->offsets[mid] == offset)
631
memmove (pstr->wcs, pstr->wcs + offset,
632
(pstr->valid_len - offset) * sizeof (wint_t));
633
memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
634
pstr->valid_len -= offset;
635
pstr->valid_raw_len -= offset;
636
for (low = 0; low < pstr->valid_len; low++)
637
pstr->offsets[low] = pstr->offsets[low + offset] - offset;
641
/* Otherwise, just find out how long the partial multibyte
642
character at offset is and fill it with WEOF/255. */
643
pstr->len = pstr->raw_len - idx + offset;
644
pstr->stop = pstr->raw_stop - idx + offset;
645
pstr->offsets_needed = 0;
646
while (mid > 0 && pstr->offsets[mid - 1] == offset)
648
while (mid < pstr->valid_len)
649
if (pstr->wcs[mid] != WEOF)
653
if (mid == pstr->valid_len)
657
pstr->valid_len = pstr->offsets[mid] - offset;
660
for (low = 0; low < pstr->valid_len; ++low)
661
pstr->wcs[low] = WEOF;
662
memset (pstr->mbs, 255, pstr->valid_len);
665
pstr->valid_raw_len = pstr->valid_len;
671
pstr->tip_context = re_string_context_at (pstr, offset - 1,
673
#ifdef RE_ENABLE_I18N
674
if (pstr->mb_cur_max > 1)
675
memmove (pstr->wcs, pstr->wcs + offset,
676
(pstr->valid_len - offset) * sizeof (wint_t));
677
#endif /* RE_ENABLE_I18N */
678
if (BE (pstr->mbs_allocated, 0))
679
memmove (pstr->mbs, pstr->mbs + offset,
680
pstr->valid_len - offset);
681
pstr->valid_len -= offset;
682
pstr->valid_raw_len -= offset;
684
assert (pstr->valid_len > 0);
690
#ifdef RE_ENABLE_I18N
691
/* No, skip all characters until IDX. */
692
Idx prev_valid_len = pstr->valid_len;
694
if (BE (pstr->offsets_needed, 0))
696
pstr->len = pstr->raw_len - idx + offset;
697
pstr->stop = pstr->raw_stop - idx + offset;
698
pstr->offsets_needed = 0;
702
#ifdef RE_ENABLE_I18N
703
if (pstr->mb_cur_max > 1)
710
const unsigned char *raw, *p, *end;
712
/* Special case UTF-8. Multi-byte chars start with any
713
byte other than 0x80 - 0xbf. */
714
raw = pstr->raw_mbs + pstr->raw_mbs_idx;
715
end = raw + (offset - pstr->mb_cur_max);
716
if (end < pstr->raw_mbs)
718
p = raw + offset - 1;
720
/* We know the wchar_t encoding is UCS4, so for the simple
721
case, ASCII characters, skip the conversion step. */
722
if (isascii (*p) && BE (pstr->trans == NULL, 1))
724
memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
725
/* pstr->valid_len = 0; */
730
for (; p >= end; --p)
731
if ((*p & 0xc0) != 0x80)
735
Idx mlen = raw + pstr->len - p;
736
unsigned char buf[6];
739
if (BE (pstr->trans != NULL, 0))
741
int i = mlen < 6 ? mlen : 6;
743
buf[i] = pstr->trans[p[i]];
745
/* XXX Don't use mbrtowc, we know which conversion
746
to use (UTF-8 -> UCS4). */
747
memset (&cur_state, 0, sizeof (cur_state));
748
mbclen = __mbrtowc (&wc2, (const char *) p, mlen,
750
if (raw + offset - p <= mbclen
751
&& mbclen < (size_t) -2)
753
memset (&pstr->cur_state, '\0',
755
pstr->valid_len = mbclen - (raw + offset - p);
763
pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
766
= re_string_context_at (pstr, prev_valid_len - 1, eflags);
768
pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
769
&& IS_WIDE_WORD_CHAR (wc))
771
: ((IS_WIDE_NEWLINE (wc)
772
&& pstr->newline_anchor)
773
? CONTEXT_NEWLINE : 0));
774
if (BE (pstr->valid_len, 0))
776
for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
777
pstr->wcs[wcs_idx] = WEOF;
778
if (pstr->mbs_allocated)
779
memset (pstr->mbs, 255, pstr->valid_len);
781
pstr->valid_raw_len = pstr->valid_len;
784
#endif /* RE_ENABLE_I18N */
786
int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
787
pstr->valid_raw_len = 0;
790
pstr->tip_context = (bitset_contain (pstr->word_char, c)
792
: ((IS_NEWLINE (c) && pstr->newline_anchor)
793
? CONTEXT_NEWLINE : 0));
796
if (!BE (pstr->mbs_allocated, 0))
799
pstr->raw_mbs_idx = idx;
801
pstr->stop -= offset;
803
/* Then build the buffers. */
804
#ifdef RE_ENABLE_I18N
805
if (pstr->mb_cur_max > 1)
809
reg_errcode_t ret = build_wcs_upper_buffer (pstr);
810
if (BE (ret != REG_NOERROR, 0))
814
build_wcs_buffer (pstr);
817
#endif /* RE_ENABLE_I18N */
818
if (BE (pstr->mbs_allocated, 0))
821
build_upper_buffer (pstr);
822
else if (pstr->trans != NULL)
823
re_string_translate_buffer (pstr);
826
pstr->valid_len = pstr->len;
833
internal_function __attribute ((pure))
834
re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
839
/* Handle the common (easiest) cases first. */
840
if (BE (!pstr->mbs_allocated, 1))
841
return re_string_peek_byte (pstr, idx);
843
#ifdef RE_ENABLE_I18N
844
if (pstr->mb_cur_max > 1
845
&& ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
846
return re_string_peek_byte (pstr, idx);
849
off = pstr->cur_idx + idx;
850
#ifdef RE_ENABLE_I18N
851
if (pstr->offsets_needed)
852
off = pstr->offsets[off];
855
ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
857
#ifdef RE_ENABLE_I18N
858
/* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
859
this function returns CAPITAL LETTER I instead of first byte of
860
DOTLESS SMALL LETTER I. The latter would confuse the parser,
861
since peek_byte_case doesn't advance cur_idx in any way. */
862
if (pstr->offsets_needed && !isascii (ch))
863
return re_string_peek_byte (pstr, idx);
870
internal_function __attribute ((pure))
871
re_string_fetch_byte_case (re_string_t *pstr)
873
if (BE (!pstr->mbs_allocated, 1))
874
return re_string_fetch_byte (pstr);
876
#ifdef RE_ENABLE_I18N
877
if (pstr->offsets_needed)
882
/* For tr_TR.UTF-8 [[:islower:]] there is
883
[[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
884
in that case the whole multi-byte character and return
885
the original letter. On the other side, with
886
[[: DOTLESS SMALL LETTER I return [[:I, as doing
887
anything else would complicate things too much. */
889
if (!re_string_first_byte (pstr, pstr->cur_idx))
890
return re_string_fetch_byte (pstr);
892
off = pstr->offsets[pstr->cur_idx];
893
ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
896
return re_string_fetch_byte (pstr);
898
re_string_skip_bytes (pstr,
899
re_string_char_size_at (pstr, pstr->cur_idx));
904
return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
909
re_string_destruct (re_string_t *pstr)
911
#ifdef RE_ENABLE_I18N
913
re_free (pstr->offsets);
914
#endif /* RE_ENABLE_I18N */
915
if (pstr->mbs_allocated)
919
/* Return the context at IDX in INPUT. */
923
re_string_context_at (const re_string_t *input, Idx idx, int eflags)
926
if (BE (! REG_VALID_INDEX (idx), 0))
927
/* In this case, we use the value stored in input->tip_context,
928
since we can't know the character in input->mbs[-1] here. */
929
return input->tip_context;
930
if (BE (idx == input->len, 0))
931
return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
932
: CONTEXT_NEWLINE | CONTEXT_ENDBUF);
933
#ifdef RE_ENABLE_I18N
934
if (input->mb_cur_max > 1)
938
while(input->wcs[wc_idx] == WEOF)
941
/* It must not happen. */
942
assert (REG_VALID_INDEX (wc_idx));
945
if (! REG_VALID_INDEX (wc_idx))
946
return input->tip_context;
948
wc = input->wcs[wc_idx];
949
if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
951
return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
952
? CONTEXT_NEWLINE : 0);
957
c = re_string_byte_at (input, idx);
958
if (bitset_contain (input->word_char, c))
960
return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
964
/* Functions for set operation. */
968
re_node_set_alloc (re_node_set *set, Idx size)
972
set->elems = re_malloc (Idx, size);
973
if (BE (set->elems == NULL, 0))
980
re_node_set_init_1 (re_node_set *set, Idx elem)
984
set->elems = re_malloc (Idx, 1);
985
if (BE (set->elems == NULL, 0))
987
set->alloc = set->nelem = 0;
990
set->elems[0] = elem;
996
re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
999
set->elems = re_malloc (Idx, 2);
1000
if (BE (set->elems == NULL, 0))
1005
set->elems[0] = elem1;
1012
set->elems[0] = elem1;
1013
set->elems[1] = elem2;
1017
set->elems[0] = elem2;
1018
set->elems[1] = elem1;
1024
static reg_errcode_t
1026
re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
1028
dest->nelem = src->nelem;
1031
dest->alloc = dest->nelem;
1032
dest->elems = re_malloc (Idx, dest->alloc);
1033
if (BE (dest->elems == NULL, 0))
1035
dest->alloc = dest->nelem = 0;
1038
memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1041
re_node_set_init_empty (dest);
1045
/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
1046
DEST. Return value indicate the error code or REG_NOERROR if succeeded.
1047
Note: We assume dest->elems is NULL, when dest->alloc is 0. */
1049
static reg_errcode_t
1051
re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
1052
const re_node_set *src2)
1054
Idx i1, i2, is, id, delta, sbase;
1055
if (src1->nelem == 0 || src2->nelem == 0)
1058
/* We need dest->nelem + 2 * elems_in_intersection; this is a
1059
conservative estimate. */
1060
if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
1062
Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
1063
Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc);
1064
if (BE (new_elems == NULL, 0))
1066
dest->elems = new_elems;
1067
dest->alloc = new_alloc;
1070
/* Find the items in the intersection of SRC1 and SRC2, and copy
1071
into the top of DEST those that are not already in DEST itself. */
1072
sbase = dest->nelem + src1->nelem + src2->nelem;
1073
i1 = src1->nelem - 1;
1074
i2 = src2->nelem - 1;
1075
id = dest->nelem - 1;
1078
if (src1->elems[i1] == src2->elems[i2])
1080
/* Try to find the item in DEST. Maybe we could binary search? */
1081
while (REG_VALID_INDEX (id) && dest->elems[id] > src1->elems[i1])
1084
if (! REG_VALID_INDEX (id) || dest->elems[id] != src1->elems[i1])
1085
dest->elems[--sbase] = src1->elems[i1];
1087
if (! REG_VALID_INDEX (--i1) || ! REG_VALID_INDEX (--i2))
1091
/* Lower the highest of the two items. */
1092
else if (src1->elems[i1] < src2->elems[i2])
1094
if (! REG_VALID_INDEX (--i2))
1099
if (! REG_VALID_INDEX (--i1))
1104
id = dest->nelem - 1;
1105
is = dest->nelem + src1->nelem + src2->nelem - 1;
1106
delta = is - sbase + 1;
1108
/* Now copy. When DELTA becomes zero, the remaining
1109
DEST elements are already in place; this is more or
1110
less the same loop that is in re_node_set_merge. */
1111
dest->nelem += delta;
1112
if (delta > 0 && REG_VALID_INDEX (id))
1115
if (dest->elems[is] > dest->elems[id])
1117
/* Copy from the top. */
1118
dest->elems[id + delta--] = dest->elems[is--];
1124
/* Slide from the bottom. */
1125
dest->elems[id + delta] = dest->elems[id];
1126
if (! REG_VALID_INDEX (--id))
1131
/* Copy remaining SRC elements. */
1132
memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx));
1137
/* Calculate the union set of the sets SRC1 and SRC2. And store it to
1138
DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1140
static reg_errcode_t
1142
re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
1143
const re_node_set *src2)
1146
if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
1148
dest->alloc = src1->nelem + src2->nelem;
1149
dest->elems = re_malloc (Idx, dest->alloc);
1150
if (BE (dest->elems == NULL, 0))
1155
if (src1 != NULL && src1->nelem > 0)
1156
return re_node_set_init_copy (dest, src1);
1157
else if (src2 != NULL && src2->nelem > 0)
1158
return re_node_set_init_copy (dest, src2);
1160
re_node_set_init_empty (dest);
1163
for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
1165
if (src1->elems[i1] > src2->elems[i2])
1167
dest->elems[id++] = src2->elems[i2++];
1170
if (src1->elems[i1] == src2->elems[i2])
1172
dest->elems[id++] = src1->elems[i1++];
1174
if (i1 < src1->nelem)
1176
memcpy (dest->elems + id, src1->elems + i1,
1177
(src1->nelem - i1) * sizeof (Idx));
1178
id += src1->nelem - i1;
1180
else if (i2 < src2->nelem)
1182
memcpy (dest->elems + id, src2->elems + i2,
1183
(src2->nelem - i2) * sizeof (Idx));
1184
id += src2->nelem - i2;
1190
/* Calculate the union set of the sets DEST and SRC. And store it to
1191
DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
1193
static reg_errcode_t
1195
re_node_set_merge (re_node_set *dest, const re_node_set *src)
1197
Idx is, id, sbase, delta;
1198
if (src == NULL || src->nelem == 0)
1200
if (dest->alloc < 2 * src->nelem + dest->nelem)
1202
Idx new_alloc = 2 * (src->nelem + dest->alloc);
1203
Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc);
1204
if (BE (new_buffer == NULL, 0))
1206
dest->elems = new_buffer;
1207
dest->alloc = new_alloc;
1210
if (BE (dest->nelem == 0, 0))
1212
dest->nelem = src->nelem;
1213
memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1217
/* Copy into the top of DEST the items of SRC that are not
1218
found in DEST. Maybe we could binary search in DEST? */
1219
for (sbase = dest->nelem + 2 * src->nelem,
1220
is = src->nelem - 1, id = dest->nelem - 1;
1221
REG_VALID_INDEX (is) && REG_VALID_INDEX (id); )
1223
if (dest->elems[id] == src->elems[is])
1225
else if (dest->elems[id] < src->elems[is])
1226
dest->elems[--sbase] = src->elems[is--];
1227
else /* if (dest->elems[id] > src->elems[is]) */
1231
if (REG_VALID_INDEX (is))
1233
/* If DEST is exhausted, the remaining items of SRC must be unique. */
1235
memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx));
1238
id = dest->nelem - 1;
1239
is = dest->nelem + 2 * src->nelem - 1;
1240
delta = is - sbase + 1;
1244
/* Now copy. When DELTA becomes zero, the remaining
1245
DEST elements are already in place. */
1246
dest->nelem += delta;
1249
if (dest->elems[is] > dest->elems[id])
1251
/* Copy from the top. */
1252
dest->elems[id + delta--] = dest->elems[is--];
1258
/* Slide from the bottom. */
1259
dest->elems[id + delta] = dest->elems[id];
1260
if (! REG_VALID_INDEX (--id))
1262
/* Copy remaining SRC elements. */
1263
memcpy (dest->elems, dest->elems + sbase,
1264
delta * sizeof (Idx));
1273
/* Insert the new element ELEM to the re_node_set* SET.
1274
SET should not already have ELEM.
1275
Return true if successful. */
1279
re_node_set_insert (re_node_set *set, Idx elem)
1282
/* In case the set is empty. */
1283
if (set->alloc == 0)
1284
return BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1);
1286
if (BE (set->nelem, 0) == 0)
1288
/* We already guaranteed above that set->alloc != 0. */
1289
set->elems[0] = elem;
1294
/* Realloc if we need. */
1295
if (set->alloc == set->nelem)
1298
set->alloc = set->alloc * 2;
1299
new_elems = re_realloc (set->elems, Idx, set->alloc);
1300
if (BE (new_elems == NULL, 0))
1302
set->elems = new_elems;
1305
/* Move the elements which follows the new element. Test the
1306
first element separately to skip a check in the inner loop. */
1307
if (elem < set->elems[0])
1310
for (idx = set->nelem; idx > 0; idx--)
1311
set->elems[idx] = set->elems[idx - 1];
1315
for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
1316
set->elems[idx] = set->elems[idx - 1];
1319
/* Insert the new element. */
1320
set->elems[idx] = elem;
1325
/* Insert the new element ELEM to the re_node_set* SET.
1326
SET should not already have any element greater than or equal to ELEM.
1327
Return true if successful. */
1331
re_node_set_insert_last (re_node_set *set, Idx elem)
1333
/* Realloc if we need. */
1334
if (set->alloc == set->nelem)
1337
set->alloc = (set->alloc + 1) * 2;
1338
new_elems = re_realloc (set->elems, Idx, set->alloc);
1339
if (BE (new_elems == NULL, 0))
1341
set->elems = new_elems;
1344
/* Insert the new element. */
1345
set->elems[set->nelem++] = elem;
1349
/* Compare two node sets SET1 and SET2.
1350
Return true if SET1 and SET2 are equivalent. */
1353
internal_function __attribute ((pure))
1354
re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
1357
if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
1359
for (i = set1->nelem ; REG_VALID_INDEX (--i) ; )
1360
if (set1->elems[i] != set2->elems[i])
1365
/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
1368
internal_function __attribute ((pure))
1369
re_node_set_contains (const re_node_set *set, Idx elem)
1371
__re_size_t idx, right, mid;
1372
if (! REG_VALID_NONZERO_INDEX (set->nelem))
1375
/* Binary search the element. */
1377
right = set->nelem - 1;
1380
mid = (idx + right) / 2;
1381
if (set->elems[mid] < elem)
1386
return set->elems[idx] == elem ? idx + 1 : 0;
1391
re_node_set_remove_at (re_node_set *set, Idx idx)
1393
if (idx < 0 || idx >= set->nelem)
1396
for (; idx < set->nelem; idx++)
1397
set->elems[idx] = set->elems[idx + 1];
1401
/* Add the token TOKEN to dfa->nodes, and return the index of the token.
1402
Or return REG_MISSING if an error occurred. */
1406
re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1408
if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
1410
size_t new_nodes_alloc = dfa->nodes_alloc * 2;
1411
Idx *new_nexts, *new_indices;
1412
re_node_set *new_edests, *new_eclosures;
1413
re_token_t *new_nodes;
1414
size_t max_object_size =
1415
MAX (sizeof (re_token_t),
1416
MAX (sizeof (re_node_set),
1419
/* Avoid overflows. */
1420
if (BE (SIZE_MAX / 2 / max_object_size < dfa->nodes_alloc, 0))
1423
new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
1424
if (BE (new_nodes == NULL, 0))
1426
dfa->nodes = new_nodes;
1427
new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
1428
new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
1429
new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
1430
new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
1431
if (BE (new_nexts == NULL || new_indices == NULL
1432
|| new_edests == NULL || new_eclosures == NULL, 0))
1434
dfa->nexts = new_nexts;
1435
dfa->org_indices = new_indices;
1436
dfa->edests = new_edests;
1437
dfa->eclosures = new_eclosures;
1438
dfa->nodes_alloc = new_nodes_alloc;
1440
dfa->nodes[dfa->nodes_len] = token;
1441
dfa->nodes[dfa->nodes_len].constraint = 0;
1442
#ifdef RE_ENABLE_I18N
1444
int type = token.type;
1445
dfa->nodes[dfa->nodes_len].accept_mb =
1446
(type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
1449
dfa->nexts[dfa->nodes_len] = REG_MISSING;
1450
re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1451
re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
1452
return dfa->nodes_len++;
1455
static inline re_hashval_t
1457
calc_state_hash (const re_node_set *nodes, unsigned int context)
1459
re_hashval_t hash = nodes->nelem + context;
1461
for (i = 0 ; i < nodes->nelem ; i++)
1462
hash += nodes->elems[i];
1466
/* Search for the state whose node_set is equivalent to NODES.
1467
Return the pointer to the state, if we found it in the DFA.
1468
Otherwise create the new one and return it. In case of an error
1469
return NULL and set the error code in ERR.
1470
Note: - We assume NULL as the invalid state, then it is possible that
1471
return value is NULL and ERR is REG_NOERROR.
1472
- We never return non-NULL value in case of any errors, it is for
1475
static re_dfastate_t *
1477
re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
1478
const re_node_set *nodes)
1481
re_dfastate_t *new_state;
1482
struct re_state_table_entry *spot;
1485
/* Suppress bogus uninitialized-variable warnings. */
1488
if (BE (nodes->nelem == 0, 0))
1493
hash = calc_state_hash (nodes, 0);
1494
spot = dfa->state_table + (hash & dfa->state_hash_mask);
1496
for (i = 0 ; i < spot->num ; i++)
1498
re_dfastate_t *state = spot->array[i];
1499
if (hash != state->hash)
1501
if (re_node_set_compare (&state->nodes, nodes))
1505
/* There are no appropriate state in the dfa, create the new one. */
1506
new_state = create_ci_newstate (dfa, nodes, hash);
1507
if (BE (new_state == NULL, 0))
1513
/* Search for the state whose node_set is equivalent to NODES and
1514
whose context is equivalent to CONTEXT.
1515
Return the pointer to the state, if we found it in the DFA.
1516
Otherwise create the new one and return it. In case of an error
1517
return NULL and set the error code in ERR.
1518
Note: - We assume NULL as the invalid state, then it is possible that
1519
return value is NULL and ERR is REG_NOERROR.
1520
- We never return non-NULL value in case of any errors, it is for
1523
static re_dfastate_t *
1525
re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
1526
const re_node_set *nodes, unsigned int context)
1529
re_dfastate_t *new_state;
1530
struct re_state_table_entry *spot;
1533
/* Suppress bogus uninitialized-variable warnings. */
1536
if (nodes->nelem == 0)
1541
hash = calc_state_hash (nodes, context);
1542
spot = dfa->state_table + (hash & dfa->state_hash_mask);
1544
for (i = 0 ; i < spot->num ; i++)
1546
re_dfastate_t *state = spot->array[i];
1547
if (state->hash == hash
1548
&& state->context == context
1549
&& re_node_set_compare (state->entrance_nodes, nodes))
1552
/* There are no appropriate state in `dfa', create the new one. */
1553
new_state = create_cd_newstate (dfa, nodes, context, hash);
1554
if (BE (new_state == NULL, 0))
1560
/* Finish initialization of the new state NEWSTATE, and using its hash value
1561
HASH put in the appropriate bucket of DFA's state table. Return value
1562
indicates the error code if failed. */
1564
static reg_errcode_t
1565
register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
1568
struct re_state_table_entry *spot;
1572
newstate->hash = hash;
1573
err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
1574
if (BE (err != REG_NOERROR, 0))
1576
for (i = 0; i < newstate->nodes.nelem; i++)
1578
Idx elem = newstate->nodes.elems[i];
1579
if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1580
if (BE (! re_node_set_insert_last (&newstate->non_eps_nodes, elem), 0))
1584
spot = dfa->state_table + (hash & dfa->state_hash_mask);
1585
if (BE (spot->alloc <= spot->num, 0))
1587
Idx new_alloc = 2 * spot->num + 2;
1588
re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
1590
if (BE (new_array == NULL, 0))
1592
spot->array = new_array;
1593
spot->alloc = new_alloc;
1595
spot->array[spot->num++] = newstate;
1600
free_state (re_dfastate_t *state)
1602
re_node_set_free (&state->non_eps_nodes);
1603
re_node_set_free (&state->inveclosure);
1604
if (state->entrance_nodes != &state->nodes)
1606
re_node_set_free (state->entrance_nodes);
1607
re_free (state->entrance_nodes);
1609
re_node_set_free (&state->nodes);
1610
re_free (state->word_trtable);
1611
re_free (state->trtable);
1615
/* Create the new state which is independ of contexts.
1616
Return the new state if succeeded, otherwise return NULL. */
1618
static re_dfastate_t *
1620
create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1625
re_dfastate_t *newstate;
1627
newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1628
if (BE (newstate == NULL, 0))
1630
err = re_node_set_init_copy (&newstate->nodes, nodes);
1631
if (BE (err != REG_NOERROR, 0))
1637
newstate->entrance_nodes = &newstate->nodes;
1638
for (i = 0 ; i < nodes->nelem ; i++)
1640
re_token_t *node = dfa->nodes + nodes->elems[i];
1641
re_token_type_t type = node->type;
1642
if (type == CHARACTER && !node->constraint)
1644
#ifdef RE_ENABLE_I18N
1645
newstate->accept_mb |= node->accept_mb;
1646
#endif /* RE_ENABLE_I18N */
1648
/* If the state has the halt node, the state is a halt state. */
1649
if (type == END_OF_RE)
1651
else if (type == OP_BACK_REF)
1652
newstate->has_backref = 1;
1653
else if (type == ANCHOR || node->constraint)
1654
newstate->has_constraint = 1;
1656
err = register_state (dfa, newstate, hash);
1657
if (BE (err != REG_NOERROR, 0))
1659
free_state (newstate);
1665
/* Create the new state which is depend on the context CONTEXT.
1666
Return the new state if succeeded, otherwise return NULL. */
1668
static re_dfastate_t *
1670
create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1671
unsigned int context, re_hashval_t hash)
1673
Idx i, nctx_nodes = 0;
1675
re_dfastate_t *newstate;
1677
newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1678
if (BE (newstate == NULL, 0))
1680
err = re_node_set_init_copy (&newstate->nodes, nodes);
1681
if (BE (err != REG_NOERROR, 0))
1687
newstate->context = context;
1688
newstate->entrance_nodes = &newstate->nodes;
1690
for (i = 0 ; i < nodes->nelem ; i++)
1692
re_token_t *node = dfa->nodes + nodes->elems[i];
1693
re_token_type_t type = node->type;
1694
unsigned int constraint = node->constraint;
1696
if (type == CHARACTER && !constraint)
1698
#ifdef RE_ENABLE_I18N
1699
newstate->accept_mb |= node->accept_mb;
1700
#endif /* RE_ENABLE_I18N */
1702
/* If the state has the halt node, the state is a halt state. */
1703
if (type == END_OF_RE)
1705
else if (type == OP_BACK_REF)
1706
newstate->has_backref = 1;
1710
if (newstate->entrance_nodes == &newstate->nodes)
1712
newstate->entrance_nodes = re_malloc (re_node_set, 1);
1713
if (BE (newstate->entrance_nodes == NULL, 0))
1715
free_state (newstate);
1718
re_node_set_init_copy (newstate->entrance_nodes, nodes);
1720
newstate->has_constraint = 1;
1723
if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1725
re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1730
err = register_state (dfa, newstate, hash);
1731
if (BE (err != REG_NOERROR, 0))
1733
free_state (newstate);