~ubuntu-branches/ubuntu/natty/diffutils/natty

« back to all changes in this revision

Viewing changes to lib/regexec.c

Committer: Bazaar Package Importer
Author(s): Santiago Vila
Date: 2010-05-04 20:38:00 UTC
mfrom: (2.1.7 sid)
Revision ID: james.westby@ubuntu.com-20100504203800-f67xd9rsa9xl9qqj

Tags: 1:3.0-1

New upstream release.

files added:
.tarball-version

ChangeLog-2008

GNUmakefile

TODO

bootstrap

build-aux

build-aux/announce-gen

build-aux/arg-nonnull.h

build-aux/c++defs.h

build-aux/compile

build-aux/config.guess

build-aux/config.rpath

build-aux/config.sub

build-aux/depcomp

build-aux/do-release-commit-and-tag

build-aux/gendocs.sh

build-aux/git-version-gen

build-aux/gitlog-to-changelog

build-aux/gnu-web-doc-update

build-aux/gnupload

build-aux/install-sh

build-aux/mdate-sh

build-aux/missing

build-aux/mkinstalldirs

build-aux/texinfo.tex

build-aux/update-copyright

build-aux/useless-if-before-free

build-aux/vc-list-files

build-aux/warn-on-use.h

cfg.mk

debian/source

debian/source/format

dist-check.mk

gnulib-tests

gnulib-tests/Makefile.am

gnulib-tests/Makefile.in

gnulib-tests/argmatch.c

gnulib-tests/argmatch.h

gnulib-tests/binary-io.h

gnulib-tests/gnulib.mk

gnulib-tests/hash-pjw.c

gnulib-tests/hash-pjw.h

gnulib-tests/init.sh

gnulib-tests/locale

gnulib-tests/locale/fr

gnulib-tests/locale/fr/LC_MESSAGES

gnulib-tests/locale/fr/LC_MESSAGES/test-quotearg.mo

gnulib-tests/locale/fr/LC_MESSAGES/test-quotearg.po

gnulib-tests/macros.h

gnulib-tests/pathmax.h

gnulib-tests/putenv.c

gnulib-tests/same-inode.h

gnulib-tests/setenv.c

gnulib-tests/signature.h

gnulib-tests/sleep.c

gnulib-tests/symlink.c

gnulib-tests/test-alloca-opt.c

gnulib-tests/test-argmatch.c

gnulib-tests/test-binary-io.c

gnulib-tests/test-binary-io.sh

gnulib-tests/test-bitrotate.c

gnulib-tests/test-btowc.c

gnulib-tests/test-btowc1.sh

gnulib-tests/test-btowc2.sh

gnulib-tests/test-c-ctype.c

gnulib-tests/test-c-stack.c

gnulib-tests/test-c-stack.sh

gnulib-tests/test-c-stack2.sh

gnulib-tests/test-c-strcase.sh

gnulib-tests/test-c-strcasecmp.c

gnulib-tests/test-c-strncasecmp.c

gnulib-tests/test-dirname.c

gnulib-tests/test-dup2.c

gnulib-tests/test-environ.c

gnulib-tests/test-errno.c

gnulib-tests/test-exclude.c

gnulib-tests/test-exclude1.sh

gnulib-tests/test-exclude2.sh

gnulib-tests/test-exclude3.sh

gnulib-tests/test-exclude4.sh

gnulib-tests/test-exclude5.sh

gnulib-tests/test-exclude6.sh

gnulib-tests/test-exclude7.sh

gnulib-tests/test-fcntl-h.c

gnulib-tests/test-fcntl.c

gnulib-tests/test-fnmatch.c

gnulib-tests/test-freopen-safer.c

gnulib-tests/test-freopen.c

gnulib-tests/test-getdtablesize.c

gnulib-tests/test-getopt.c

gnulib-tests/test-getopt.h

gnulib-tests/test-getopt_long.h

gnulib-tests/test-gettimeofday.c

gnulib-tests/test-hash.c

gnulib-tests/test-iconv-h.c

gnulib-tests/test-iconv.c

gnulib-tests/test-inttypes.c

gnulib-tests/test-langinfo.c

gnulib-tests/test-lstat.c

gnulib-tests/test-lstat.h

gnulib-tests/test-malloca.c

gnulib-tests/test-mbrtowc.c

gnulib-tests/test-mbrtowc1.sh

gnulib-tests/test-mbrtowc2.sh

gnulib-tests/test-mbrtowc3.sh

gnulib-tests/test-mbrtowc4.sh

gnulib-tests/test-mbscasecmp.c

gnulib-tests/test-mbscasecmp.sh

gnulib-tests/test-mbsinit.c

gnulib-tests/test-mbsinit.sh

gnulib-tests/test-mbsrtowcs.c

gnulib-tests/test-mbsrtowcs1.sh

gnulib-tests/test-mbsrtowcs2.sh

gnulib-tests/test-mbsrtowcs3.sh

gnulib-tests/test-mbsrtowcs4.sh

gnulib-tests/test-mbsstr1.c

gnulib-tests/test-mbsstr2.c

gnulib-tests/test-mbsstr2.sh

gnulib-tests/test-mbsstr3.c

gnulib-tests/test-mbsstr3.sh

gnulib-tests/test-memchr.c

gnulib-tests/test-nl_langinfo.c

gnulib-tests/test-nl_langinfo.sh

gnulib-tests/test-open.c

gnulib-tests/test-open.h

gnulib-tests/test-quotearg.c

gnulib-tests/test-quotearg.sh

gnulib-tests/test-setenv.c

gnulib-tests/test-sigaction.c

gnulib-tests/test-signal.c

gnulib-tests/test-sleep.c

gnulib-tests/test-stat-time.c

gnulib-tests/test-stat.c

gnulib-tests/test-stat.h

gnulib-tests/test-stdbool.c

gnulib-tests/test-stddef.c

gnulib-tests/test-stdint.c

gnulib-tests/test-stdio.c

gnulib-tests/test-stdlib.c

gnulib-tests/test-strerror.c

gnulib-tests/test-striconv.c

gnulib-tests/test-string.c

gnulib-tests/test-strings.c

gnulib-tests/test-symlink.c

gnulib-tests/test-symlink.h

gnulib-tests/test-sys_stat.c

gnulib-tests/test-sys_time.c

gnulib-tests/test-sys_wait.c

gnulib-tests/test-time.c

gnulib-tests/test-unistd.c

gnulib-tests/test-unsetenv.c

gnulib-tests/test-update-copyright.sh

gnulib-tests/test-usleep.c

gnulib-tests/test-vc-list-files-cvs.sh

gnulib-tests/test-vc-list-files-git.sh

gnulib-tests/test-version-etc.c

gnulib-tests/test-version-etc.sh

gnulib-tests/test-wchar.c

gnulib-tests/test-wcrtomb.c

gnulib-tests/test-wcrtomb.sh

gnulib-tests/test-wctype.c

gnulib-tests/test-wcwidth.c

gnulib-tests/test-xalloc-die.c

gnulib-tests/test-xalloc-die.sh

gnulib-tests/test-xstrtol.c

gnulib-tests/test-xstrtol.sh

gnulib-tests/test-xstrtoul.c

gnulib-tests/test-xstrtoumax.c

gnulib-tests/test-xstrtoumax.sh

gnulib-tests/uniwidth

gnulib-tests/uniwidth/test-uc_width.c

gnulib-tests/uniwidth/test-uc_width2.c

gnulib-tests/uniwidth/test-uc_width2.sh

gnulib-tests/unsetenv.c

gnulib-tests/usleep.c

gnulib-tests/wctob.c

gnulib-tests/zerosize-ptr.h

lib/alloca.in.h

lib/basename-lgpl.c

lib/bitrotate.h

lib/btowc.c

lib/c-ctype.c

lib/c-ctype.h

lib/c-strcase.h

lib/c-strcasecmp.c

lib/c-strncasecmp.c

lib/config.charset

lib/config.hin

lib/diffseq.h

lib/dirname-lgpl.c

lib/dirname.c

lib/dup2.c

lib/errno.in.h

lib/fcntl.c

lib/fcntl.in.h

lib/file-type.c

lib/file-type.h

lib/fnmatch.in.h

lib/fnmatch_loop.c

lib/freopen-safer.c

lib/freopen.c

lib/getdtablesize.c

lib/getopt.in.h

lib/getopt_int.h

lib/gettime.c

lib/gettimeofday.c

lib/gnulib.mk

lib/hash.c

lib/hash.h

lib/iconv.in.h

lib/iconv_open-aix.gperf

lib/iconv_open-aix.h

lib/iconv_open-hpux.gperf

lib/iconv_open-hpux.h

lib/iconv_open-irix.gperf

lib/iconv_open-irix.h

lib/iconv_open-osf.gperf

lib/iconv_open-osf.h

lib/iconv_open-solaris.gperf

lib/iconv_open-solaris.h

lib/iconv_open.c

lib/ignore-value.h

lib/intprops.h

lib/inttypes.in.h

lib/langinfo.in.h

lib/localcharset.c

lib/localcharset.h

lib/lstat.c

lib/malloca.c

lib/malloca.h

lib/malloca.valgrind

lib/mbchar.c

lib/mbchar.h

lib/mbiter.h

lib/mbrlen.c

lib/mbrtowc.c

lib/mbscasecmp.c

lib/mbsinit.c

lib/mbslen.c

lib/mbsrtowcs-state.c

lib/mbsrtowcs.c

lib/mbsstr.c

lib/mbuiter.h

lib/memchr.valgrind

lib/mktime-internal.h

lib/mktime.c

lib/nl_langinfo.c

lib/open.c

lib/progname.c

lib/progname.h

lib/propername.c

lib/propername.h

lib/quote.c

lib/quote.h

lib/quotearg.c

lib/quotearg.h

lib/ref-add.sin

lib/ref-del.sin

lib/regcomp.c

lib/regex.h

lib/regex_internal.c

lib/regex_internal.h

lib/regexec.c

lib/sh-quote.c

lib/sh-quote.h

lib/sig-handler.h

lib/sigaction.c

lib/signal.in.h

lib/sigprocmask.c

lib/stat-macros.h

lib/stat-time.h

lib/stat.c

lib/stdarg.in.h

lib/stdbool.in.h

lib/stddef.in.h

lib/stdint.in.h

lib/stdio--.h

lib/stdio-safer.h

lib/stdio-write.c

lib/stdio.in.h

lib/stdlib.in.h

lib/str-kmp.h

lib/streq.h

lib/strerror.c

lib/strftime.h

lib/striconv.c

lib/striconv.h

lib/string.in.h

lib/strings.in.h

lib/stripslash.c

lib/strncasecmp.c

lib/strndup.c

lib/strnlen.c

lib/strnlen1.c

lib/strnlen1.h

lib/strptime.c

lib/strtoll.c

lib/sys_stat.in.h

lib/sys_time.in.h

lib/sys_wait.in.h

lib/tempname.h

lib/time.in.h

lib/time_r.c

lib/timegm.c

lib/timespec.h

lib/trim.c

lib/trim.h

lib/uinttostr.c

lib/unistd.in.h

lib/unitypes.h

lib/uniwidth

lib/uniwidth.h

lib/uniwidth/cjk.h

lib/uniwidth/width.c

lib/verify.h

lib/version-etc-fsf.c

lib/version-etc.c

lib/version-etc.h

lib/wchar.in.h

lib/wcrtomb.c

lib/wctype.in.h

lib/wcwidth.c

lib/xalloc-die.c

lib/xfreopen.c

lib/xfreopen.h

lib/xstriconv.c

lib/xstriconv.h

lib/xstrndup.c

lib/xstrndup.h

lib/xstrtol-error.c

lib/xstrtoul.c

m4/00gnulib.m4

m4/alloca.m4

m4/argmatch.m4

m4/btowc.m4

m4/clock_time.m4

m4/config-h.m4

m4/dirname.m4

m4/double-slash-root.m4

m4/dup2.m4

m4/eealloc.m4

m4/environ.m4

m4/errno_h.m4

m4/exclude.m4

m4/extensions.m4

m4/fcntl-o.m4

m4/fcntl.m4

m4/fcntl_h.m4

m4/file-type.m4

m4/freopen.m4

m4/getdtablesize.m4

m4/getopt.m4

m4/gettext_gl.m4

m4/gettime.m4

m4/gettimeofday.m4

m4/glibc21_gl.m4

m4/gnu-make.m4

m4/gnulib-common.m4

m4/gnulib-comp.m4

m4/hard-locale.m4

m4/hash.m4

m4/iconv_h.m4

m4/iconv_open.m4

m4/include_next.m4

m4/inline.m4

m4/intlmacosx.m4

m4/inttostr.m4

m4/inttypes-pri.m4

m4/langinfo_h.m4

m4/lib-ld_gl.m4

m4/lib-prefix_gl.m4

m4/libsigsegv.m4

m4/localcharset.m4

m4/locale-fr.m4

m4/locale-ja.m4

m4/locale-tr.m4

m4/locale-zh.m4

m4/longlong_gl.m4

m4/lstat.m4

m4/malloca.m4

m4/manywarnings.m4

m4/mbchar.m4

m4/mbiter.m4

m4/mbrlen.m4

m4/mbsinit.m4

m4/mbslen.m4

m4/mbsrtowcs.m4

m4/memchr.m4

m4/mkstemp.m4

m4/mktime.m4

m4/mmap-anon.m4

m4/mode_t.m4

m4/multiarch.m4

m4/nl_langinfo.m4

m4/nls.m4

m4/onceonly.m4

m4/open.m4

m4/pathmax.m4

m4/po_gl.m4

m4/putenv.m4

m4/quote.m4

m4/quotearg.m4

m4/setenv.m4

m4/sigaction.m4

m4/signal_h.m4

m4/signalblocking.m4

m4/sleep.m4

m4/ssize_t.m4

m4/stat-time.m4

m4/stat.m4

m4/stdarg.m4

m4/stddef_h.m4

m4/stdint.m4

m4/stdio-safer.m4

m4/stdio_h.m4

m4/stdlib_h.m4

m4/strcase.m4

m4/strerror.m4

m4/string_h.m4

m4/strings_h.m4

m4/strndup.m4

m4/strnlen.m4

m4/strptime.m4

m4/strtoimax.m4

m4/strtol.m4

m4/strtoll.m4

m4/strtoul.m4

m4/strtoull.m4

m4/strtoumax.m4

m4/symlink.m4

m4/sys_stat_h.m4

m4/sys_time_h.m4

m4/sys_wait_h.m4

m4/tempname.m4

m4/time_h.m4

m4/time_r.m4

m4/timegm.m4

m4/timespec.m4

m4/tm_gmtoff.m4

m4/unistd_h.m4

m4/unlocked-io.m4

m4/usleep.m4

m4/version-etc.m4

m4/warn-on-use.m4

m4/warnings.m4

m4/wchar_h.m4

m4/wchar_t.m4

m4/wcrtomb.m4

m4/wctob.m4

m4/wctype_h.m4

m4/wcwidth.m4

m4/wint_t.m4

m4/xalloc.m4

m4/xstrndup.m4

m4/xstrtol.m4

maint.mk

po/ca.gmo

po/ca.po

po/el.gmo

po/el.po

po/fi.gmo

po/fi.po

po/ga.gmo

po/ga.po

po/hu.gmo

po/hu.po

po/it.gmo

po/it.po

po/lv.gmo

po/lv.po

po/ms.gmo

po/ms.po

po/nl.gmo

po/nl.po

po/pt_BR.gmo

po/pt_BR.po

po/ro.gmo

po/ro.po

po/sr.gmo

po/sr.po

po/stamp-po

po/uk.gmo

po/uk.po

po/vi.gmo

po/vi.po

po/zh_CN.gmo

po/zh_CN.po

tests

tests/Makefile.am

tests/Makefile.in

tests/basic

tests/binary

tests/function-line-vs-leading-space

tests/help-version

tests/init.sh

tests/label-vs-func

tests/no-newline-at-eof

tests/stdin

tests/t-local.sh

files removed:
INSTALLME

config

config.hin

config/config.guess

config/config.rpath

config/config.sub

config/depcomp

config/install-sh

config/mdate-sh

config/missing

config/mkinstalldirs

config/texinfo.tex

lib/fnmatch.hin

lib/freesoft.c

lib/freesoft.h

lib/getopt.h

lib/posix

lib/posix/Makefile.am

lib/posix/Makefile.in

lib/posix/regex.h

lib/posixver.c

lib/posixver.h

lib/quotesys.c

lib/quotesys.h

lib/realloc.c

lib/regex.hin

lib/setmode.c

lib/setmode.h

lib/waitpid.c

m4/Makefile.am

m4/Makefile.am.in

m4/Makefile.in

m4/README

m4/c-bs-a.m4

m4/check-decl.m4

m4/gettext.m4

m4/glibc21.m4

m4/gnu-source.m4

m4/jm-glibc-io.m4

m4/lcmessage.m4

m4/lib-ld.m4

m4/lib-prefix.m4

m4/longlong.m4

m4/prereq.m4

m4/realloc.m4

m4/setmode.m4

m4/st_mtim.m4

m4/xstrtoumax.m4

po/en_GB.gmo

po/en_GB.po

src/version.c

files modified:
ABOUT-NLS

AUTHORS

COPYING

ChangeLog

INSTALL

Makefile.am

Makefile.in

NEWS

README

THANKS

aclocal.m4

configure

configure.ac

debian/changelog

debian/control

debian/copyright

debian/rules

debian/watch

exgettext

lib/Makefile.am

lib/Makefile.in

lib/alloca.c

lib/basename.c

lib/c-stack.c

lib/c-stack.h

lib/cmpbuf.c

lib/cmpbuf.h

lib/dirname.h

lib/error.c

lib/error.h

lib/exclude.c

lib/exclude.h

lib/exitfail.c

lib/exitfail.h

lib/fnmatch.c

lib/getopt.c

lib/getopt1.c

lib/gettext.h

lib/hard-locale.c

lib/hard-locale.h

lib/imaxtostr.c

lib/inttostr.c

lib/inttostr.h

lib/malloc.c

lib/memchr.c

lib/mkstemp.c

lib/offtostr.c

lib/prepargs.c

lib/regex.c

lib/strcasecmp.c

lib/strftime.c

lib/strtoimax.c

lib/strtol.c

lib/strtoul.c

lib/strtoull.c

lib/tempname.c

lib/umaxtostr.c

lib/unlocked-io.h

lib/xalloc.h

lib/xmalloc.c

lib/xstrtol.c

lib/xstrtol.h

lib/xstrtoumax.c

m4/c-stack.m4

m4/codeset.m4

m4/dos.m4

m4/error.m4

m4/fnmatch.m4

m4/iconv.m4

m4/inttypes.m4

m4/lib-link.m4

m4/malloc.m4

m4/mbrtowc.m4

m4/mbstate_t.m4

m4/progtest.m4

m4/regex.m4

m4/stdbool.m4

m4/strftime.m4

m4/vararrays.m4

man/Makefile.am

man/Makefile.in

man/cmp.1

man/diff.1

man/diff3.1

man/sdiff.1

ms/Makefile.am

ms/Makefile.in

ms/README

ms/config.bat

ms/config.sed

ms/config.site

po/LINGUAS

po/Makefile.in.in

po/Makevars

po/POTFILES.in

po/Rules-quot

po/cs.gmo

po/cs.po

po/da.gmo

po/da.po

po/de.gmo

po/de.po

po/diffutils.pot

po/eo.gmo

po/eo.po

po/es.gmo

po/es.po

po/fr.gmo

po/fr.po

po/gl.gmo

po/gl.po

po/he.gmo

po/he.po

po/id.gmo

po/id.po

po/ja.gmo

po/ja.po

po/pl.gmo

po/pl.po

po/ru.gmo

po/ru.po

po/sv.gmo

po/sv.po

po/tr.gmo

po/tr.po

po/zh_TW.gmo

po/zh_TW.po

src/Makefile.am

src/Makefile.in

src/analyze.c

src/cmp.c

src/context.c

src/diff.c

src/diff.h

src/diff3.c

src/dir.c

src/ed.c

src/ifdef.c

src/io.c

src/normal.c

src/sdiff.c

src/side.c

src/system.h

src/util.c

Show diffs side-by-side

added added

removed removed

lib/regexec.c

/* -*- buffer-read-only: t -*- vi: set ro: */

/* DO NOT EDIT! GENERATED AUTOMATICALLY! */

/* Extended regular expression matching and search library.

Software Foundation, Inc.

This file is part of the GNU C Library.

Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.

This program is free software; you can redistribute it and/or modify

it under the terms of the GNU General Public License as published by

the Free Software Foundation; either version 3, or (at your option)

any later version.

This program is distributed in the hope that it will be useful,

but WITHOUT ANY WARRANTY; without even the implied warranty of

MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

GNU General Public License for more details.

You should have received a copy of the GNU General Public License along

with this program; if not, write to the Free Software Foundation,

Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */

#include "verify.h"

#include "intprops.h"

static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,

Idx n) internal_function;

static void match_ctx_clean (re_match_context_t *mctx) internal_function;

static void match_ctx_free (re_match_context_t *cache) internal_function;

static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,

Idx str_idx, Idx from, Idx to)

internal_function;

static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)

internal_function;

static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,

Idx str_idx) internal_function;

static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,

Idx node, Idx str_idx)

internal_function;

static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,

re_dfastate_t **limited_sts, Idx last_node,

Idx last_str_idx)

internal_function;

static reg_errcode_t re_search_internal (const regex_t *preg,

const char *string, Idx length,

Idx start, Idx last_start, Idx stop,

size_t nmatch, regmatch_t pmatch[],

int eflags) internal_function;

static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,

const char *string1, Idx length1,

const char *string2, Idx length2,

Idx start, regoff_t range,

struct re_registers *regs,

Idx stop, bool ret_len) internal_function;

static regoff_t re_search_stub (struct re_pattern_buffer *bufp,

const char *string, Idx length, Idx start,

regoff_t range, Idx stop,

struct re_registers *regs,

bool ret_len) internal_function;

static unsigned int re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,

Idx nregs, int regs_allocated)

internal_function;

static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)

internal_function;

static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,

Idx *p_match_first) internal_function;

static Idx check_halt_state_context (const re_match_context_t *mctx,

const re_dfastate_t *state, Idx idx)

internal_function;

static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,

regmatch_t *prev_idx_match, Idx cur_node,

Idx cur_idx, Idx nmatch) internal_function;

static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,

Idx str_idx, Idx dest_node, Idx nregs,

regmatch_t *regs,

re_node_set *eps_via_nodes)

internal_function;

static reg_errcode_t set_regs (const regex_t *preg,

const re_match_context_t *mctx,

size_t nmatch, regmatch_t *pmatch,

bool fl_backtrack) internal_function;

static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)

internal_function;

#ifdef RE_ENABLE_I18N

static int sift_states_iter_mb (const re_match_context_t *mctx,

re_sift_context_t *sctx,

Idx node_idx, Idx str_idx, Idx max_str_idx)

internal_function;

#endif /* RE_ENABLE_I18N */

static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,

re_sift_context_t *sctx)

internal_function;

static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,

re_sift_context_t *sctx, Idx str_idx,

re_node_set *cur_dest)

internal_function;

static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,

re_sift_context_t *sctx,

Idx str_idx,

100

re_node_set *dest_nodes)

101

internal_function;

102

static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,

103

re_node_set *dest_nodes,

104

const re_node_set *candidates)

105

internal_function;

106

static bool check_dst_limits (const re_match_context_t *mctx,

107

const re_node_set *limits,

108

Idx dst_node, Idx dst_idx, Idx src_node,

109

Idx src_idx) internal_function;

110

static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,

111

int boundaries, Idx subexp_idx,

112

Idx from_node, Idx bkref_idx)

113

internal_function;

114

static int check_dst_limits_calc_pos (const re_match_context_t *mctx,

115

Idx limit, Idx subexp_idx,

116

Idx node, Idx str_idx,

117

Idx bkref_idx) internal_function;

118

static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,

119

re_node_set *dest_nodes,

120

const re_node_set *candidates,

121

re_node_set *limits,

122

struct re_backref_cache_entry *bkref_ents,

123

Idx str_idx) internal_function;

124

static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,

125

re_sift_context_t *sctx,

126

Idx str_idx, const re_node_set *candidates)

127

internal_function;

128

static reg_errcode_t merge_state_array (const re_dfa_t *dfa,

129

re_dfastate_t **dst,

130

re_dfastate_t **src, Idx num)

131

internal_function;

132

static re_dfastate_t *find_recover_state (reg_errcode_t *err,

133

re_match_context_t *mctx) internal_function;

134

static re_dfastate_t *transit_state (reg_errcode_t *err,

135

re_match_context_t *mctx,

136

re_dfastate_t *state) internal_function;

137

static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,

138

re_match_context_t *mctx,

139

re_dfastate_t *next_state)

140

internal_function;

141

static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,

142

re_node_set *cur_nodes,

143

Idx str_idx) internal_function;

144

#if 0

145

static re_dfastate_t *transit_state_sb (reg_errcode_t *err,

146

re_match_context_t *mctx,

147

re_dfastate_t *pstate)

148

internal_function;

149

#endif

150

#ifdef RE_ENABLE_I18N

151

static reg_errcode_t transit_state_mb (re_match_context_t *mctx,

152

re_dfastate_t *pstate)

153

internal_function;

154

#endif /* RE_ENABLE_I18N */

155

static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,

156

const re_node_set *nodes)

157

internal_function;

158

static reg_errcode_t get_subexp (re_match_context_t *mctx,

159

Idx bkref_node, Idx bkref_str_idx)

160

internal_function;

161

static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,

162

const re_sub_match_top_t *sub_top,

163

re_sub_match_last_t *sub_last,

164

Idx bkref_node, Idx bkref_str)

165

internal_function;

166

static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,

167

Idx subexp_idx, int type) internal_function;

168

static reg_errcode_t check_arrival (re_match_context_t *mctx,

169

state_array_t *path, Idx top_node,

170

Idx top_str, Idx last_node, Idx last_str,

171

int type) internal_function;

172

static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,

173

Idx str_idx,

174

re_node_set *cur_nodes,

175

re_node_set *next_nodes)

176

internal_function;

177

static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,

178

re_node_set *cur_nodes,

179

Idx ex_subexp, int type)

180

internal_function;

181

static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,

182

re_node_set *dst_nodes,

183

Idx target, Idx ex_subexp,

184

int type) internal_function;

185

static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,

186

re_node_set *cur_nodes, Idx cur_str,

187

Idx subexp_num, int type)

188

internal_function;

189

static bool build_trtable (const re_dfa_t *dfa,

190

re_dfastate_t *state) internal_function;

191

#ifdef RE_ENABLE_I18N

192

static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,

193

const re_string_t *input, Idx idx)

194

internal_function;

195

# ifdef _LIBC

196

static unsigned int find_collation_sequence_value (const unsigned char *mbs,

197

size_t name_len)

198

internal_function;

199

# endif /* _LIBC */

200

#endif /* RE_ENABLE_I18N */

201

static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,

202

const re_dfastate_t *state,

203

re_node_set *states_node,

204

bitset_t *states_ch) internal_function;

205

static bool check_node_accept (const re_match_context_t *mctx,

206

const re_token_t *node, Idx idx)

207

internal_function;

208

static reg_errcode_t extend_buffers (re_match_context_t *mctx)

209

internal_function;

210

211

/* Entry point for POSIX code. */

212

213

/* regexec searches for a given pattern, specified by PREG, in the

214

string STRING.

215

216

If NMATCH is zero or REG_NOSUB was set in the cflags argument to

217

`regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at

218

least NMATCH elements, and we set them to the offsets of the

219

corresponding matched substrings.

220

221

EFLAGS specifies `execution flags' which affect matching: if

222

REG_NOTBOL is set, then ^ does not match at the beginning of the

223

string; if REG_NOTEOL is set, then $ does not match at the end.

224

225

We return 0 if we find a match and REG_NOMATCH if not. */

226

227

int

228

regexec (preg, string, nmatch, pmatch, eflags)

229

const regex_t *_Restrict_ preg;

230

const char *_Restrict_ string;

231

size_t nmatch;

232

regmatch_t pmatch[_Restrict_arr_];

233

int eflags;

234

{

235

reg_errcode_t err;

236

Idx start, length;

237

#ifdef _LIBC

238

re_dfa_t *dfa = (re_dfa_t *) preg->buffer;

239

#endif

240

241

if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))

242

return REG_BADPAT;

243

244

if (eflags & REG_STARTEND)

245

{

246

start = pmatch[0].rm_so;

247

length = pmatch[0].rm_eo;

248

}

249

else

250

{

251

start = 0;

252

length = strlen (string);

253

}

254

255

__libc_lock_lock (dfa->lock);

256

if (preg->no_sub)

257

err = re_search_internal (preg, string, length, start, length,

258

length, 0, NULL, eflags);

259

else

260

err = re_search_internal (preg, string, length, start, length,

261

length, nmatch, pmatch, eflags);

262

__libc_lock_unlock (dfa->lock);

263

return err != REG_NOERROR;

264

}

265

266

#ifdef _LIBC

267

# include <shlib-compat.h>

268

versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);

269

270

# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)

271

__typeof__ (__regexec) __compat_regexec;

272

273

int

274

attribute_compat_text_section

275

__compat_regexec (const regex_t *_Restrict_ preg,

276

const char *_Restrict_ string, size_t nmatch,

277

regmatch_t pmatch[], int eflags)

278

{

279

return regexec (preg, string, nmatch, pmatch,

280

eflags & (REG_NOTBOL | REG_NOTEOL));

281

}

282

compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);

283

# endif

284

#endif

285

286

/* Entry points for GNU code. */

287

288

/* re_match, re_search, re_match_2, re_search_2

289

290

The former two functions operate on STRING with length LENGTH,

291

while the later two operate on concatenation of STRING1 and STRING2

292

with lengths LENGTH1 and LENGTH2, respectively.

293

294

re_match() matches the compiled pattern in BUFP against the string,

295

starting at index START.

296

297

re_search() first tries matching at index START, then it tries to match

298

starting from index START + 1, and so on. The last start position tried

299

is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same

300

way as re_match().)

301

302

The parameter STOP of re_{match,search}_2 specifies that no match exceeding

303

the first STOP characters of the concatenation of the strings should be

304

concerned.

305

306

If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match

307

and all groups is stored in REGS. (For the "_2" variants, the offsets are

308

computed relative to the concatenation, not relative to the individual

309

strings.)

310

311

On success, re_match* functions return the length of the match, re_search*

312

return the position of the start of the match. Return value -1 means no

313

match was found and -2 indicates an internal error. */

314

315

regoff_t

316

re_match (bufp, string, length, start, regs)

317

struct re_pattern_buffer *bufp;

318

const char *string;

319

Idx length, start;

320

struct re_registers *regs;

321

{

322

return re_search_stub (bufp, string, length, start, 0, length, regs, true);

323

}

324

#ifdef _LIBC

325

weak_alias (__re_match, re_match)

326

#endif

327

328

regoff_t

329

re_search (bufp, string, length, start, range, regs)

330

struct re_pattern_buffer *bufp;

331

const char *string;

332

Idx length, start;

333

regoff_t range;

334

struct re_registers *regs;

335

{

336

return re_search_stub (bufp, string, length, start, range, length, regs,

337

false);

338

}

339

#ifdef _LIBC

340

weak_alias (__re_search, re_search)

341

#endif

342

343

regoff_t

344

re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)

345

struct re_pattern_buffer *bufp;

346

const char *string1, *string2;

347

Idx length1, length2, start, stop;

348

struct re_registers *regs;

349

{

350

return re_search_2_stub (bufp, string1, length1, string2, length2,

351

start, 0, regs, stop, true);

352

}

353

#ifdef _LIBC

354

weak_alias (__re_match_2, re_match_2)

355

#endif

356

357

regoff_t

358

re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)

359

struct re_pattern_buffer *bufp;

360

const char *string1, *string2;

361

Idx length1, length2, start, stop;

362

regoff_t range;

363

struct re_registers *regs;

364

{

365

return re_search_2_stub (bufp, string1, length1, string2, length2,

366

start, range, regs, stop, false);

367

}

368

#ifdef _LIBC

369

weak_alias (__re_search_2, re_search_2)

370

#endif

371

372

static regoff_t

373

internal_function

374

re_search_2_stub (struct re_pattern_buffer *bufp,

375

const char *string1, Idx length1,

376

const char *string2, Idx length2,

377

Idx start, regoff_t range, struct re_registers *regs,

378

Idx stop, bool ret_len)

379

{

380

const char *str;

381

regoff_t rval;

382

Idx len = length1 + length2;

383

char *s = NULL;

384

385

verify (! TYPE_SIGNED (Idx));

386

if (BE (len < length1, 0))

387

return -2;

388

/* if (BE (length1 < 0 || length2 < 0 || stop < 0, 0))

389

return -2; */

390

391

/* Concatenate the strings. */

392

if (length2 > 0)

393

if (length1 > 0)

394

{

395

s = re_malloc (char, len);

396

397

if (BE (s == NULL, 0))

398

return -2;

399

#ifdef _LIBC

400

memcpy (__mempcpy (s, string1, length1), string2, length2);

401

#else

402

memcpy (s, string1, length1);

403

memcpy (s + length1, string2, length2);

404

#endif

405

str = s;

406

}

407

else

408

str = string2;

409

else

410

str = string1;

411

412

rval = re_search_stub (bufp, str, len, start, range, stop, regs,

413

ret_len);

414

re_free (s);

415

return rval;

416

}

417

418

/* The parameters have the same meaning as those of re_search.

419

Additional parameters:

420

If RET_LEN is true the length of the match is returned (re_match style);

421

otherwise the position of the match is returned. */

422

423

static regoff_t

424

internal_function

425

re_search_stub (struct re_pattern_buffer *bufp,

426

const char *string, Idx length,

427

Idx start, regoff_t range, Idx stop, struct re_registers *regs,

428

bool ret_len)

429

{

430

reg_errcode_t result;

431

regmatch_t *pmatch;

432

Idx nregs;

433

regoff_t rval;

434

int eflags = 0;

435

#ifdef _LIBC

436

re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;

437

#endif

438

Idx last_start = start + range;

439

440

/* Check for out-of-range. */

441

verify (! TYPE_SIGNED (Idx));

442

/* if (BE (start < 0, 0))

443

return -1; */

444

if (BE (start > length, 0))

445

return -1;

446

if (BE (length < last_start || (0 <= range && last_start < start), 0))

447

last_start = length;

448

else if (BE (/* last_start < 0 || */ (range < 0 && start <= last_start), 0))

449

last_start = 0;

450

451

__libc_lock_lock (dfa->lock);

452

453

eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;

454

eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;

455

456

/* Compile fastmap if we haven't yet. */

457

if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)

458

re_compile_fastmap (bufp);

459

460

if (BE (bufp->no_sub, 0))

461

regs = NULL;

462

463

/* We need at least 1 register. */

464

if (regs == NULL)

465

nregs = 1;

466

else if (BE (bufp->regs_allocated == REGS_FIXED

467

&& regs->num_regs <= bufp->re_nsub, 0))

468

{

469

nregs = regs->num_regs;

470

if (BE (nregs < 1, 0))

471

{

472

/* Nothing can be copied to regs. */

473

regs = NULL;

474

nregs = 1;

475

}

476

}

477

else

478

nregs = bufp->re_nsub + 1;

479

pmatch = re_malloc (regmatch_t, nregs);

480

if (BE (pmatch == NULL, 0))

481

{

482

rval = -2;

483

goto out;

484

}

485

486

result = re_search_internal (bufp, string, length, start, last_start, stop,

487

nregs, pmatch, eflags);

488

489

rval = 0;

490

491

/* I hope we needn't fill ther regs with -1's when no match was found. */

492

if (result != REG_NOERROR)

493

rval = -1;

494

else if (regs != NULL)

495

{

496

/* If caller wants register contents data back, copy them. */

497

bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,

498

bufp->regs_allocated);

499

if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))

500

rval = -2;

501

}

502

503

if (BE (rval == 0, 1))

504

{

505

if (ret_len)

506

{

507

assert (pmatch[0].rm_so == start);

508

rval = pmatch[0].rm_eo - start;

509

}

510

else

511

rval = pmatch[0].rm_so;

512

}

513

re_free (pmatch);

514

out:

515

__libc_lock_unlock (dfa->lock);

516

return rval;

517

}

518

519

static unsigned int

520

internal_function

521

re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,

522

int regs_allocated)

523

{

524

int rval = REGS_REALLOCATE;

525

Idx i;

526

Idx need_regs = nregs + 1;

527

/* We need one extra element beyond `num_regs' for the `-1' marker GNU code

528

uses. */

529

530

/* Have the register data arrays been allocated? */

531

if (regs_allocated == REGS_UNALLOCATED)

532

{ /* No. So allocate them with malloc. */

533

regs->start = re_malloc (regoff_t, need_regs);

534

if (BE (regs->start == NULL, 0))

535

return REGS_UNALLOCATED;

536

regs->end = re_malloc (regoff_t, need_regs);

537

if (BE (regs->end == NULL, 0))

538

{

539

re_free (regs->start);

540

return REGS_UNALLOCATED;

541

}

542

regs->num_regs = need_regs;

543

}

544

else if (regs_allocated == REGS_REALLOCATE)

545

{ /* Yes. If we need more elements than were already

546

allocated, reallocate them. If we need fewer, just

547

leave it alone. */

548

if (BE (need_regs > regs->num_regs, 0))

549

{

550

regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);

551

regoff_t *new_end;

552

if (BE (new_start == NULL, 0))

553

return REGS_UNALLOCATED;

554

new_end = re_realloc (regs->end, regoff_t, need_regs);

555

if (BE (new_end == NULL, 0))

556

{

557

re_free (new_start);

558

return REGS_UNALLOCATED;

559

}

560

regs->start = new_start;

561

regs->end = new_end;

562

regs->num_regs = need_regs;

563

}

564

}

565

else

566

{

567

assert (regs_allocated == REGS_FIXED);

568

/* This function may not be called with REGS_FIXED and nregs too big. */

569

assert (regs->num_regs >= nregs);

570

rval = REGS_FIXED;

571

}

572

573

/* Copy the regs. */

574

for (i = 0; i < nregs; ++i)

575

{

576

regs->start[i] = pmatch[i].rm_so;

577

regs->end[i] = pmatch[i].rm_eo;

578

}

579

for ( ; i < regs->num_regs; ++i)

580

regs->start[i] = regs->end[i] = -1;

581

582

return rval;

583

}

584

585

/* Set REGS to hold NUM_REGS registers, storing them in STARTS and

586

ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use

587

this memory for recording register information. STARTS and ENDS

588

must be allocated using the malloc library routine, and must each

589

be at least NUM_REGS * sizeof (regoff_t) bytes long.

590

591

If NUM_REGS == 0, then subsequent matches should allocate their own

592

593

594

Unless this function is called, the first search or match using

595

PATTERN_BUFFER will allocate its own register data, without

596

freeing the old data. */

597

598

void

599

re_set_registers (bufp, regs, num_regs, starts, ends)

600

struct re_pattern_buffer *bufp;

601

struct re_registers *regs;

602

__re_size_t num_regs;

603

regoff_t *starts, *ends;

604

{

605

if (num_regs)

606

{

607

bufp->regs_allocated = REGS_REALLOCATE;

608

regs->num_regs = num_regs;

609

regs->start = starts;

610

regs->end = ends;

611

}

612

else

613

{

614

bufp->regs_allocated = REGS_UNALLOCATED;

615

regs->num_regs = 0;

616

regs->start = regs->end = NULL;

617

}

618

}

619

#ifdef _LIBC

620

weak_alias (__re_set_registers, re_set_registers)

621

#endif

622

623

/* Entry points compatible with 4.2 BSD regex library. We don't define

624

them unless specifically requested. */

625

626

#if defined _REGEX_RE_COMP || defined _LIBC

627

int

628

# ifdef _LIBC

629

weak_function

630

# endif

631

re_exec (s)

632

const char *s;

633

{

634

return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);

635

}

636

#endif /* _REGEX_RE_COMP */

637

638

/* Internal entry point. */

639

640

/* Searches for a compiled pattern PREG in the string STRING, whose

641

length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same

642

meaning as with regexec. LAST_START is START + RANGE, where

643

START and RANGE have the same meaning as with re_search.

644

Return REG_NOERROR if we find a match, and REG_NOMATCH if not,

645

otherwise return the error code.

646

Note: We assume front end functions already check ranges.

647

(0 <= LAST_START && LAST_START <= LENGTH) */

648

649

static reg_errcode_t

650

internal_function __attribute_warn_unused_result__

651

re_search_internal (const regex_t *preg,

652

const char *string, Idx length,

653

Idx start, Idx last_start, Idx stop,

654

size_t nmatch, regmatch_t pmatch[],

655

int eflags)

656

{

657

reg_errcode_t err;

658

const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;

659

Idx left_lim, right_lim;

660

int incr;

661

bool fl_longest_match;

662

int match_kind;

663

Idx match_first;

664

Idx match_last = REG_MISSING;

665

Idx extra_nmatch;

666

bool sb;

667

int ch;

668

#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)

669

re_match_context_t mctx = { .dfa = dfa };

670

#else

671

re_match_context_t mctx;

672

#endif

673

char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate

674

&& start != last_start && !preg->can_be_null)

675

? preg->fastmap : NULL);

676

RE_TRANSLATE_TYPE t = preg->translate;

677

678

#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))

679

memset (&mctx, '\0', sizeof (re_match_context_t));

680

mctx.dfa = dfa;

681

#endif

682

683

extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;

684

nmatch -= extra_nmatch;

685

686

/* Check if the DFA haven't been compiled. */

687

if (BE (preg->used == 0 || dfa->init_state == NULL

688

|| dfa->init_state_word == NULL || dfa->init_state_nl == NULL

689

|| dfa->init_state_begbuf == NULL, 0))

690

return REG_NOMATCH;

691

692

#ifdef DEBUG

693

/* We assume front-end functions already check them. */

694

assert (0 <= last_start && last_start <= length);

695

#endif

696

697

/* If initial states with non-begbuf contexts have no elements,

698

the regex must be anchored. If preg->newline_anchor is set,

699

we'll never use init_state_nl, so do not check it. */

700

if (dfa->init_state->nodes.nelem == 0

701

&& dfa->init_state_word->nodes.nelem == 0

702

&& (dfa->init_state_nl->nodes.nelem == 0

703

|| !preg->newline_anchor))

704

{

705

if (start != 0 && last_start != 0)

706

return REG_NOMATCH;

707

start = last_start = 0;

708

}

709

710

/* We must check the longest matching, if nmatch > 0. */

711

fl_longest_match = (nmatch != 0 || dfa->nbackref);

712

713

err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,

714

preg->translate, (preg->syntax & RE_ICASE) != 0,

715

dfa);

716

if (BE (err != REG_NOERROR, 0))

717

goto free_return;

718

mctx.input.stop = stop;

719

mctx.input.raw_stop = stop;

720

mctx.input.newline_anchor = preg->newline_anchor;

721

722

err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);

723

if (BE (err != REG_NOERROR, 0))

724

goto free_return;

725

726

/* We will log all the DFA states through which the dfa pass,

727

if nmatch > 1, or this dfa has "multibyte node", which is a

728

back-reference or a node which can accept multibyte character or

729

multi character collating element. */

730

if (nmatch > 1 || dfa->has_mb_node)

731

{

732

/* Avoid overflow. */

733

if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0))

734

{

735

err = REG_ESPACE;

736

goto free_return;

737

}

738

739

mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);

740

if (BE (mctx.state_log == NULL, 0))

741

{

742

err = REG_ESPACE;

743

goto free_return;

744

}

745

}

746

else

747

mctx.state_log = NULL;

748

749

match_first = start;

750

mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF

751

: CONTEXT_NEWLINE | CONTEXT_BEGBUF;

752

753

/* Check incrementally whether of not the input string match. */

754

incr = (last_start < start) ? -1 : 1;

755

left_lim = (last_start < start) ? last_start : start;

756

right_lim = (last_start < start) ? start : last_start;

757

sb = dfa->mb_cur_max == 1;

758

match_kind =

759

(fastmap

760

? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)

761

| (start <= last_start ? 2 : 0)

762

| (t != NULL ? 1 : 0))

763

: 8);

764

765

for (;; match_first += incr)

766

{

767

err = REG_NOMATCH;

768

if (match_first < left_lim || right_lim < match_first)

769

goto free_return;

770

771

/* Advance as rapidly as possible through the string, until we

772

find a plausible place to start matching. This may be done

773

with varying efficiency, so there are various possibilities:

774

only the most common of them are specialized, in order to

775

save on code size. We use a switch statement for speed. */

776

switch (match_kind)

777

{

778

case 8:

779

/* No fastmap. */

780

break;

781

782

case 7:

783

/* Fastmap with single-byte translation, match forward. */

784

while (BE (match_first < right_lim, 1)

785

&& !fastmap[t[(unsigned char) string[match_first]]])

786

++match_first;

787

goto forward_match_found_start_or_reached_end;

788

789

case 6:

790

/* Fastmap without translation, match forward. */

791

while (BE (match_first < right_lim, 1)

792

&& !fastmap[(unsigned char) string[match_first]])

793

++match_first;

794

795

forward_match_found_start_or_reached_end:

796

if (BE (match_first == right_lim, 0))

797

{

798

ch = match_first >= length

799

? 0 : (unsigned char) string[match_first];

800

if (!fastmap[t ? t[ch] : ch])

801

goto free_return;

802

}

803

break;

804

805

case 4:

806

case 5:

807

/* Fastmap without multi-byte translation, match backwards. */

808

while (match_first >= left_lim)

809

{

810

ch = match_first >= length

811

? 0 : (unsigned char) string[match_first];

812

if (fastmap[t ? t[ch] : ch])

813

break;

814

--match_first;

815

}

816

if (match_first < left_lim)

817

goto free_return;

818

break;

819

820

default:

821

/* In this case, we can't determine easily the current byte,

822

since it might be a component byte of a multibyte

823

character. Then we use the constructed buffer instead. */

824

for (;;)

825

{

826

/* If MATCH_FIRST is out of the valid range, reconstruct the

827

buffers. */

828

__re_size_t offset = match_first - mctx.input.raw_mbs_idx;

829

if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, 0))

830

{

831

err = re_string_reconstruct (&mctx.input, match_first,

832

eflags);

833

if (BE (err != REG_NOERROR, 0))

834

goto free_return;

835

836

offset = match_first - mctx.input.raw_mbs_idx;

837

}

838

/* If MATCH_FIRST is out of the buffer, leave it as '\0'.

839

Note that MATCH_FIRST must not be smaller than 0. */

840

ch = (match_first >= length

841

? 0 : re_string_byte_at (&mctx.input, offset));

842

if (fastmap[ch])

843

break;

844

match_first += incr;

845

if (match_first < left_lim || match_first > right_lim)

846

{

847

err = REG_NOMATCH;

848

goto free_return;

849

}

850

}

851

break;

852

}

853

854

/* Reconstruct the buffers so that the matcher can assume that

855

the matching starts from the beginning of the buffer. */

856

err = re_string_reconstruct (&mctx.input, match_first, eflags);

857

if (BE (err != REG_NOERROR, 0))

858

goto free_return;

859

860

#ifdef RE_ENABLE_I18N

861

/* Don't consider this char as a possible match start if it part,

862

yet isn't the head, of a multibyte character. */

863

if (!sb && !re_string_first_byte (&mctx.input, 0))

864

continue;

865

#endif

866

867

/* It seems to be appropriate one, then use the matcher. */

868

/* We assume that the matching starts from 0. */

869

mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;

870

match_last = check_matching (&mctx, fl_longest_match,

871

start <= last_start ? &match_first : NULL);

872

if (match_last != REG_MISSING)

873

{

874

if (BE (match_last == REG_ERROR, 0))

875

{

876

err = REG_ESPACE;

877

goto free_return;

878

}

879

else

880

{

881

mctx.match_last = match_last;

882

if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)

883

{

884

re_dfastate_t *pstate = mctx.state_log[match_last];

885

mctx.last_node = check_halt_state_context (&mctx, pstate,

886

match_last);

887

}

888

if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)

889

|| dfa->nbackref)

890

{

891

err = prune_impossible_nodes (&mctx);

892

if (err == REG_NOERROR)

893

break;

894

if (BE (err != REG_NOMATCH, 0))

895

goto free_return;

896

match_last = REG_MISSING;

897

}

898

else

899

break; /* We found a match. */

900

}

901

}

902

903

match_ctx_clean (&mctx);

904

}

905

906

#ifdef DEBUG

907

assert (match_last != REG_MISSING);

908

assert (err == REG_NOERROR);

909

#endif

910

911

/* Set pmatch[] if we need. */

912

if (nmatch > 0)

913

{

914

Idx reg_idx;

915

916

/* Initialize registers. */

917

for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)

918

pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;

919

920

/* Set the points where matching start/end. */

921

pmatch[0].rm_so = 0;

922

pmatch[0].rm_eo = mctx.match_last;

923

/* FIXME: This function should fail if mctx.match_last exceeds

924

the maximum possible regoff_t value. We need a new error

925

code REG_OVERFLOW. */

926

927

if (!preg->no_sub && nmatch > 1)

928

{

929

err = set_regs (preg, &mctx, nmatch, pmatch,

930

dfa->has_plural_match && dfa->nbackref > 0);

931

if (BE (err != REG_NOERROR, 0))

932

goto free_return;

933

}

934

935

/* At last, add the offset to the each registers, since we slided

936

the buffers so that we could assume that the matching starts

937

from 0. */

938

for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)

939

if (pmatch[reg_idx].rm_so != -1)

940

{

941

#ifdef RE_ENABLE_I18N

942

if (BE (mctx.input.offsets_needed != 0, 0))

943

{

944

pmatch[reg_idx].rm_so =

945

(pmatch[reg_idx].rm_so == mctx.input.valid_len

946

? mctx.input.valid_raw_len

947

: mctx.input.offsets[pmatch[reg_idx].rm_so]);

948

pmatch[reg_idx].rm_eo =

949

(pmatch[reg_idx].rm_eo == mctx.input.valid_len

950

? mctx.input.valid_raw_len

951

: mctx.input.offsets[pmatch[reg_idx].rm_eo]);

952

}

953

#else

954

assert (mctx.input.offsets_needed == 0);

955

#endif

956

pmatch[reg_idx].rm_so += match_first;

957

pmatch[reg_idx].rm_eo += match_first;

958

}

959

for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)

960

{

961

pmatch[nmatch + reg_idx].rm_so = -1;

962

pmatch[nmatch + reg_idx].rm_eo = -1;

963

}

964

965

if (dfa->subexp_map)

966

for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)

967

if (dfa->subexp_map[reg_idx] != reg_idx)

968

{

969

pmatch[reg_idx + 1].rm_so

970

= pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;

971

pmatch[reg_idx + 1].rm_eo

972

= pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;

973

}

974

}

975

976

free_return:

977

re_free (mctx.state_log);

978

if (dfa->nbackref)

979

match_ctx_free (&mctx);

980

re_string_destruct (&mctx.input);

981

return err;

982

}

983

984

static reg_errcode_t

985

internal_function __attribute_warn_unused_result__

986

prune_impossible_nodes (re_match_context_t *mctx)

987

{

988

const re_dfa_t *const dfa = mctx->dfa;

989

Idx halt_node, match_last;

990

reg_errcode_t ret;

991

re_dfastate_t **sifted_states;

992

re_dfastate_t **lim_states = NULL;

993

re_sift_context_t sctx;

994

#ifdef DEBUG

995

assert (mctx->state_log != NULL);

996

#endif

997

match_last = mctx->match_last;

998

halt_node = mctx->last_node;

999

1000

/* Avoid overflow. */

1001

if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0))

1002

return REG_ESPACE;

1003

1004

sifted_states = re_malloc (re_dfastate_t *, match_last + 1);

1005

if (BE (sifted_states == NULL, 0))

1006

{

1007

ret = REG_ESPACE;

1008

goto free_return;

1009

}

1010

if (dfa->nbackref)

1011

{

1012

lim_states = re_malloc (re_dfastate_t *, match_last + 1);

1013

if (BE (lim_states == NULL, 0))

1014

{

1015

ret = REG_ESPACE;

1016

goto free_return;

1017

}

1018

while (1)

1019

{

1020

memset (lim_states, '\0',

1021

sizeof (re_dfastate_t *) * (match_last + 1));

1022

sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,

1023

match_last);

1024

ret = sift_states_backward (mctx, &sctx);

1025

re_node_set_free (&sctx.limits);

1026

if (BE (ret != REG_NOERROR, 0))

1027

goto free_return;

1028

if (sifted_states[0] != NULL || lim_states[0] != NULL)

1029

break;

1030

1031

{

1032

--match_last;

1033

if (! REG_VALID_INDEX (match_last))

1034

{

1035

ret = REG_NOMATCH;

1036

goto free_return;

1037

}

1038

} while (mctx->state_log[match_last] == NULL

1039

|| !mctx->state_log[match_last]->halt);

1040

halt_node = check_halt_state_context (mctx,

1041

mctx->state_log[match_last],

1042

match_last);

1043

}

1044

ret = merge_state_array (dfa, sifted_states, lim_states,

1045

match_last + 1);

1046

re_free (lim_states);

1047

lim_states = NULL;

1048

if (BE (ret != REG_NOERROR, 0))

1049

goto free_return;

1050

}

1051

else

1052

{

1053

sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);

1054

ret = sift_states_backward (mctx, &sctx);

1055

re_node_set_free (&sctx.limits);

1056

if (BE (ret != REG_NOERROR, 0))

1057

goto free_return;

1058

if (sifted_states[0] == NULL)

1059

{

1060

ret = REG_NOMATCH;

1061

goto free_return;

1062

}

1063

}

1064

re_free (mctx->state_log);

1065

mctx->state_log = sifted_states;

1066

sifted_states = NULL;

1067

mctx->last_node = halt_node;

1068

mctx->match_last = match_last;

1069

ret = REG_NOERROR;

1070

free_return:

1071

re_free (sifted_states);

1072

re_free (lim_states);

1073

return ret;

1074

}

1075

1076

/* Acquire an initial state and return it.

1077

We must select appropriate initial state depending on the context,

1078

since initial states may have constraints like "\<", "^", etc.. */

1079

1080

static inline re_dfastate_t *

1081

__attribute ((always_inline)) internal_function

1082

acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,

1083

Idx idx)

1084

{

1085

const re_dfa_t *const dfa = mctx->dfa;

1086

if (dfa->init_state->has_constraint)

1087

{

1088

unsigned int context;

1089

context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);

1090

if (IS_WORD_CONTEXT (context))

1091

return dfa->init_state_word;

1092

else if (IS_ORDINARY_CONTEXT (context))

1093

return dfa->init_state;

1094

else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))

1095

return dfa->init_state_begbuf;

1096

else if (IS_NEWLINE_CONTEXT (context))

1097

return dfa->init_state_nl;

1098

else if (IS_BEGBUF_CONTEXT (context))

1099

{

1100

/* It is relatively rare case, then calculate on demand. */

1101

return re_acquire_state_context (err, dfa,

1102

dfa->init_state->entrance_nodes,

1103

context);

1104

}

1105

else

1106

/* Must not happen? */

1107

return dfa->init_state;

1108

}

1109

else

1110

return dfa->init_state;

1111

}

1112

1113

/* Check whether the regular expression match input string INPUT or not,

1114

and return the index where the matching end. Return REG_MISSING if

1115

there is no match, and return REG_ERROR in case of an error.

1116

FL_LONGEST_MATCH means we want the POSIX longest matching.

1117

If P_MATCH_FIRST is not NULL, and the match fails, it is set to the

1118

next place where we may want to try matching.

1119

Note that the matcher assume that the maching starts from the current

1120

index of the buffer. */

1121

1122

static Idx

1123

internal_function __attribute_warn_unused_result__

1124

check_matching (re_match_context_t *mctx, bool fl_longest_match,

1125

Idx *p_match_first)

1126

{

1127

const re_dfa_t *const dfa = mctx->dfa;

1128

reg_errcode_t err;

1129

Idx match = 0;

1130

Idx match_last = REG_MISSING;

1131

Idx cur_str_idx = re_string_cur_idx (&mctx->input);

1132

re_dfastate_t *cur_state;

1133

bool at_init_state = p_match_first != NULL;

1134

Idx next_start_idx = cur_str_idx;

1135

1136

err = REG_NOERROR;

1137

cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);

1138

/* An initial state must not be NULL (invalid). */

1139

if (BE (cur_state == NULL, 0))

1140

{

1141

assert (err == REG_ESPACE);

1142

return REG_ERROR;

1143

}

1144

1145

if (mctx->state_log != NULL)

1146

{

1147

mctx->state_log[cur_str_idx] = cur_state;

1148

1149

/* Check OP_OPEN_SUBEXP in the initial state in case that we use them

1150

later. E.g. Processing back references. */

1151

if (BE (dfa->nbackref, 0))

1152

{

1153

at_init_state = false;

1154

err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);

1155

if (BE (err != REG_NOERROR, 0))

1156

return err;

1157

1158

if (cur_state->has_backref)

1159

{

1160

err = transit_state_bkref (mctx, &cur_state->nodes);

1161

if (BE (err != REG_NOERROR, 0))

1162

return err;

1163

}

1164

}

1165

}

1166

1167

/* If the RE accepts NULL string. */

1168

if (BE (cur_state->halt, 0))

1169

{

1170

if (!cur_state->has_constraint

1171

|| check_halt_state_context (mctx, cur_state, cur_str_idx))

1172

{

1173

if (!fl_longest_match)

1174

return cur_str_idx;

1175

else

1176

{

1177

match_last = cur_str_idx;

1178

match = 1;

1179

}

1180

}

1181

}

1182

1183

while (!re_string_eoi (&mctx->input))

1184

{

1185

re_dfastate_t *old_state = cur_state;

1186

Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;

1187

1188

if (BE (next_char_idx >= mctx->input.bufs_len, 0)

1189

|| (BE (next_char_idx >= mctx->input.valid_len, 0)

1190

&& mctx->input.valid_len < mctx->input.len))

1191

{

1192

err = extend_buffers (mctx);

1193

if (BE (err != REG_NOERROR, 0))

1194

{

1195

assert (err == REG_ESPACE);

1196

return REG_ERROR;

1197

}

1198

}

1199

1200

cur_state = transit_state (&err, mctx, cur_state);

1201

if (mctx->state_log != NULL)

1202

cur_state = merge_state_with_log (&err, mctx, cur_state);

1203

1204

if (cur_state == NULL)

1205

{

1206

/* Reached the invalid state or an error. Try to recover a valid

1207

state using the state log, if available and if we have not

1208

already found a valid (even if not the longest) match. */

1209

if (BE (err != REG_NOERROR, 0))

1210

return REG_ERROR;

1211

1212

if (mctx->state_log == NULL

1213

|| (match && !fl_longest_match)

1214

|| (cur_state = find_recover_state (&err, mctx)) == NULL)

1215

break;

1216

}

1217

1218

if (BE (at_init_state, 0))

1219

{

1220

if (old_state == cur_state)

1221

next_start_idx = next_char_idx;

1222

else

1223

at_init_state = false;

1224

}

1225

1226

if (cur_state->halt)

1227

{

1228

/* Reached a halt state.

1229

Check the halt state can satisfy the current context. */

1230

if (!cur_state->has_constraint

1231

|| check_halt_state_context (mctx, cur_state,

1232

re_string_cur_idx (&mctx->input)))

1233

{

1234

/* We found an appropriate halt state. */

1235

match_last = re_string_cur_idx (&mctx->input);

1236

match = 1;

1237

1238

/* We found a match, do not modify match_first below. */

1239

p_match_first = NULL;

1240

if (!fl_longest_match)

1241

break;

1242

}

1243

}

1244

}

1245

1246

if (p_match_first)

1247

*p_match_first += next_start_idx;

1248

1249

return match_last;

1250

}

1251

1252

/* Check NODE match the current context. */

1253

1254

static bool

1255

internal_function

1256

check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)

1257

{

1258

re_token_type_t type = dfa->nodes[node].type;

1259

unsigned int constraint = dfa->nodes[node].constraint;

1260

if (type != END_OF_RE)

1261

return false;

1262

if (!constraint)

1263

return true;

1264

if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))

1265

return false;

1266

return true;

1267

}

1268

1269

/* Check the halt state STATE match the current context.

1270

Return 0 if not match, if the node, STATE has, is a halt node and

1271

match the context, return the node. */

1272

1273

static Idx

1274

internal_function

1275

check_halt_state_context (const re_match_context_t *mctx,

1276

const re_dfastate_t *state, Idx idx)

1277

{

1278

Idx i;

1279

unsigned int context;

1280

#ifdef DEBUG

1281

assert (state->halt);

1282

#endif

1283

context = re_string_context_at (&mctx->input, idx, mctx->eflags);

1284

for (i = 0; i < state->nodes.nelem; ++i)

1285

if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))

1286

return state->nodes.elems[i];

1287

return 0;

1288

}

1289

1290

/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA

1291

corresponding to the DFA).

1292

Return the destination node, and update EPS_VIA_NODES;

1293

return REG_MISSING in case of errors. */

1294

1295

static Idx

1296

internal_function

1297

proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,

1298

Idx *pidx, Idx node, re_node_set *eps_via_nodes,

1299

struct re_fail_stack_t *fs)

1300

{

1301

const re_dfa_t *const dfa = mctx->dfa;

1302

Idx i;

1303

bool ok;

1304

if (IS_EPSILON_NODE (dfa->nodes[node].type))

1305

{

1306

re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;

1307

re_node_set *edests = &dfa->edests[node];

1308

Idx dest_node;

1309

ok = re_node_set_insert (eps_via_nodes, node);

1310

if (BE (! ok, 0))

1311

return REG_ERROR;

1312

/* Pick up a valid destination, or return REG_MISSING if none

1313

is found. */

1314

for (dest_node = REG_MISSING, i = 0; i < edests->nelem; ++i)

1315

{

1316

Idx candidate = edests->elems[i];

1317

if (!re_node_set_contains (cur_nodes, candidate))

1318

continue;

1319

if (dest_node == REG_MISSING)

1320

dest_node = candidate;

1321

1322

else

1323

{

1324

/* In order to avoid infinite loop like "(a*)*", return the second

1325

epsilon-transition if the first was already considered. */

1326

if (re_node_set_contains (eps_via_nodes, dest_node))

1327

return candidate;

1328

1329

/* Otherwise, push the second epsilon-transition on the fail stack. */

1330

else if (fs != NULL

1331

&& push_fail_stack (fs, *pidx, candidate, nregs, regs,

1332

eps_via_nodes))

1333

return REG_ERROR;

1334

1335

/* We know we are going to exit. */

1336

break;

1337

}

1338

}

1339

return dest_node;

1340

}

1341

else

1342

{

1343

Idx naccepted = 0;

1344

re_token_type_t type = dfa->nodes[node].type;

1345

1346

#ifdef RE_ENABLE_I18N

1347

if (dfa->nodes[node].accept_mb)

1348

naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);

1349

else

1350

#endif /* RE_ENABLE_I18N */

1351

if (type == OP_BACK_REF)

1352

{

1353

Idx subexp_idx = dfa->nodes[node].opr.idx + 1;

1354

naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;

1355

if (fs != NULL)

1356

{

1357

if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)

1358

return REG_MISSING;

1359

else if (naccepted)

1360

{

1361

char *buf = (char *) re_string_get_buffer (&mctx->input);

1362

if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,

1363

naccepted) != 0)

1364

return REG_MISSING;

1365

}

1366

}

1367

1368

if (naccepted == 0)

1369

{

1370

Idx dest_node;

1371

ok = re_node_set_insert (eps_via_nodes, node);

1372

if (BE (! ok, 0))

1373

return REG_ERROR;

1374

dest_node = dfa->edests[node].elems[0];

1375

if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,

1376

dest_node))

1377

return dest_node;

1378

}

1379

}

1380

1381

if (naccepted != 0

1382

|| check_node_accept (mctx, dfa->nodes + node, *pidx))

1383

{

1384

Idx dest_node = dfa->nexts[node];

1385

*pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;

1386

if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL

1387

|| !re_node_set_contains (&mctx->state_log[*pidx]->nodes,

1388

dest_node)))

1389

return REG_MISSING;

1390

re_node_set_empty (eps_via_nodes);

1391

return dest_node;

1392

}

1393

}

1394

return REG_MISSING;

1395

}

1396

1397

static reg_errcode_t

1398

internal_function __attribute_warn_unused_result__

1399

push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,

1400

Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)

1401

{

1402

reg_errcode_t err;

1403

Idx num = fs->num++;

1404

if (fs->num == fs->alloc)

1405

{

1406

struct re_fail_stack_ent_t *new_array;

1407

new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)

1408

* fs->alloc * 2));

1409

if (new_array == NULL)

1410

return REG_ESPACE;

1411

fs->alloc *= 2;

1412

fs->stack = new_array;

1413

}

1414

fs->stack[num].idx = str_idx;

1415

fs->stack[num].node = dest_node;

1416

fs->stack[num].regs = re_malloc (regmatch_t, nregs);

1417

if (fs->stack[num].regs == NULL)

1418

return REG_ESPACE;

1419

memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);

1420

err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);

1421

return err;

1422

}

1423

1424

static Idx

1425

internal_function

1426

pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,

1427

regmatch_t *regs, re_node_set *eps_via_nodes)

1428

{

1429

Idx num = --fs->num;

1430

assert (REG_VALID_INDEX (num));

1431

*pidx = fs->stack[num].idx;

1432

memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);

1433

re_node_set_free (eps_via_nodes);

1434

re_free (fs->stack[num].regs);

1435

*eps_via_nodes = fs->stack[num].eps_via_nodes;

1436

return fs->stack[num].node;

1437

}

1438

1439

/* Set the positions where the subexpressions are starts/ends to registers

1440

PMATCH.

1441

Note: We assume that pmatch[0] is already set, and

1442

pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */

1443

1444

static reg_errcode_t

1445

internal_function __attribute_warn_unused_result__

1446

set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,

1447

regmatch_t *pmatch, bool fl_backtrack)

1448

{

1449

const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;

1450

Idx idx, cur_node;

1451

re_node_set eps_via_nodes;

1452

struct re_fail_stack_t *fs;

1453

struct re_fail_stack_t fs_body = { 0, 2, NULL };

1454

regmatch_t *prev_idx_match;

1455

bool prev_idx_match_malloced = false;

1456

1457

#ifdef DEBUG

1458

assert (nmatch > 1);

1459

assert (mctx->state_log != NULL);

1460

#endif

1461

if (fl_backtrack)

1462

{

1463

fs = &fs_body;

1464

fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);

1465

if (fs->stack == NULL)

1466

return REG_ESPACE;

1467

}

1468

else

1469

fs = NULL;

1470

1471

cur_node = dfa->init_node;

1472

re_node_set_init_empty (&eps_via_nodes);

1473

1474

if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))

1475

prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));

1476

else

1477

{

1478

prev_idx_match = re_malloc (regmatch_t, nmatch);

1479

if (prev_idx_match == NULL)

1480

{

1481

free_fail_stack_return (fs);

1482

return REG_ESPACE;

1483

}

1484

prev_idx_match_malloced = true;

1485

}

1486

memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);

1487

1488

for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)

1489

{

1490

update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);

1491

1492

if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)

1493

{

1494

Idx reg_idx;

1495

if (fs)

1496

{

1497

for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)

1498

if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)

1499

break;

1500

if (reg_idx == nmatch)

1501

{

1502

re_node_set_free (&eps_via_nodes);

1503

if (prev_idx_match_malloced)

1504

re_free (prev_idx_match);

1505

return free_fail_stack_return (fs);

1506

}

1507

cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,

1508

&eps_via_nodes);

1509

}

1510

else

1511

{

1512

re_node_set_free (&eps_via_nodes);

1513

if (prev_idx_match_malloced)

1514

re_free (prev_idx_match);

1515

return REG_NOERROR;

1516

}

1517

}

1518

1519

/* Proceed to next node. */

1520

cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,

1521

&eps_via_nodes, fs);

1522

1523

if (BE (! REG_VALID_INDEX (cur_node), 0))

1524

{

1525

if (BE (cur_node == REG_ERROR, 0))

1526

{

1527

re_node_set_free (&eps_via_nodes);

1528

if (prev_idx_match_malloced)

1529

re_free (prev_idx_match);

1530

free_fail_stack_return (fs);

1531

return REG_ESPACE;

1532

}

1533

if (fs)

1534

cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,

1535

&eps_via_nodes);

1536

else

1537

{

1538

re_node_set_free (&eps_via_nodes);

1539

if (prev_idx_match_malloced)

1540

re_free (prev_idx_match);

1541

return REG_NOMATCH;

1542

}

1543

}

1544

}

1545

re_node_set_free (&eps_via_nodes);

1546

if (prev_idx_match_malloced)

1547

re_free (prev_idx_match);

1548

return free_fail_stack_return (fs);

1549

}

1550

1551

static reg_errcode_t

1552

internal_function

1553

free_fail_stack_return (struct re_fail_stack_t *fs)

1554

{

1555

if (fs)

1556

{

1557

Idx fs_idx;

1558

for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)

1559

{

1560

re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);

1561

re_free (fs->stack[fs_idx].regs);

1562

}

1563

re_free (fs->stack);

1564

}

1565

return REG_NOERROR;

1566

}

1567

1568

static void

1569

internal_function

1570

update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,

1571

regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)

1572

{

1573

int type = dfa->nodes[cur_node].type;

1574

if (type == OP_OPEN_SUBEXP)

1575

{

1576

Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;

1577

1578

/* We are at the first node of this sub expression. */

1579

if (reg_num < nmatch)

1580

{

1581

pmatch[reg_num].rm_so = cur_idx;

1582

pmatch[reg_num].rm_eo = -1;

1583

}

1584

}

1585

else if (type == OP_CLOSE_SUBEXP)

1586

{

1587

Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;

1588

if (reg_num < nmatch)

1589

{

1590

/* We are at the last node of this sub expression. */

1591

if (pmatch[reg_num].rm_so < cur_idx)

1592

{

1593

pmatch[reg_num].rm_eo = cur_idx;

1594

/* This is a non-empty match or we are not inside an optional

1595

subexpression. Accept this right away. */

1596

memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);

1597

}

1598

else

1599

{

1600

if (dfa->nodes[cur_node].opt_subexp

1601

&& prev_idx_match[reg_num].rm_so != -1)

1602

/* We transited through an empty match for an optional

1603

subexpression, like (a?)*, and this is not the subexp's

1604

first match. Copy back the old content of the registers

1605

so that matches of an inner subexpression are undone as

1606

well, like in ((a?))*. */

1607

memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);

1608

else

1609

/* We completed a subexpression, but it may be part of

1610

an optional one, so do not update PREV_IDX_MATCH. */

1611

pmatch[reg_num].rm_eo = cur_idx;

1612

}

1613

}

1614

}

1615

}

1616

1617

/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0

1618

and sift the nodes in each states according to the following rules.

1619

Updated state_log will be wrote to STATE_LOG.

1620

1621

Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...

1622

1. When STR_IDX == MATCH_LAST(the last index in the state_log):

1623

If `a' isn't the LAST_NODE and `a' can't epsilon transit to

1624

the LAST_NODE, we throw away the node `a'.

1625

2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts

1626

string `s' and transit to `b':

1627

i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw

1628

away the node `a'.

1629

ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is

1630

thrown away, we throw away the node `a'.

1631

3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':

1632

i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the

1633

node `a'.

1634

ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,

1635

we throw away the node `a'. */

1636

1637

#define STATE_NODE_CONTAINS(state,node) \

1638

((state) != NULL && re_node_set_contains (&(state)->nodes, node))

1639

1640

static reg_errcode_t

1641

internal_function

1642

sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)

1643

{

1644

reg_errcode_t err;

1645

int null_cnt = 0;

1646

Idx str_idx = sctx->last_str_idx;

1647

re_node_set cur_dest;

1648

1649

#ifdef DEBUG

1650

assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);

1651

#endif

1652

1653

/* Build sifted state_log[str_idx]. It has the nodes which can epsilon

1654

transit to the last_node and the last_node itself. */

1655

err = re_node_set_init_1 (&cur_dest, sctx->last_node);

1656

if (BE (err != REG_NOERROR, 0))

1657

return err;

1658

err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);

1659

if (BE (err != REG_NOERROR, 0))

1660

goto free_return;

1661

1662

/* Then check each states in the state_log. */

1663

while (str_idx > 0)

1664

{

1665

/* Update counters. */

1666

null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;

1667

if (null_cnt > mctx->max_mb_elem_len)

1668

{

1669

memset (sctx->sifted_states, '\0',

1670

sizeof (re_dfastate_t *) * str_idx);

1671

re_node_set_free (&cur_dest);

1672

return REG_NOERROR;

1673

}

1674

re_node_set_empty (&cur_dest);

1675

--str_idx;

1676

1677

if (mctx->state_log[str_idx])

1678

{

1679

err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);

1680

if (BE (err != REG_NOERROR, 0))

1681

goto free_return;

1682

}

1683

1684

/* Add all the nodes which satisfy the following conditions:

1685

- It can epsilon transit to a node in CUR_DEST.

1686

- It is in CUR_SRC.

1687

And update state_log. */

1688

err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);

1689

if (BE (err != REG_NOERROR, 0))

1690

goto free_return;

1691

}

1692

err = REG_NOERROR;

1693

free_return:

1694

re_node_set_free (&cur_dest);

1695

return err;

1696

}

1697

1698

static reg_errcode_t

1699

internal_function __attribute_warn_unused_result__

1700

build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,

1701

Idx str_idx, re_node_set *cur_dest)

1702

{

1703

const re_dfa_t *const dfa = mctx->dfa;

1704

const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;

1705

Idx i;

1706

1707

/* Then build the next sifted state.

1708

We build the next sifted state on `cur_dest', and update

1709

`sifted_states[str_idx]' with `cur_dest'.

1710

Note:

1711

`cur_dest' is the sifted state from `state_log[str_idx + 1]'.

1712

`cur_src' points the node_set of the old `state_log[str_idx]'

1713

(with the epsilon nodes pre-filtered out). */

1714

for (i = 0; i < cur_src->nelem; i++)

1715

{

1716

Idx prev_node = cur_src->elems[i];

1717

int naccepted = 0;

1718

bool ok;

1719

1720

#ifdef DEBUG

1721

re_token_type_t type = dfa->nodes[prev_node].type;

1722

assert (!IS_EPSILON_NODE (type));

1723

#endif

1724

#ifdef RE_ENABLE_I18N

1725

/* If the node may accept `multi byte'. */

1726

if (dfa->nodes[prev_node].accept_mb)

1727

naccepted = sift_states_iter_mb (mctx, sctx, prev_node,

1728

str_idx, sctx->last_str_idx);

1729

#endif /* RE_ENABLE_I18N */

1730

1731

/* We don't check backreferences here.

1732

See update_cur_sifted_state(). */

1733

if (!naccepted

1734

&& check_node_accept (mctx, dfa->nodes + prev_node, str_idx)

1735

&& STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],

1736

dfa->nexts[prev_node]))

1737

naccepted = 1;

1738

1739

if (naccepted == 0)

1740

continue;

1741

1742

if (sctx->limits.nelem)

1743

{

1744

Idx to_idx = str_idx + naccepted;

1745

if (check_dst_limits (mctx, &sctx->limits,

1746

dfa->nexts[prev_node], to_idx,

1747

prev_node, str_idx))

1748

continue;

1749

}

1750

ok = re_node_set_insert (cur_dest, prev_node);

1751

if (BE (! ok, 0))

1752

return REG_ESPACE;

1753

}

1754

1755

return REG_NOERROR;

1756

}

1757

1758

/* Helper functions. */

1759

1760

static reg_errcode_t

1761

internal_function

1762

clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)

1763

{

1764

Idx top = mctx->state_log_top;

1765

1766

if (next_state_log_idx >= mctx->input.bufs_len

1767

|| (next_state_log_idx >= mctx->input.valid_len

1768

&& mctx->input.valid_len < mctx->input.len))

1769

{

1770

reg_errcode_t err;

1771

err = extend_buffers (mctx);

1772

if (BE (err != REG_NOERROR, 0))

1773

return err;

1774

}

1775

1776

if (top < next_state_log_idx)

1777

{

1778

memset (mctx->state_log + top + 1, '\0',

1779

sizeof (re_dfastate_t *) * (next_state_log_idx - top));

1780

mctx->state_log_top = next_state_log_idx;

1781

}

1782

return REG_NOERROR;

1783

}

1784

1785

static reg_errcode_t

1786

internal_function

1787

merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,

1788

re_dfastate_t **src, Idx num)

1789

{

1790

Idx st_idx;

1791

reg_errcode_t err;

1792

for (st_idx = 0; st_idx < num; ++st_idx)

1793

{

1794

if (dst[st_idx] == NULL)

1795

dst[st_idx] = src[st_idx];

1796

else if (src[st_idx] != NULL)

1797

{

1798

re_node_set merged_set;

1799

err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,

1800

&src[st_idx]->nodes);

1801

if (BE (err != REG_NOERROR, 0))

1802

return err;

1803

dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);

1804

re_node_set_free (&merged_set);

1805

if (BE (err != REG_NOERROR, 0))

1806

return err;

1807

}

1808

}

1809

return REG_NOERROR;

1810

}

1811

1812

static reg_errcode_t

1813

internal_function

1814

update_cur_sifted_state (const re_match_context_t *mctx,

1815

re_sift_context_t *sctx, Idx str_idx,

1816

re_node_set *dest_nodes)

1817

{

1818

const re_dfa_t *const dfa = mctx->dfa;

1819

reg_errcode_t err = REG_NOERROR;

1820

const re_node_set *candidates;

1821

candidates = ((mctx->state_log[str_idx] == NULL) ? NULL

1822

: &mctx->state_log[str_idx]->nodes);

1823

1824

if (dest_nodes->nelem == 0)

1825

sctx->sifted_states[str_idx] = NULL;

1826

else

1827

{

1828

if (candidates)

1829

{

1830

/* At first, add the nodes which can epsilon transit to a node in

1831

DEST_NODE. */

1832

err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);

1833

if (BE (err != REG_NOERROR, 0))

1834

return err;

1835

1836

/* Then, check the limitations in the current sift_context. */

1837

if (sctx->limits.nelem)

1838

{

1839

err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,

1840

mctx->bkref_ents, str_idx);

1841

if (BE (err != REG_NOERROR, 0))

1842

return err;

1843

}

1844

}

1845

1846

sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);

1847

if (BE (err != REG_NOERROR, 0))

1848

return err;

1849

}

1850

1851

if (candidates && mctx->state_log[str_idx]->has_backref)

1852

{

1853

err = sift_states_bkref (mctx, sctx, str_idx, candidates);

1854

if (BE (err != REG_NOERROR, 0))

1855

return err;

1856

}

1857

return REG_NOERROR;

1858

}

1859

1860

static reg_errcode_t

1861

internal_function __attribute_warn_unused_result__

1862

add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,

1863

const re_node_set *candidates)

1864

{

1865

reg_errcode_t err = REG_NOERROR;

1866

Idx i;

1867

1868

re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);

1869

if (BE (err != REG_NOERROR, 0))

1870

return err;

1871

1872

if (!state->inveclosure.alloc)

1873

{

1874

err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);

1875

if (BE (err != REG_NOERROR, 0))

1876

return REG_ESPACE;

1877

for (i = 0; i < dest_nodes->nelem; i++)

1878

{

1879

err = re_node_set_merge (&state->inveclosure,

1880

dfa->inveclosures + dest_nodes->elems[i]);

1881

if (BE (err != REG_NOERROR, 0))

1882

return REG_ESPACE;

1883

}

1884

}

1885

return re_node_set_add_intersect (dest_nodes, candidates,

1886

&state->inveclosure);

1887

}

1888

1889

static reg_errcode_t

1890

internal_function

1891

sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,

1892

const re_node_set *candidates)

1893

{

1894

Idx ecl_idx;

1895

reg_errcode_t err;

1896

re_node_set *inv_eclosure = dfa->inveclosures + node;

1897

re_node_set except_nodes;

1898

re_node_set_init_empty (&except_nodes);

1899

for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)

1900

{

1901

Idx cur_node = inv_eclosure->elems[ecl_idx];

1902

if (cur_node == node)

1903

continue;

1904

if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))

1905

{

1906

Idx edst1 = dfa->edests[cur_node].elems[0];

1907

Idx edst2 = ((dfa->edests[cur_node].nelem > 1)

1908

? dfa->edests[cur_node].elems[1] : REG_MISSING);

1909

if ((!re_node_set_contains (inv_eclosure, edst1)

1910

&& re_node_set_contains (dest_nodes, edst1))

1911

|| (REG_VALID_NONZERO_INDEX (edst2)

1912

&& !re_node_set_contains (inv_eclosure, edst2)

1913

&& re_node_set_contains (dest_nodes, edst2)))

1914

{

1915

err = re_node_set_add_intersect (&except_nodes, candidates,

1916

dfa->inveclosures + cur_node);

1917

if (BE (err != REG_NOERROR, 0))

1918

{

1919

re_node_set_free (&except_nodes);

1920

return err;

1921

}

1922

}

1923

}

1924

}

1925

for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)

1926

{

1927

Idx cur_node = inv_eclosure->elems[ecl_idx];

1928

if (!re_node_set_contains (&except_nodes, cur_node))

1929

{

1930

Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;

1931

re_node_set_remove_at (dest_nodes, idx);

1932

}

1933

}

1934

re_node_set_free (&except_nodes);

1935

return REG_NOERROR;

1936

}

1937

1938

static bool

1939

internal_function

1940

check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,

1941

Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)

1942

{

1943

const re_dfa_t *const dfa = mctx->dfa;

1944

Idx lim_idx, src_pos, dst_pos;

1945

1946

Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);

1947

Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);

1948

for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)

1949

{

1950

Idx subexp_idx;

1951

struct re_backref_cache_entry *ent;

1952

ent = mctx->bkref_ents + limits->elems[lim_idx];

1953

subexp_idx = dfa->nodes[ent->node].opr.idx;

1954

1955

dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],

1956

subexp_idx, dst_node, dst_idx,

1957

dst_bkref_idx);

1958

src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],

1959

subexp_idx, src_node, src_idx,

1960

src_bkref_idx);

1961

1962

/* In case of:

1963

<src> <dst> ( <subexp> )

1964

( <subexp> ) <src> <dst>

1965

( <subexp1> <src> <subexp2> <dst> <subexp3> ) */

1966

if (src_pos == dst_pos)

1967

continue; /* This is unrelated limitation. */

1968

else

1969

return true;

1970

}

1971

return false;

1972

}

1973

1974

static int

1975

internal_function

1976

check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,

1977

Idx subexp_idx, Idx from_node, Idx bkref_idx)

1978

{

1979

const re_dfa_t *const dfa = mctx->dfa;

1980

const re_node_set *eclosures = dfa->eclosures + from_node;

1981

Idx node_idx;

1982

1983

/* Else, we are on the boundary: examine the nodes on the epsilon

1984

closure. */

1985

for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)

1986

{

1987

Idx node = eclosures->elems[node_idx];

1988

switch (dfa->nodes[node].type)

1989

{

1990

case OP_BACK_REF:

1991

if (bkref_idx != REG_MISSING)

1992

{

1993

struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;

1994

1995

{

1996

Idx dst;

1997

int cpos;

1998

1999

if (ent->node != node)

2000

continue;

2001

2002

if (subexp_idx < BITSET_WORD_BITS

2003

&& !(ent->eps_reachable_subexps_map

2004

& ((bitset_word_t) 1 << subexp_idx)))

2005

continue;

2006

2007

/* Recurse trying to reach the OP_OPEN_SUBEXP and

2008

OP_CLOSE_SUBEXP cases below. But, if the

2009

destination node is the same node as the source

2010

node, don't recurse because it would cause an

2011

infinite loop: a regex that exhibits this behavior

2012

is ()\1*\1* */

2013

dst = dfa->edests[node].elems[0];

2014

if (dst == from_node)

2015

{

2016

if (boundaries & 1)

2017

return -1;

2018

else /* if (boundaries & 2) */

2019

return 0;

2020

}

2021

2022

cpos =

2023

check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,

2024

dst, bkref_idx);

2025

if (cpos == -1 /* && (boundaries & 1) */)

2026

return -1;

2027

if (cpos == 0 && (boundaries & 2))

2028

return 0;

2029

2030

if (subexp_idx < BITSET_WORD_BITS)

2031

ent->eps_reachable_subexps_map

2032

&= ~((bitset_word_t) 1 << subexp_idx);

2033

}

2034

while (ent++->more);

2035

}

2036

break;

2037

2038

case OP_OPEN_SUBEXP:

2039

if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)

2040

return -1;

2041

break;

2042

2043

case OP_CLOSE_SUBEXP:

2044

if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)

2045

return 0;

2046

break;

2047

2048

default:

2049

break;

2050

}

2051

}

2052

2053

return (boundaries & 2) ? 1 : 0;

2054

}

2055

2056

static int

2057

internal_function

2058

check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,

2059

Idx subexp_idx, Idx from_node, Idx str_idx,

2060

Idx bkref_idx)

2061

{

2062

struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;

2063

int boundaries;

2064

2065

/* If we are outside the range of the subexpression, return -1 or 1. */

2066

if (str_idx < lim->subexp_from)

2067

return -1;

2068

2069

if (lim->subexp_to < str_idx)

2070

return 1;

2071

2072

/* If we are within the subexpression, return 0. */

2073

boundaries = (str_idx == lim->subexp_from);

2074

boundaries |= (str_idx == lim->subexp_to) << 1;

2075

if (boundaries == 0)

2076

return 0;

2077

2078

/* Else, examine epsilon closure. */

2079

return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,

2080

from_node, bkref_idx);

2081

}

2082

2083

/* Check the limitations of sub expressions LIMITS, and remove the nodes

2084

which are against limitations from DEST_NODES. */

2085

2086

static reg_errcode_t

2087

internal_function

2088

check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,

2089

const re_node_set *candidates, re_node_set *limits,

2090

struct re_backref_cache_entry *bkref_ents, Idx str_idx)

2091

{

2092

reg_errcode_t err;

2093

Idx node_idx, lim_idx;

2094

2095

for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)

2096

{

2097

Idx subexp_idx;

2098

struct re_backref_cache_entry *ent;

2099

ent = bkref_ents + limits->elems[lim_idx];

2100

2101

if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)

2102

continue; /* This is unrelated limitation. */

2103

2104

subexp_idx = dfa->nodes[ent->node].opr.idx;

2105

if (ent->subexp_to == str_idx)

2106

{

2107

Idx ops_node = REG_MISSING;

2108

Idx cls_node = REG_MISSING;

2109

for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)

2110

{

2111

Idx node = dest_nodes->elems[node_idx];

2112

re_token_type_t type = dfa->nodes[node].type;

2113

if (type == OP_OPEN_SUBEXP

2114

&& subexp_idx == dfa->nodes[node].opr.idx)

2115

ops_node = node;

2116

else if (type == OP_CLOSE_SUBEXP

2117

&& subexp_idx == dfa->nodes[node].opr.idx)

2118

cls_node = node;

2119

}

2120

2121

/* Check the limitation of the open subexpression. */

2122

/* Note that (ent->subexp_to = str_idx != ent->subexp_from). */

2123

if (REG_VALID_INDEX (ops_node))

2124

{

2125

err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,

2126

candidates);

2127

if (BE (err != REG_NOERROR, 0))

2128

return err;

2129

}

2130

2131

/* Check the limitation of the close subexpression. */

2132

if (REG_VALID_INDEX (cls_node))

2133

for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)

2134

{

2135

Idx node = dest_nodes->elems[node_idx];

2136

if (!re_node_set_contains (dfa->inveclosures + node,

2137

cls_node)

2138

&& !re_node_set_contains (dfa->eclosures + node,

2139

cls_node))

2140

{

2141

/* It is against this limitation.

2142

Remove it form the current sifted state. */

2143

err = sub_epsilon_src_nodes (dfa, node, dest_nodes,

2144

candidates);

2145

if (BE (err != REG_NOERROR, 0))

2146

return err;

2147

--node_idx;

2148

}

2149

}

2150

}

2151

else /* (ent->subexp_to != str_idx) */

2152

{

2153

for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)

2154

{

2155

Idx node = dest_nodes->elems[node_idx];

2156

re_token_type_t type = dfa->nodes[node].type;

2157

if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)

2158

{

2159

if (subexp_idx != dfa->nodes[node].opr.idx)

2160

continue;

2161

/* It is against this limitation.

2162

Remove it form the current sifted state. */

2163

err = sub_epsilon_src_nodes (dfa, node, dest_nodes,

2164

candidates);

2165

if (BE (err != REG_NOERROR, 0))

2166

return err;

2167

}

2168

}

2169

}

2170

}

2171

return REG_NOERROR;

2172

}

2173

2174

static reg_errcode_t

2175

internal_function __attribute_warn_unused_result__

2176

sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,

2177

Idx str_idx, const re_node_set *candidates)

2178

{

2179

const re_dfa_t *const dfa = mctx->dfa;

2180

reg_errcode_t err;

2181

Idx node_idx, node;

2182

re_sift_context_t local_sctx;

2183

Idx first_idx = search_cur_bkref_entry (mctx, str_idx);

2184

2185

if (first_idx == REG_MISSING)

2186

return REG_NOERROR;

2187

2188

local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */

2189

2190

for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)

2191

{

2192

Idx enabled_idx;

2193

re_token_type_t type;

2194

struct re_backref_cache_entry *entry;

2195

node = candidates->elems[node_idx];

2196

type = dfa->nodes[node].type;

2197

/* Avoid infinite loop for the REs like "()\1+". */

2198

if (node == sctx->last_node && str_idx == sctx->last_str_idx)

2199

continue;

2200

if (type != OP_BACK_REF)

2201

continue;

2202

2203

entry = mctx->bkref_ents + first_idx;

2204

enabled_idx = first_idx;

2205

2206

{

2207

Idx subexp_len;

2208

Idx to_idx;

2209

Idx dst_node;

2210

bool ok;

2211

re_dfastate_t *cur_state;

2212

2213

if (entry->node != node)

2214

continue;

2215

subexp_len = entry->subexp_to - entry->subexp_from;

2216

to_idx = str_idx + subexp_len;

2217

dst_node = (subexp_len ? dfa->nexts[node]

2218

: dfa->edests[node].elems[0]);

2219

2220

if (to_idx > sctx->last_str_idx

2221

|| sctx->sifted_states[to_idx] == NULL

2222

|| !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)

2223

|| check_dst_limits (mctx, &sctx->limits, node,

2224

str_idx, dst_node, to_idx))

2225

continue;

2226

2227

if (local_sctx.sifted_states == NULL)

2228

{

2229

local_sctx = *sctx;

2230

err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);

2231

if (BE (err != REG_NOERROR, 0))

2232

goto free_return;

2233

}

2234

local_sctx.last_node = node;

2235

local_sctx.last_str_idx = str_idx;

2236

ok = re_node_set_insert (&local_sctx.limits, enabled_idx);

2237

if (BE (! ok, 0))

2238

{

2239

err = REG_ESPACE;

2240

goto free_return;

2241

}

2242

cur_state = local_sctx.sifted_states[str_idx];

2243

err = sift_states_backward (mctx, &local_sctx);

2244

if (BE (err != REG_NOERROR, 0))

2245

goto free_return;

2246

if (sctx->limited_states != NULL)

2247

{

2248

err = merge_state_array (dfa, sctx->limited_states,

2249

local_sctx.sifted_states,

2250

str_idx + 1);

2251

if (BE (err != REG_NOERROR, 0))

2252

goto free_return;

2253

}

2254

local_sctx.sifted_states[str_idx] = cur_state;

2255

re_node_set_remove (&local_sctx.limits, enabled_idx);

2256

2257

/* mctx->bkref_ents may have changed, reload the pointer. */

2258

entry = mctx->bkref_ents + enabled_idx;

2259

}

2260

while (enabled_idx++, entry++->more);

2261

}

2262

err = REG_NOERROR;

2263

free_return:

2264

if (local_sctx.sifted_states != NULL)

2265

{

2266

re_node_set_free (&local_sctx.limits);

2267

}

2268

2269

return err;

2270

}

2271

2272

2273

#ifdef RE_ENABLE_I18N

2274

static int

2275

internal_function

2276

sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,

2277

Idx node_idx, Idx str_idx, Idx max_str_idx)

2278

{

2279

const re_dfa_t *const dfa = mctx->dfa;

2280

int naccepted;

2281

/* Check the node can accept `multi byte'. */

2282

naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);

2283

if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&

2284

!STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],

2285

dfa->nexts[node_idx]))

2286

/* The node can't accept the `multi byte', or the

2287

destination was already thrown away, then the node

2288

could't accept the current input `multi byte'. */

2289

naccepted = 0;

2290

/* Otherwise, it is sure that the node could accept

2291

`naccepted' bytes input. */

2292

return naccepted;

2293

}

2294

#endif /* RE_ENABLE_I18N */

2295

2296

2297

/* Functions for state transition. */

2298

2299

/* Return the next state to which the current state STATE will transit by

2300

accepting the current input byte, and update STATE_LOG if necessary.

2301

If STATE can accept a multibyte char/collating element/back reference

2302

update the destination of STATE_LOG. */

2303

2304

static re_dfastate_t *

2305

internal_function __attribute_warn_unused_result__

2306

transit_state (reg_errcode_t *err, re_match_context_t *mctx,

2307

re_dfastate_t *state)

2308

{

2309

re_dfastate_t **trtable;

2310

unsigned char ch;

2311

2312

#ifdef RE_ENABLE_I18N

2313

/* If the current state can accept multibyte. */

2314

if (BE (state->accept_mb, 0))

2315

{

2316

*err = transit_state_mb (mctx, state);

2317

if (BE (*err != REG_NOERROR, 0))

2318

return NULL;

2319

}

2320

#endif /* RE_ENABLE_I18N */

2321

2322

/* Then decide the next state with the single byte. */

2323

#if 0

2324

if (0)

2325

/* don't use transition table */

2326

return transit_state_sb (err, mctx, state);

2327

#endif

2328

2329

/* Use transition table */

2330

ch = re_string_fetch_byte (&mctx->input);

2331

for (;;)

2332

{

2333

trtable = state->trtable;

2334

if (BE (trtable != NULL, 1))

2335

return trtable[ch];

2336

2337

trtable = state->word_trtable;

2338

if (BE (trtable != NULL, 1))

2339

{

2340

unsigned int context;

2341

context

2342

= re_string_context_at (&mctx->input,

2343

re_string_cur_idx (&mctx->input) - 1,

2344

mctx->eflags);

2345

if (IS_WORD_CONTEXT (context))

2346

return trtable[ch + SBC_MAX];

2347

else

2348

return trtable[ch];

2349

}

2350

2351

if (!build_trtable (mctx->dfa, state))

2352

{

2353

*err = REG_ESPACE;

2354

return NULL;

2355

}

2356

2357

/* Retry, we now have a transition table. */

2358

}

2359

}

2360

2361

/* Update the state_log if we need */

2362

static re_dfastate_t *

2363

internal_function

2364

merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,

2365

re_dfastate_t *next_state)

2366

{

2367

const re_dfa_t *const dfa = mctx->dfa;

2368

Idx cur_idx = re_string_cur_idx (&mctx->input);

2369

2370

if (cur_idx > mctx->state_log_top)

2371

{

2372

mctx->state_log[cur_idx] = next_state;

2373

mctx->state_log_top = cur_idx;

2374

}

2375

else if (mctx->state_log[cur_idx] == 0)

2376

{

2377

mctx->state_log[cur_idx] = next_state;

2378

}

2379

else

2380

{

2381

re_dfastate_t *pstate;

2382

unsigned int context;

2383

re_node_set next_nodes, *log_nodes, *table_nodes = NULL;

2384

/* If (state_log[cur_idx] != 0), it implies that cur_idx is

2385

the destination of a multibyte char/collating element/

2386

back reference. Then the next state is the union set of

2387

these destinations and the results of the transition table. */

2388

pstate = mctx->state_log[cur_idx];

2389

log_nodes = pstate->entrance_nodes;

2390

if (next_state != NULL)

2391

{

2392

table_nodes = next_state->entrance_nodes;

2393

*err = re_node_set_init_union (&next_nodes, table_nodes,

2394

log_nodes);

2395

if (BE (*err != REG_NOERROR, 0))

2396

return NULL;

2397

}

2398

else

2399

next_nodes = *log_nodes;

2400

/* Note: We already add the nodes of the initial state,

2401

then we don't need to add them here. */

2402

2403

context = re_string_context_at (&mctx->input,

2404

re_string_cur_idx (&mctx->input) - 1,

2405

mctx->eflags);

2406

next_state = mctx->state_log[cur_idx]

2407

= re_acquire_state_context (err, dfa, &next_nodes, context);

2408

/* We don't need to check errors here, since the return value of

2409

this function is next_state and ERR is already set. */

2410

2411

if (table_nodes != NULL)

2412

re_node_set_free (&next_nodes);

2413

}

2414

2415

if (BE (dfa->nbackref, 0) && next_state != NULL)

2416

{

2417

/* Check OP_OPEN_SUBEXP in the current state in case that we use them

2418

later. We must check them here, since the back references in the

2419

next state might use them. */

2420

*err = check_subexp_matching_top (mctx, &next_state->nodes,

2421

cur_idx);

2422

if (BE (*err != REG_NOERROR, 0))

2423

return NULL;

2424

2425

/* If the next state has back references. */

2426

if (next_state->has_backref)

2427

{

2428

*err = transit_state_bkref (mctx, &next_state->nodes);

2429

if (BE (*err != REG_NOERROR, 0))

2430

return NULL;

2431

next_state = mctx->state_log[cur_idx];

2432

}

2433

}

2434

2435

return next_state;

2436

}

2437

2438

/* Skip bytes in the input that correspond to part of a

2439

multi-byte match, then look in the log for a state

2440

from which to restart matching. */

2441

static re_dfastate_t *

2442

internal_function

2443

find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)

2444

{

2445

re_dfastate_t *cur_state;

2446

2447

{

2448

Idx max = mctx->state_log_top;

2449

Idx cur_str_idx = re_string_cur_idx (&mctx->input);

2450

2451

2452

{

2453

if (++cur_str_idx > max)

2454

return NULL;

2455

re_string_skip_bytes (&mctx->input, 1);

2456

}

2457

while (mctx->state_log[cur_str_idx] == NULL);

2458

2459

cur_state = merge_state_with_log (err, mctx, NULL);

2460

}

2461

while (*err == REG_NOERROR && cur_state == NULL);

2462

return cur_state;

2463

}

2464

2465

/* Helper functions for transit_state. */

2466

2467

/* From the node set CUR_NODES, pick up the nodes whose types are

2468

OP_OPEN_SUBEXP and which have corresponding back references in the regular

2469

expression. And register them to use them later for evaluating the

2470

correspoding back references. */

2471

2472

static reg_errcode_t

2473

internal_function

2474

check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,

2475

Idx str_idx)

2476

{

2477

const re_dfa_t *const dfa = mctx->dfa;

2478

Idx node_idx;

2479

reg_errcode_t err;

2480

2481

/* TODO: This isn't efficient.

2482

Because there might be more than one nodes whose types are

2483

OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all

2484

nodes.

2485

E.g. RE: (a){2} */

2486

for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)

2487

{

2488

Idx node = cur_nodes->elems[node_idx];

2489

if (dfa->nodes[node].type == OP_OPEN_SUBEXP

2490

&& dfa->nodes[node].opr.idx < BITSET_WORD_BITS

2491

&& (dfa->used_bkref_map

2492

& ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))

2493

{

2494

err = match_ctx_add_subtop (mctx, node, str_idx);

2495

if (BE (err != REG_NOERROR, 0))

2496

return err;

2497

}

2498

}

2499

return REG_NOERROR;

2500

}

2501

2502

#if 0

2503

/* Return the next state to which the current state STATE will transit by

2504

accepting the current input byte. */

2505

2506

static re_dfastate_t *

2507

transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,

2508

re_dfastate_t *state)

2509

{

2510

const re_dfa_t *const dfa = mctx->dfa;

2511

re_node_set next_nodes;

2512

re_dfastate_t *next_state;

2513

Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);

2514

unsigned int context;

2515

2516

*err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);

2517

if (BE (*err != REG_NOERROR, 0))

2518

return NULL;

2519

for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)

2520

{

2521

Idx cur_node = state->nodes.elems[node_cnt];

2522

if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))

2523

{

2524

*err = re_node_set_merge (&next_nodes,

2525

dfa->eclosures + dfa->nexts[cur_node]);

2526

if (BE (*err != REG_NOERROR, 0))

2527

{

2528

re_node_set_free (&next_nodes);

2529

return NULL;

2530

}

2531

}

2532

}

2533

context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);

2534

next_state = re_acquire_state_context (err, dfa, &next_nodes, context);

2535

/* We don't need to check errors here, since the return value of

2536

this function is next_state and ERR is already set. */

2537

2538

re_node_set_free (&next_nodes);

2539

re_string_skip_bytes (&mctx->input, 1);

2540

return next_state;

2541

}

2542

#endif

2543

2544

#ifdef RE_ENABLE_I18N

2545

static reg_errcode_t

2546

internal_function

2547

transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)

2548

{

2549

const re_dfa_t *const dfa = mctx->dfa;

2550

reg_errcode_t err;

2551

Idx i;

2552

2553

for (i = 0; i < pstate->nodes.nelem; ++i)

2554

{

2555

re_node_set dest_nodes, *new_nodes;

2556

Idx cur_node_idx = pstate->nodes.elems[i];

2557

int naccepted;

2558

Idx dest_idx;

2559

unsigned int context;

2560

re_dfastate_t *dest_state;

2561

2562

if (!dfa->nodes[cur_node_idx].accept_mb)

2563

continue;

2564

2565

if (dfa->nodes[cur_node_idx].constraint)

2566

{

2567

context = re_string_context_at (&mctx->input,

2568

re_string_cur_idx (&mctx->input),

2569

mctx->eflags);

2570

if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,

2571

context))

2572

continue;

2573

}

2574

2575

/* How many bytes the node can accept? */

2576

naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,

2577

re_string_cur_idx (&mctx->input));

2578

if (naccepted == 0)

2579

continue;

2580

2581

/* The node can accepts `naccepted' bytes. */

2582

dest_idx = re_string_cur_idx (&mctx->input) + naccepted;

2583

mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted

2584

: mctx->max_mb_elem_len);

2585

err = clean_state_log_if_needed (mctx, dest_idx);

2586

if (BE (err != REG_NOERROR, 0))

2587

return err;

2588

#ifdef DEBUG

2589

assert (dfa->nexts[cur_node_idx] != REG_MISSING);

2590

#endif

2591

new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];

2592

2593

dest_state = mctx->state_log[dest_idx];

2594

if (dest_state == NULL)

2595

dest_nodes = *new_nodes;

2596

else

2597

{

2598

err = re_node_set_init_union (&dest_nodes,

2599

dest_state->entrance_nodes, new_nodes);

2600

if (BE (err != REG_NOERROR, 0))

2601

return err;

2602

}

2603

context = re_string_context_at (&mctx->input, dest_idx - 1,

2604

mctx->eflags);

2605

mctx->state_log[dest_idx]

2606

= re_acquire_state_context (&err, dfa, &dest_nodes, context);

2607

if (dest_state != NULL)

2608

re_node_set_free (&dest_nodes);

2609

if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))

2610

return err;

2611

}

2612

return REG_NOERROR;

2613

}

2614

#endif /* RE_ENABLE_I18N */

2615

2616

static reg_errcode_t

2617

internal_function

2618

transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)

2619

{

2620

const re_dfa_t *const dfa = mctx->dfa;

2621

reg_errcode_t err;

2622

Idx i;

2623

Idx cur_str_idx = re_string_cur_idx (&mctx->input);

2624

2625

for (i = 0; i < nodes->nelem; ++i)

2626

{

2627

Idx dest_str_idx, prev_nelem, bkc_idx;

2628

Idx node_idx = nodes->elems[i];

2629

unsigned int context;

2630

const re_token_t *node = dfa->nodes + node_idx;

2631

re_node_set *new_dest_nodes;

2632

2633

/* Check whether `node' is a backreference or not. */

2634

if (node->type != OP_BACK_REF)

2635

continue;

2636

2637

if (node->constraint)

2638

{

2639

context = re_string_context_at (&mctx->input, cur_str_idx,

2640

mctx->eflags);

2641

if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))

2642

continue;

2643

}

2644

2645

/* `node' is a backreference.

2646

Check the substring which the substring matched. */

2647

bkc_idx = mctx->nbkref_ents;

2648

err = get_subexp (mctx, node_idx, cur_str_idx);

2649

if (BE (err != REG_NOERROR, 0))

2650

goto free_return;

2651

2652

/* And add the epsilon closures (which is `new_dest_nodes') of

2653

the backreference to appropriate state_log. */

2654

#ifdef DEBUG

2655

assert (dfa->nexts[node_idx] != REG_MISSING);

2656

#endif

2657

for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)

2658

{

2659

Idx subexp_len;

2660

re_dfastate_t *dest_state;

2661

struct re_backref_cache_entry *bkref_ent;

2662

bkref_ent = mctx->bkref_ents + bkc_idx;

2663

if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)

2664

continue;

2665

subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;

2666

new_dest_nodes = (subexp_len == 0

2667

? dfa->eclosures + dfa->edests[node_idx].elems[0]

2668

: dfa->eclosures + dfa->nexts[node_idx]);

2669

dest_str_idx = (cur_str_idx + bkref_ent->subexp_to

2670

- bkref_ent->subexp_from);

2671

context = re_string_context_at (&mctx->input, dest_str_idx - 1,

2672

mctx->eflags);

2673

dest_state = mctx->state_log[dest_str_idx];

2674

prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0

2675

: mctx->state_log[cur_str_idx]->nodes.nelem);

2676

/* Add `new_dest_node' to state_log. */

2677

if (dest_state == NULL)

2678

{

2679

mctx->state_log[dest_str_idx]

2680

= re_acquire_state_context (&err, dfa, new_dest_nodes,

2681

context);

2682

if (BE (mctx->state_log[dest_str_idx] == NULL

2683

&& err != REG_NOERROR, 0))

2684

goto free_return;

2685

}

2686

else

2687

{

2688

re_node_set dest_nodes;

2689

err = re_node_set_init_union (&dest_nodes,

2690

dest_state->entrance_nodes,

2691

new_dest_nodes);

2692

if (BE (err != REG_NOERROR, 0))

2693

{

2694

re_node_set_free (&dest_nodes);

2695

goto free_return;

2696

}

2697

mctx->state_log[dest_str_idx]

2698

= re_acquire_state_context (&err, dfa, &dest_nodes, context);

2699

re_node_set_free (&dest_nodes);

2700

if (BE (mctx->state_log[dest_str_idx] == NULL

2701

&& err != REG_NOERROR, 0))

2702

goto free_return;

2703

}

2704

/* We need to check recursively if the backreference can epsilon

2705

transit. */

2706

if (subexp_len == 0

2707

&& mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)

2708

{

2709

err = check_subexp_matching_top (mctx, new_dest_nodes,

2710

cur_str_idx);

2711

if (BE (err != REG_NOERROR, 0))

2712

goto free_return;

2713

err = transit_state_bkref (mctx, new_dest_nodes);

2714

if (BE (err != REG_NOERROR, 0))

2715

goto free_return;

2716

}

2717

}

2718

}

2719

err = REG_NOERROR;

2720

free_return:

2721

return err;

2722

}

2723

2724

/* Enumerate all the candidates which the backreference BKREF_NODE can match

2725

at BKREF_STR_IDX, and register them by match_ctx_add_entry().

2726

Note that we might collect inappropriate candidates here.

2727

However, the cost of checking them strictly here is too high, then we

2728

delay these checking for prune_impossible_nodes(). */

2729

2730

static reg_errcode_t

2731

internal_function __attribute_warn_unused_result__

2732

get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)

2733

{

2734

const re_dfa_t *const dfa = mctx->dfa;

2735

Idx subexp_num, sub_top_idx;

2736

const char *buf = (const char *) re_string_get_buffer (&mctx->input);

2737

/* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */

2738

Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);

2739

if (cache_idx != REG_MISSING)

2740

{

2741

const struct re_backref_cache_entry *entry

2742

= mctx->bkref_ents + cache_idx;

2743

2744

if (entry->node == bkref_node)

2745

return REG_NOERROR; /* We already checked it. */

2746

while (entry++->more);

2747

}

2748

2749

subexp_num = dfa->nodes[bkref_node].opr.idx;

2750

2751

/* For each sub expression */

2752

for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)

2753

{

2754

reg_errcode_t err;

2755

re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];

2756

re_sub_match_last_t *sub_last;

2757

Idx sub_last_idx, sl_str, bkref_str_off;

2758

2759

if (dfa->nodes[sub_top->node].opr.idx != subexp_num)

2760

continue; /* It isn't related. */

2761

2762

sl_str = sub_top->str_idx;

2763

bkref_str_off = bkref_str_idx;

2764

/* At first, check the last node of sub expressions we already

2765

evaluated. */

2766

for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)

2767

{

2768

regoff_t sl_str_diff;

2769

sub_last = sub_top->lasts[sub_last_idx];

2770

sl_str_diff = sub_last->str_idx - sl_str;

2771

/* The matched string by the sub expression match with the substring

2772

at the back reference? */

2773

if (sl_str_diff > 0)

2774

{

2775

if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))

2776

{

2777

/* Not enough chars for a successful match. */

2778

if (bkref_str_off + sl_str_diff > mctx->input.len)

2779

break;

2780

2781

err = clean_state_log_if_needed (mctx,

2782

bkref_str_off

2783

+ sl_str_diff);

2784

if (BE (err != REG_NOERROR, 0))

2785

return err;

2786

buf = (const char *) re_string_get_buffer (&mctx->input);

2787

}

2788

if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)

2789

/* We don't need to search this sub expression any more. */

2790

break;

2791

}

2792

bkref_str_off += sl_str_diff;

2793

sl_str += sl_str_diff;

2794

err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,

2795

bkref_str_idx);

2796

2797

/* Reload buf, since the preceding call might have reallocated

2798

the buffer. */

2799

buf = (const char *) re_string_get_buffer (&mctx->input);

2800

2801

if (err == REG_NOMATCH)

2802

continue;

2803

if (BE (err != REG_NOERROR, 0))

2804

return err;

2805

}

2806

2807

if (sub_last_idx < sub_top->nlasts)

2808

continue;

2809

if (sub_last_idx > 0)

2810

++sl_str;

2811

/* Then, search for the other last nodes of the sub expression. */

2812

for (; sl_str <= bkref_str_idx; ++sl_str)

2813

{

2814

Idx cls_node;

2815

regoff_t sl_str_off;

2816

const re_node_set *nodes;

2817

sl_str_off = sl_str - sub_top->str_idx;

2818

/* The matched string by the sub expression match with the substring

2819

at the back reference? */

2820

if (sl_str_off > 0)

2821

{

2822

if (BE (bkref_str_off >= mctx->input.valid_len, 0))

2823

{

2824

/* If we are at the end of the input, we cannot match. */

2825

if (bkref_str_off >= mctx->input.len)

2826

break;

2827

2828

err = extend_buffers (mctx);

2829

if (BE (err != REG_NOERROR, 0))

2830

return err;

2831

2832

buf = (const char *) re_string_get_buffer (&mctx->input);

2833

}

2834

if (buf [bkref_str_off++] != buf[sl_str - 1])

2835

break; /* We don't need to search this sub expression

2836

any more. */

2837

}

2838

if (mctx->state_log[sl_str] == NULL)

2839

continue;

2840

/* Does this state have a ')' of the sub expression? */

2841

nodes = &mctx->state_log[sl_str]->nodes;

2842

cls_node = find_subexp_node (dfa, nodes, subexp_num,

2843

OP_CLOSE_SUBEXP);

2844

if (cls_node == REG_MISSING)

2845

continue; /* No. */

2846

if (sub_top->path == NULL)

2847

{

2848

sub_top->path = calloc (sizeof (state_array_t),

2849

sl_str - sub_top->str_idx + 1);

2850

if (sub_top->path == NULL)

2851

return REG_ESPACE;

2852

}

2853

/* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node

2854

in the current context? */

2855

err = check_arrival (mctx, sub_top->path, sub_top->node,

2856

sub_top->str_idx, cls_node, sl_str,

2857

OP_CLOSE_SUBEXP);

2858

if (err == REG_NOMATCH)

2859

continue;

2860

if (BE (err != REG_NOERROR, 0))

2861

return err;

2862

sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);

2863

if (BE (sub_last == NULL, 0))

2864

return REG_ESPACE;

2865

err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,

2866

bkref_str_idx);

2867

if (err == REG_NOMATCH)

2868

continue;

2869

}

2870

}

2871

return REG_NOERROR;

2872

}

2873

2874

/* Helper functions for get_subexp(). */

2875

2876

/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.

2877

If it can arrive, register the sub expression expressed with SUB_TOP

2878

and SUB_LAST. */

2879

2880

static reg_errcode_t

2881

internal_function

2882

get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,

2883

re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)

2884

{

2885

reg_errcode_t err;

2886

Idx to_idx;

2887

/* Can the subexpression arrive the back reference? */

2888

err = check_arrival (mctx, &sub_last->path, sub_last->node,

2889

sub_last->str_idx, bkref_node, bkref_str,

2890

OP_OPEN_SUBEXP);

2891

if (err != REG_NOERROR)

2892

return err;

2893

err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,

2894

sub_last->str_idx);

2895

if (BE (err != REG_NOERROR, 0))

2896

return err;

2897

to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;

2898

return clean_state_log_if_needed (mctx, to_idx);

2899

}

2900

2901

/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.

2902

Search '(' if FL_OPEN, or search ')' otherwise.

2903

TODO: This function isn't efficient...

2904

Because there might be more than one nodes whose types are

2905

OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all

2906

nodes.

2907

E.g. RE: (a){2} */

2908

2909

static Idx

2910

internal_function

2911

find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,

2912

Idx subexp_idx, int type)

2913

{

2914

Idx cls_idx;

2915

for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)

2916

{

2917

Idx cls_node = nodes->elems[cls_idx];

2918

const re_token_t *node = dfa->nodes + cls_node;

2919

if (node->type == type

2920

&& node->opr.idx == subexp_idx)

2921

return cls_node;

2922

}

2923

return REG_MISSING;

2924

}

2925

2926

/* Check whether the node TOP_NODE at TOP_STR can arrive to the node

2927

LAST_NODE at LAST_STR. We record the path onto PATH since it will be

2928

heavily reused.

2929

Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */

2930

2931

static reg_errcode_t

2932

internal_function __attribute_warn_unused_result__

2933

check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,

2934

Idx top_str, Idx last_node, Idx last_str, int type)

2935

{

2936

const re_dfa_t *const dfa = mctx->dfa;

2937

reg_errcode_t err = REG_NOERROR;

2938

Idx subexp_num, backup_cur_idx, str_idx, null_cnt;

2939

re_dfastate_t *cur_state = NULL;

2940

re_node_set *cur_nodes, next_nodes;

2941

re_dfastate_t **backup_state_log;

2942

unsigned int context;

2943

2944

subexp_num = dfa->nodes[top_node].opr.idx;

2945

/* Extend the buffer if we need. */

2946

if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))

2947

{

2948

re_dfastate_t **new_array;

2949

Idx old_alloc = path->alloc;

2950

Idx new_alloc = old_alloc + last_str + mctx->max_mb_elem_len + 1;

2951

if (BE (new_alloc < old_alloc, 0)

2952

|| BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0))

2953

return REG_ESPACE;

2954

new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);

2955

if (BE (new_array == NULL, 0))

2956

return REG_ESPACE;

2957

path->array = new_array;

2958

path->alloc = new_alloc;

2959

memset (new_array + old_alloc, '\0',

2960

sizeof (re_dfastate_t *) * (path->alloc - old_alloc));

2961

}

2962

2963

str_idx = path->next_idx ? path->next_idx : top_str;

2964

2965

/* Temporary modify MCTX. */

2966

backup_state_log = mctx->state_log;

2967

backup_cur_idx = mctx->input.cur_idx;

2968

mctx->state_log = path->array;

2969

mctx->input.cur_idx = str_idx;

2970

2971

/* Setup initial node set. */

2972

context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);

2973

if (str_idx == top_str)

2974

{

2975

err = re_node_set_init_1 (&next_nodes, top_node);

2976

if (BE (err != REG_NOERROR, 0))

2977

return err;

2978

err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);

2979

if (BE (err != REG_NOERROR, 0))

2980

{

2981

re_node_set_free (&next_nodes);

2982

return err;

2983

}

2984

}

2985

else

2986

{

2987

cur_state = mctx->state_log[str_idx];

2988

if (cur_state && cur_state->has_backref)

2989

{

2990

err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);

2991

if (BE (err != REG_NOERROR, 0))

2992

return err;

2993

}

2994

else

2995

re_node_set_init_empty (&next_nodes);

2996

}

2997

if (str_idx == top_str || (cur_state && cur_state->has_backref))

2998

{

2999

if (next_nodes.nelem)

3000

{

3001

err = expand_bkref_cache (mctx, &next_nodes, str_idx,

3002

subexp_num, type);

3003

if (BE (err != REG_NOERROR, 0))

3004

{

3005

re_node_set_free (&next_nodes);

3006

return err;

3007

}

3008

}

3009

cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);

3010

if (BE (cur_state == NULL && err != REG_NOERROR, 0))

3011

{

3012

re_node_set_free (&next_nodes);

3013

return err;

3014

}

3015

mctx->state_log[str_idx] = cur_state;

3016

}

3017

3018

for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)

3019

{

3020

re_node_set_empty (&next_nodes);

3021

if (mctx->state_log[str_idx + 1])

3022

{

3023

err = re_node_set_merge (&next_nodes,

3024

&mctx->state_log[str_idx + 1]->nodes);

3025

if (BE (err != REG_NOERROR, 0))

3026

{

3027

re_node_set_free (&next_nodes);

3028

return err;

3029

}

3030

}

3031

if (cur_state)

3032

{

3033

err = check_arrival_add_next_nodes (mctx, str_idx,

3034

&cur_state->non_eps_nodes,

3035

&next_nodes);

3036

if (BE (err != REG_NOERROR, 0))

3037

{

3038

re_node_set_free (&next_nodes);

3039

return err;

3040

}

3041

}

3042

++str_idx;

3043

if (next_nodes.nelem)

3044

{

3045

err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);

3046

if (BE (err != REG_NOERROR, 0))

3047

{

3048

re_node_set_free (&next_nodes);

3049

return err;

3050

}

3051

err = expand_bkref_cache (mctx, &next_nodes, str_idx,

3052

subexp_num, type);

3053

if (BE (err != REG_NOERROR, 0))

3054

{

3055

re_node_set_free (&next_nodes);

3056

return err;

3057

}

3058

}

3059

context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);

3060

cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);

3061

if (BE (cur_state == NULL && err != REG_NOERROR, 0))

3062

{

3063

re_node_set_free (&next_nodes);

3064

return err;

3065

}

3066

mctx->state_log[str_idx] = cur_state;

3067

null_cnt = cur_state == NULL ? null_cnt + 1 : 0;

3068

}

3069

re_node_set_free (&next_nodes);

3070

cur_nodes = (mctx->state_log[last_str] == NULL ? NULL

3071

: &mctx->state_log[last_str]->nodes);

3072

path->next_idx = str_idx;

3073

3074

/* Fix MCTX. */

3075

mctx->state_log = backup_state_log;

3076

mctx->input.cur_idx = backup_cur_idx;

3077

3078

/* Then check the current node set has the node LAST_NODE. */

3079

if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))

3080

return REG_NOERROR;

3081

3082

return REG_NOMATCH;

3083

}

3084

3085

/* Helper functions for check_arrival. */

3086

3087

/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them

3088

to NEXT_NODES.

3089

TODO: This function is similar to the functions transit_state*(),

3090

however this function has many additional works.

3091

Can't we unify them? */

3092

3093

static reg_errcode_t

3094

internal_function __attribute_warn_unused_result__

3095

check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,

3096

re_node_set *cur_nodes, re_node_set *next_nodes)

3097

{

3098

const re_dfa_t *const dfa = mctx->dfa;

3099

bool ok;

3100

Idx cur_idx;

3101

#ifdef RE_ENABLE_I18N

3102

reg_errcode_t err = REG_NOERROR;

3103

#endif

3104

re_node_set union_set;

3105

re_node_set_init_empty (&union_set);

3106

for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)

3107

{

3108

int naccepted = 0;

3109

Idx cur_node = cur_nodes->elems[cur_idx];

3110

#ifdef DEBUG

3111

re_token_type_t type = dfa->nodes[cur_node].type;

3112

assert (!IS_EPSILON_NODE (type));

3113

#endif

3114

#ifdef RE_ENABLE_I18N

3115

/* If the node may accept `multi byte'. */

3116

if (dfa->nodes[cur_node].accept_mb)

3117

{

3118

naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,

3119

str_idx);

3120

if (naccepted > 1)

3121

{

3122

re_dfastate_t *dest_state;

3123

Idx next_node = dfa->nexts[cur_node];

3124

Idx next_idx = str_idx + naccepted;

3125

dest_state = mctx->state_log[next_idx];

3126

re_node_set_empty (&union_set);

3127

if (dest_state)

3128

{

3129

err = re_node_set_merge (&union_set, &dest_state->nodes);

3130

if (BE (err != REG_NOERROR, 0))

3131

{

3132

re_node_set_free (&union_set);

3133

return err;

3134

}

3135

}

3136

ok = re_node_set_insert (&union_set, next_node);

3137

if (BE (! ok, 0))

3138

{

3139

re_node_set_free (&union_set);

3140

return REG_ESPACE;

3141

}

3142

mctx->state_log[next_idx] = re_acquire_state (&err, dfa,

3143

&union_set);

3144

if (BE (mctx->state_log[next_idx] == NULL

3145

&& err != REG_NOERROR, 0))

3146

{

3147

re_node_set_free (&union_set);

3148

return err;

3149

}

3150

}

3151

}

3152

#endif /* RE_ENABLE_I18N */

3153

if (naccepted

3154

|| check_node_accept (mctx, dfa->nodes + cur_node, str_idx))

3155

{

3156

ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);

3157

if (BE (! ok, 0))

3158

{

3159

re_node_set_free (&union_set);

3160

return REG_ESPACE;

3161

}

3162

}

3163

}

3164

re_node_set_free (&union_set);

3165

return REG_NOERROR;

3166

}

3167

3168

/* For all the nodes in CUR_NODES, add the epsilon closures of them to

3169

CUR_NODES, however exclude the nodes which are:

3170

- inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.

3171

- out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.

3172

3173

3174

static reg_errcode_t

3175

internal_function

3176

check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,

3177

Idx ex_subexp, int type)

3178

{

3179

reg_errcode_t err;

3180

Idx idx, outside_node;

3181

re_node_set new_nodes;

3182

#ifdef DEBUG

3183

assert (cur_nodes->nelem);

3184

#endif

3185

err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);

3186

if (BE (err != REG_NOERROR, 0))

3187

return err;

3188

/* Create a new node set NEW_NODES with the nodes which are epsilon

3189

closures of the node in CUR_NODES. */

3190

3191

for (idx = 0; idx < cur_nodes->nelem; ++idx)

3192

{

3193

Idx cur_node = cur_nodes->elems[idx];

3194

const re_node_set *eclosure = dfa->eclosures + cur_node;

3195

outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);

3196

if (outside_node == REG_MISSING)

3197

{

3198

/* There are no problematic nodes, just merge them. */

3199

err = re_node_set_merge (&new_nodes, eclosure);

3200

if (BE (err != REG_NOERROR, 0))

3201

{

3202

re_node_set_free (&new_nodes);

3203

return err;

3204

}

3205

}

3206

else

3207

{

3208

/* There are problematic nodes, re-calculate incrementally. */

3209

err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,

3210

ex_subexp, type);

3211

if (BE (err != REG_NOERROR, 0))

3212

{

3213

re_node_set_free (&new_nodes);

3214

return err;

3215

}

3216

}

3217

}

3218

re_node_set_free (cur_nodes);

3219

*cur_nodes = new_nodes;

3220

return REG_NOERROR;

3221

}

3222

3223

/* Helper function for check_arrival_expand_ecl.

3224

Check incrementally the epsilon closure of TARGET, and if it isn't

3225

problematic append it to DST_NODES. */

3226

3227

static reg_errcode_t

3228

internal_function __attribute_warn_unused_result__

3229

check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,

3230

Idx target, Idx ex_subexp, int type)

3231

{

3232

Idx cur_node;

3233

for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)

3234

{

3235

bool ok;

3236

3237

if (dfa->nodes[cur_node].type == type

3238

&& dfa->nodes[cur_node].opr.idx == ex_subexp)

3239

{

3240

if (type == OP_CLOSE_SUBEXP)

3241

{

3242

ok = re_node_set_insert (dst_nodes, cur_node);

3243

if (BE (! ok, 0))

3244

return REG_ESPACE;

3245

}

3246

break;

3247

}

3248

ok = re_node_set_insert (dst_nodes, cur_node);

3249

if (BE (! ok, 0))

3250

return REG_ESPACE;

3251

if (dfa->edests[cur_node].nelem == 0)

3252

break;

3253

if (dfa->edests[cur_node].nelem == 2)

3254

{

3255

reg_errcode_t err;

3256

err = check_arrival_expand_ecl_sub (dfa, dst_nodes,

3257

dfa->edests[cur_node].elems[1],

3258

ex_subexp, type);

3259

if (BE (err != REG_NOERROR, 0))

3260

return err;

3261

}

3262

cur_node = dfa->edests[cur_node].elems[0];

3263

}

3264

return REG_NOERROR;

3265

}

3266

3267

3268

/* For all the back references in the current state, calculate the

3269

destination of the back references by the appropriate entry

3270

in MCTX->BKREF_ENTS. */

3271

3272

static reg_errcode_t

3273

internal_function __attribute_warn_unused_result__

3274

expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,

3275

Idx cur_str, Idx subexp_num, int type)

3276

{

3277

const re_dfa_t *const dfa = mctx->dfa;

3278

reg_errcode_t err;

3279

Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);

3280

struct re_backref_cache_entry *ent;

3281

3282

if (cache_idx_start == REG_MISSING)

3283

return REG_NOERROR;

3284

3285

restart:

3286

ent = mctx->bkref_ents + cache_idx_start;

3287

3288

{

3289

Idx to_idx, next_node;

3290

3291

/* Is this entry ENT is appropriate? */

3292

if (!re_node_set_contains (cur_nodes, ent->node))

3293

continue; /* No. */

3294

3295

to_idx = cur_str + ent->subexp_to - ent->subexp_from;

3296

/* Calculate the destination of the back reference, and append it

3297

to MCTX->STATE_LOG. */

3298

if (to_idx == cur_str)

3299

{

3300

/* The backreference did epsilon transit, we must re-check all the

3301

node in the current state. */

3302

re_node_set new_dests;

3303

reg_errcode_t err2, err3;

3304

next_node = dfa->edests[ent->node].elems[0];

3305

if (re_node_set_contains (cur_nodes, next_node))

3306

continue;

3307

err = re_node_set_init_1 (&new_dests, next_node);

3308

err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);

3309

err3 = re_node_set_merge (cur_nodes, &new_dests);

3310

re_node_set_free (&new_dests);

3311

if (BE (err != REG_NOERROR || err2 != REG_NOERROR

3312

|| err3 != REG_NOERROR, 0))

3313

{

3314

err = (err != REG_NOERROR ? err

3315

: (err2 != REG_NOERROR ? err2 : err3));

3316

return err;

3317

}

3318

/* TODO: It is still inefficient... */

3319

goto restart;

3320

}

3321

else

3322

{

3323

re_node_set union_set;

3324

next_node = dfa->nexts[ent->node];

3325

if (mctx->state_log[to_idx])

3326

{

3327

bool ok;

3328

if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,

3329

next_node))

3330

continue;

3331

err = re_node_set_init_copy (&union_set,

3332

&mctx->state_log[to_idx]->nodes);

3333

ok = re_node_set_insert (&union_set, next_node);

3334

if (BE (err != REG_NOERROR || ! ok, 0))

3335

{

3336

re_node_set_free (&union_set);

3337

err = err != REG_NOERROR ? err : REG_ESPACE;

3338

return err;

3339

}

3340

}

3341

else

3342

{

3343

err = re_node_set_init_1 (&union_set, next_node);

3344

if (BE (err != REG_NOERROR, 0))

3345

return err;

3346

}

3347

mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);

3348

re_node_set_free (&union_set);

3349

if (BE (mctx->state_log[to_idx] == NULL

3350

&& err != REG_NOERROR, 0))

3351

return err;

3352

}

3353

}

3354

while (ent++->more);

3355

return REG_NOERROR;

3356

}

3357

3358

/* Build transition table for the state.

3359

Return true if successful. */

3360

3361

static bool

3362

internal_function

3363

build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)

3364

{

3365

reg_errcode_t err;

3366

Idx i, j;

3367

int ch;

3368

bool need_word_trtable = false;

3369

bitset_word_t elem, mask;

3370

bool dests_node_malloced = false;

3371

bool dest_states_malloced = false;

3372

Idx ndests; /* Number of the destination states from `state'. */

3373

re_dfastate_t **trtable;

3374

re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;

3375

re_node_set follows, *dests_node;

3376

bitset_t *dests_ch;

3377

bitset_t acceptable;

3378

3379

struct dests_alloc

3380

{

3381

re_node_set dests_node[SBC_MAX];

3382

bitset_t dests_ch[SBC_MAX];

3383

} *dests_alloc;

3384

3385

/* We build DFA states which corresponds to the destination nodes

3386

from `state'. `dests_node[i]' represents the nodes which i-th

3387

destination state contains, and `dests_ch[i]' represents the

3388

characters which i-th destination state accepts. */

3389

if (__libc_use_alloca (sizeof (struct dests_alloc)))

3390

dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));

3391

else

3392

{

3393

dests_alloc = re_malloc (struct dests_alloc, 1);

3394

if (BE (dests_alloc == NULL, 0))

3395

return false;

3396

dests_node_malloced = true;

3397

}

3398

dests_node = dests_alloc->dests_node;

3399

dests_ch = dests_alloc->dests_ch;

3400

3401

/* Initialize transiton table. */

3402

state->word_trtable = state->trtable = NULL;

3403

3404

/* At first, group all nodes belonging to `state' into several

3405

destinations. */

3406

ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);

3407

if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0))

3408

{

3409

if (dests_node_malloced)

3410

free (dests_alloc);

3411

if (ndests == 0)

3412

{

3413

state->trtable = (re_dfastate_t **)

3414

calloc (sizeof (re_dfastate_t *), SBC_MAX);

3415

return true;

3416

}

3417

return false;

3418

}

3419

3420

err = re_node_set_alloc (&follows, ndests + 1);

3421

if (BE (err != REG_NOERROR, 0))

3422

goto out_free;

3423

3424

/* Avoid arithmetic overflow in size calculation. */

3425

if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)

3426

/ (3 * sizeof (re_dfastate_t *)))

3427

< ndests),

3428

0))

3429

goto out_free;

3430

3431

if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX

3432

+ ndests * 3 * sizeof (re_dfastate_t *)))

3433

dest_states = (re_dfastate_t **)

3434

alloca (ndests * 3 * sizeof (re_dfastate_t *));

3435

else

3436

{

3437

dest_states = (re_dfastate_t **)

3438

malloc (ndests * 3 * sizeof (re_dfastate_t *));

3439

if (BE (dest_states == NULL, 0))

3440

{

3441

out_free:

3442

if (dest_states_malloced)

3443

free (dest_states);

3444

re_node_set_free (&follows);

3445

for (i = 0; i < ndests; ++i)

3446

re_node_set_free (dests_node + i);

3447

if (dests_node_malloced)

3448

free (dests_alloc);

3449

return false;

3450

}

3451

dest_states_malloced = true;

3452

}

3453

dest_states_word = dest_states + ndests;

3454

dest_states_nl = dest_states_word + ndests;

3455

bitset_empty (acceptable);

3456

3457

/* Then build the states for all destinations. */

3458

for (i = 0; i < ndests; ++i)

3459

{

3460

Idx next_node;

3461

re_node_set_empty (&follows);

3462

/* Merge the follows of this destination states. */

3463

for (j = 0; j < dests_node[i].nelem; ++j)

3464

{

3465

next_node = dfa->nexts[dests_node[i].elems[j]];

3466

if (next_node != REG_MISSING)

3467

{

3468

err = re_node_set_merge (&follows, dfa->eclosures + next_node);

3469

if (BE (err != REG_NOERROR, 0))

3470

goto out_free;

3471

}

3472

}

3473

dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);

3474

if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))

3475

goto out_free;

3476

/* If the new state has context constraint,

3477

build appropriate states for these contexts. */

3478

if (dest_states[i]->has_constraint)

3479

{

3480

dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,

3481

CONTEXT_WORD);

3482

if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))

3483

goto out_free;

3484

3485

if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)

3486

need_word_trtable = true;

3487

3488

dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,

3489

CONTEXT_NEWLINE);

3490

if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))

3491

goto out_free;

3492

}

3493

else

3494

{

3495

dest_states_word[i] = dest_states[i];

3496

dest_states_nl[i] = dest_states[i];

3497

}

3498

bitset_merge (acceptable, dests_ch[i]);

3499

}

3500

3501

if (!BE (need_word_trtable, 0))

3502

{

3503

/* We don't care about whether the following character is a word

3504

character, or we are in a single-byte character set so we can

3505

discern by looking at the character code: allocate a

3506

256-entry transition table. */

3507

trtable = state->trtable =

3508

(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);

3509

if (BE (trtable == NULL, 0))

3510

goto out_free;

3511

3512

/* For all characters ch...: */

3513

for (i = 0; i < BITSET_WORDS; ++i)

3514

for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;

3515

elem;

3516

mask <<= 1, elem >>= 1, ++ch)

3517

if (BE (elem & 1, 0))

3518

{

3519

/* There must be exactly one destination which accepts

3520

character ch. See group_nodes_into_DFAstates. */

3521

for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)

3522

;

3523

3524

/* j-th destination accepts the word character ch. */

3525

if (dfa->word_char[i] & mask)

3526

trtable[ch] = dest_states_word[j];

3527

else

3528

trtable[ch] = dest_states[j];

3529

}

3530

}

3531

else

3532

{

3533

/* We care about whether the following character is a word

3534

character, and we are in a multi-byte character set: discern

3535

by looking at the character code: build two 256-entry

3536

transition tables, one starting at trtable[0] and one

3537

starting at trtable[SBC_MAX]. */

3538

trtable = state->word_trtable =

3539

(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);

3540

if (BE (trtable == NULL, 0))

3541

goto out_free;

3542

3543

/* For all characters ch...: */

3544

for (i = 0; i < BITSET_WORDS; ++i)

3545

for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;

3546

elem;

3547

mask <<= 1, elem >>= 1, ++ch)

3548

if (BE (elem & 1, 0))

3549

{

3550

/* There must be exactly one destination which accepts

3551

character ch. See group_nodes_into_DFAstates. */

3552

for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)

3553

;

3554

3555

/* j-th destination accepts the word character ch. */

3556

trtable[ch] = dest_states[j];

3557

trtable[ch + SBC_MAX] = dest_states_word[j];

3558

}

3559

}

3560

3561

/* new line */

3562

if (bitset_contain (acceptable, NEWLINE_CHAR))

3563

{

3564

/* The current state accepts newline character. */

3565

for (j = 0; j < ndests; ++j)

3566

if (bitset_contain (dests_ch[j], NEWLINE_CHAR))

3567

{

3568

/* k-th destination accepts newline character. */

3569

trtable[NEWLINE_CHAR] = dest_states_nl[j];

3570

if (need_word_trtable)

3571

trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];

3572

/* There must be only one destination which accepts

3573

newline. See group_nodes_into_DFAstates. */

3574

break;

3575

}

3576

}

3577

3578

if (dest_states_malloced)

3579

free (dest_states);

3580

3581

re_node_set_free (&follows);

3582

for (i = 0; i < ndests; ++i)

3583

re_node_set_free (dests_node + i);

3584

3585

if (dests_node_malloced)

3586

free (dests_alloc);

3587

3588

return true;

3589

}

3590

3591

/* Group all nodes belonging to STATE into several destinations.

3592

Then for all destinations, set the nodes belonging to the destination

3593

to DESTS_NODE[i] and set the characters accepted by the destination

3594

to DEST_CH[i]. This function return the number of destinations. */

3595

3596

static Idx

3597

internal_function

3598

group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,

3599

re_node_set *dests_node, bitset_t *dests_ch)

3600

{

3601

reg_errcode_t err;

3602

bool ok;

3603

Idx i, j, k;

3604

Idx ndests; /* Number of the destinations from `state'. */

3605

bitset_t accepts; /* Characters a node can accept. */

3606

const re_node_set *cur_nodes = &state->nodes;

3607

bitset_empty (accepts);

3608

ndests = 0;

3609

3610

/* For all the nodes belonging to `state', */

3611

for (i = 0; i < cur_nodes->nelem; ++i)

3612

{

3613

re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];

3614

re_token_type_t type = node->type;

3615

unsigned int constraint = node->constraint;

3616

3617

/* Enumerate all single byte character this node can accept. */

3618

if (type == CHARACTER)

3619

bitset_set (accepts, node->opr.c);

3620

else if (type == SIMPLE_BRACKET)

3621

{

3622

bitset_merge (accepts, node->opr.sbcset);

3623

}

3624

else if (type == OP_PERIOD)

3625

{

3626

#ifdef RE_ENABLE_I18N

3627

if (dfa->mb_cur_max > 1)

3628

bitset_merge (accepts, dfa->sb_char);

3629

else

3630

#endif

3631

bitset_set_all (accepts);

3632

if (!(dfa->syntax & RE_DOT_NEWLINE))

3633

bitset_clear (accepts, '\n');

3634

if (dfa->syntax & RE_DOT_NOT_NULL)

3635

bitset_clear (accepts, '\0');

3636

}

3637

#ifdef RE_ENABLE_I18N

3638

else if (type == OP_UTF8_PERIOD)

3639

{

3640

if (ASCII_CHARS % BITSET_WORD_BITS == 0)

3641

memset (accepts, -1, ASCII_CHARS / CHAR_BIT);

3642

else

3643

bitset_merge (accepts, utf8_sb_map);

3644

if (!(dfa->syntax & RE_DOT_NEWLINE))

3645

bitset_clear (accepts, '\n');

3646

if (dfa->syntax & RE_DOT_NOT_NULL)

3647

bitset_clear (accepts, '\0');

3648

}

3649

#endif

3650

else

3651

continue;

3652

3653

/* Check the `accepts' and sift the characters which are not

3654

match it the context. */

3655

if (constraint)

3656

{

3657

if (constraint & NEXT_NEWLINE_CONSTRAINT)

3658

{

3659

bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);

3660

bitset_empty (accepts);

3661

if (accepts_newline)

3662

bitset_set (accepts, NEWLINE_CHAR);

3663

else

3664

continue;

3665

}

3666

if (constraint & NEXT_ENDBUF_CONSTRAINT)

3667

{

3668

bitset_empty (accepts);

3669

continue;

3670

}

3671

3672

if (constraint & NEXT_WORD_CONSTRAINT)

3673

{

3674

bitset_word_t any_set = 0;

3675

if (type == CHARACTER && !node->word_char)

3676

{

3677

bitset_empty (accepts);

3678

continue;

3679

}

3680

#ifdef RE_ENABLE_I18N

3681

if (dfa->mb_cur_max > 1)

3682

for (j = 0; j < BITSET_WORDS; ++j)

3683

any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));

3684

else

3685

#endif

3686

for (j = 0; j < BITSET_WORDS; ++j)

3687

any_set |= (accepts[j] &= dfa->word_char[j]);

3688

if (!any_set)

3689

continue;

3690

}

3691

if (constraint & NEXT_NOTWORD_CONSTRAINT)

3692

{

3693

bitset_word_t any_set = 0;

3694

if (type == CHARACTER && node->word_char)

3695

{

3696

bitset_empty (accepts);

3697

continue;

3698

}

3699

#ifdef RE_ENABLE_I18N

3700

if (dfa->mb_cur_max > 1)

3701

for (j = 0; j < BITSET_WORDS; ++j)

3702

any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));

3703

else

3704

#endif

3705

for (j = 0; j < BITSET_WORDS; ++j)

3706

any_set |= (accepts[j] &= ~dfa->word_char[j]);

3707

if (!any_set)

3708

continue;

3709

}

3710

}

3711

3712

/* Then divide `accepts' into DFA states, or create a new

3713

state. Above, we make sure that accepts is not empty. */

3714

for (j = 0; j < ndests; ++j)

3715

{

3716

bitset_t intersec; /* Intersection sets, see below. */

3717

bitset_t remains;

3718

/* Flags, see below. */

3719

bitset_word_t has_intersec, not_subset, not_consumed;

3720

3721

/* Optimization, skip if this state doesn't accept the character. */

3722

if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))

3723

continue;

3724

3725

/* Enumerate the intersection set of this state and `accepts'. */

3726

has_intersec = 0;

3727

for (k = 0; k < BITSET_WORDS; ++k)

3728

has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];

3729

/* And skip if the intersection set is empty. */

3730

if (!has_intersec)

3731

continue;

3732

3733

/* Then check if this state is a subset of `accepts'. */

3734

not_subset = not_consumed = 0;

3735

for (k = 0; k < BITSET_WORDS; ++k)

3736

{

3737

not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];

3738

not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];

3739

}

3740

3741

/* If this state isn't a subset of `accepts', create a

3742

new group state, which has the `remains'. */

3743

if (not_subset)

3744

{

3745

bitset_copy (dests_ch[ndests], remains);

3746

bitset_copy (dests_ch[j], intersec);

3747

err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);

3748

if (BE (err != REG_NOERROR, 0))

3749

goto error_return;

3750

++ndests;

3751

}

3752

3753

/* Put the position in the current group. */

3754

ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);

3755

if (BE (! ok, 0))

3756

goto error_return;

3757

3758

/* If all characters are consumed, go to next node. */

3759

if (!not_consumed)

3760

break;

3761

}

3762

/* Some characters remain, create a new group. */

3763

if (j == ndests)

3764

{

3765

bitset_copy (dests_ch[ndests], accepts);

3766

err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);

3767

if (BE (err != REG_NOERROR, 0))

3768

goto error_return;

3769

++ndests;

3770

bitset_empty (accepts);

3771

}

3772

}

3773

return ndests;

3774

error_return:

3775

for (j = 0; j < ndests; ++j)

3776

re_node_set_free (dests_node + j);

3777

return REG_MISSING;

3778

}

3779

3780

#ifdef RE_ENABLE_I18N

3781

/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.

3782

Return the number of the bytes the node accepts.

3783

STR_IDX is the current index of the input string.

3784

3785

This function handles the nodes which can accept one character, or

3786

one collating element like '.', '[a-z]', opposite to the other nodes

3787

can only accept one byte. */

3788

3789

static int

3790

internal_function

3791

check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,

3792

const re_string_t *input, Idx str_idx)

3793

{

3794

const re_token_t *node = dfa->nodes + node_idx;

3795

int char_len, elem_len;

3796

Idx i;

3797

3798

if (BE (node->type == OP_UTF8_PERIOD, 0))

3799

{

3800

unsigned char c = re_string_byte_at (input, str_idx), d;

3801

if (BE (c < 0xc2, 1))

3802

return 0;

3803

3804

if (str_idx + 2 > input->len)

3805

return 0;

3806

3807

d = re_string_byte_at (input, str_idx + 1);

3808

if (c < 0xe0)

3809

return (d < 0x80 || d > 0xbf) ? 0 : 2;

3810

else if (c < 0xf0)

3811

{

3812

char_len = 3;

3813

if (c == 0xe0 && d < 0xa0)

3814

return 0;

3815

}

3816

else if (c < 0xf8)

3817

{

3818

char_len = 4;

3819

if (c == 0xf0 && d < 0x90)

3820

return 0;

3821

}

3822

else if (c < 0xfc)

3823

{

3824

char_len = 5;

3825

if (c == 0xf8 && d < 0x88)

3826

return 0;

3827

}

3828

else if (c < 0xfe)

3829

{

3830

char_len = 6;

3831

if (c == 0xfc && d < 0x84)

3832

return 0;

3833

}

3834

else

3835

return 0;

3836

3837

if (str_idx + char_len > input->len)

3838

return 0;

3839

3840

for (i = 1; i < char_len; ++i)

3841

{

3842

d = re_string_byte_at (input, str_idx + i);

3843

if (d < 0x80 || d > 0xbf)

3844

return 0;

3845

}

3846

return char_len;

3847

}

3848

3849

char_len = re_string_char_size_at (input, str_idx);

3850

if (node->type == OP_PERIOD)

3851

{

3852

if (char_len <= 1)

3853

return 0;

3854

/* FIXME: I don't think this if is needed, as both '\n'

3855

and '\0' are char_len == 1. */

3856

/* '.' accepts any one character except the following two cases. */

3857

if ((!(dfa->syntax & RE_DOT_NEWLINE) &&

3858

re_string_byte_at (input, str_idx) == '\n') ||

3859

((dfa->syntax & RE_DOT_NOT_NULL) &&

3860

re_string_byte_at (input, str_idx) == '\0'))

3861

return 0;

3862

return char_len;

3863

}

3864

3865

elem_len = re_string_elem_size_at (input, str_idx);

3866

if ((elem_len <= 1 && char_len <= 1) || char_len == 0)

3867

return 0;

3868

3869

if (node->type == COMPLEX_BRACKET)

3870

{

3871

const re_charset_t *cset = node->opr.mbcset;

3872

# ifdef _LIBC

3873

const unsigned char *pin

3874

= ((const unsigned char *) re_string_get_buffer (input) + str_idx);

3875

Idx j;

3876

uint32_t nrules;

3877

# endif /* _LIBC */

3878

int match_len = 0;

3879

wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)

3880

? re_string_wchar_at (input, str_idx) : 0);

3881

3882

/* match with multibyte character? */

3883

for (i = 0; i < cset->nmbchars; ++i)

3884

if (wc == cset->mbchars[i])

3885

{

3886

match_len = char_len;

3887

goto check_node_accept_bytes_match;

3888

}

3889

/* match with character_class? */

3890

for (i = 0; i < cset->nchar_classes; ++i)

3891

{

3892

wctype_t wt = cset->char_classes[i];

3893

if (__iswctype (wc, wt))

3894

{

3895

match_len = char_len;

3896

goto check_node_accept_bytes_match;

3897

}

3898

}

3899

3900

# ifdef _LIBC

3901

nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);

3902

if (nrules != 0)

3903

{

3904

unsigned int in_collseq = 0;

3905

const int32_t *table, *indirect;

3906

const unsigned char *weights, *extra;

3907

const char *collseqwc;

3908

int32_t idx;

3909

/* This #include defines a local function! */

3910

# include <locale/weight.h>

3911

3912

/* match with collating_symbol? */

3913

if (cset->ncoll_syms)

3914

extra = (const unsigned char *)

3915

_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);

3916

for (i = 0; i < cset->ncoll_syms; ++i)

3917

{

3918

const unsigned char *coll_sym = extra + cset->coll_syms[i];

3919

/* Compare the length of input collating element and

3920

the length of current collating element. */

3921

if (*coll_sym != elem_len)

3922

continue;

3923

/* Compare each bytes. */

3924

for (j = 0; j < *coll_sym; j++)

3925

if (pin[j] != coll_sym[1 + j])

3926

break;

3927

if (j == *coll_sym)

3928

{

3929

/* Match if every bytes is equal. */

3930

match_len = j;

3931

goto check_node_accept_bytes_match;

3932

}

3933

}

3934

3935

if (cset->nranges)

3936

{

3937

if (elem_len <= char_len)

3938

{

3939

collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);

3940

in_collseq = __collseq_table_lookup (collseqwc, wc);

3941

}

3942

else

3943

in_collseq = find_collation_sequence_value (pin, elem_len);

3944

}

3945

/* match with range expression? */

3946

for (i = 0; i < cset->nranges; ++i)

3947

if (cset->range_starts[i] <= in_collseq

3948

&& in_collseq <= cset->range_ends[i])

3949

{

3950

match_len = elem_len;

3951

goto check_node_accept_bytes_match;

3952

}

3953

3954

/* match with equivalence_class? */

3955

if (cset->nequiv_classes)

3956

{

3957

const unsigned char *cp = pin;

3958

table = (const int32_t *)

3959

_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);

3960

weights = (const unsigned char *)

3961

_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);

3962

extra = (const unsigned char *)

3963

_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);

3964

indirect = (const int32_t *)

3965

_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);

3966

int32_t idx = findidx (&cp);

3967

if (idx > 0)

3968

for (i = 0; i < cset->nequiv_classes; ++i)

3969

{

3970

int32_t equiv_class_idx = cset->equiv_classes[i];

3971

size_t weight_len = weights[idx & 0xffffff];

3972

if (weight_len == weights[equiv_class_idx & 0xffffff]

3973

&& (idx >> 24) == (equiv_class_idx >> 24))

3974

{

3975

Idx cnt = 0;

3976

3977

idx &= 0xffffff;

3978

equiv_class_idx &= 0xffffff;

3979

3980

while (cnt <= weight_len

3981

&& (weights[equiv_class_idx + 1 + cnt]

3982

== weights[idx + 1 + cnt]))

3983

++cnt;

3984

if (cnt > weight_len)

3985

{

3986

match_len = elem_len;

3987

goto check_node_accept_bytes_match;

3988

}

3989

}

3990

}

3991

}

3992

}

3993

else

3994

# endif /* _LIBC */

3995

{

3996

/* match with range expression? */

3997

#if __GNUC__ >= 2 && ! (__STDC_VERSION__ < 199901L && __STRICT_ANSI__)

3998

wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};

3999

#else

4000

wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};

4001

cmp_buf[2] = wc;

4002

#endif

4003

for (i = 0; i < cset->nranges; ++i)

4004

{

4005

cmp_buf[0] = cset->range_starts[i];

4006

cmp_buf[4] = cset->range_ends[i];

4007

if (wcscoll (cmp_buf, cmp_buf + 2) <= 0

4008

&& wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)

4009

{

4010

match_len = char_len;

4011

goto check_node_accept_bytes_match;

4012

}

4013

}

4014

}

4015

check_node_accept_bytes_match:

4016

if (!cset->non_match)

4017

return match_len;

4018

else

4019

{

4020

if (match_len > 0)

4021

return 0;

4022

else

4023

return (elem_len > char_len) ? elem_len : char_len;

4024

}

4025

}

4026

return 0;

4027

}

4028

4029

# ifdef _LIBC

4030

static unsigned int

4031

internal_function

4032

find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)

4033

{

4034

uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);

4035

if (nrules == 0)

4036

{

4037

if (mbs_len == 1)

4038

{

4039

/* No valid character. Match it as a single byte character. */

4040

const unsigned char *collseq = (const unsigned char *)

4041

_NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);

4042

return collseq[mbs[0]];

4043

}

4044

return UINT_MAX;

4045

}

4046

else

4047

{

4048

int32_t idx;

4049

const unsigned char *extra = (const unsigned char *)

4050

_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);

4051

int32_t extrasize = (const unsigned char *)

4052

_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;

4053

4054

for (idx = 0; idx < extrasize;)

4055

{

4056

int mbs_cnt;

4057

bool found = false;

4058

int32_t elem_mbs_len;

4059

/* Skip the name of collating element name. */

4060

idx = idx + extra[idx] + 1;

4061

elem_mbs_len = extra[idx++];

4062

if (mbs_len == elem_mbs_len)

4063

{

4064

for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)

4065

if (extra[idx + mbs_cnt] != mbs[mbs_cnt])

4066

break;

4067

if (mbs_cnt == elem_mbs_len)

4068

/* Found the entry. */

4069

found = true;

4070

}

4071

/* Skip the byte sequence of the collating element. */

4072

idx += elem_mbs_len;

4073

/* Adjust for the alignment. */

4074

idx = (idx + 3) & ~3;

4075

/* Skip the collation sequence value. */

4076

idx += sizeof (uint32_t);

4077

/* Skip the wide char sequence of the collating element. */

4078

idx = idx + sizeof (uint32_t) * (extra[idx] + 1);

4079

/* If we found the entry, return the sequence value. */

4080

if (found)

4081

return *(uint32_t *) (extra + idx);

4082

/* Skip the collation sequence value. */

4083

idx += sizeof (uint32_t);

4084

}

4085

return UINT_MAX;

4086

}

4087

}

4088

# endif /* _LIBC */

4089

#endif /* RE_ENABLE_I18N */

4090

4091

/* Check whether the node accepts the byte which is IDX-th

4092

byte of the INPUT. */

4093

4094

static bool

4095

internal_function

4096

check_node_accept (const re_match_context_t *mctx, const re_token_t *node,

4097

Idx idx)

4098

{

4099

unsigned char ch;

4100

ch = re_string_byte_at (&mctx->input, idx);

4101

switch (node->type)

4102

{

4103

case CHARACTER:

4104

if (node->opr.c != ch)

4105

return false;

4106

break;

4107

4108

case SIMPLE_BRACKET:

4109

if (!bitset_contain (node->opr.sbcset, ch))

4110

return false;

4111

break;

4112

4113

#ifdef RE_ENABLE_I18N

4114

case OP_UTF8_PERIOD:

4115

if (ch >= ASCII_CHARS)

4116

return false;

4117

/* FALLTHROUGH */

4118

#endif

4119

case OP_PERIOD:

4120

if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))

4121

|| (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))

4122

return false;

4123

break;

4124

4125

default:

4126

return false;

4127

}

4128

4129

if (node->constraint)

4130

{

4131

/* The node has constraints. Check whether the current context

4132

satisfies the constraints. */

4133

unsigned int context = re_string_context_at (&mctx->input, idx,

4134

mctx->eflags);

4135

if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))

4136

return false;

4137

}

4138

4139

return true;

4140

}

4141

4142

/* Extend the buffers, if the buffers have run out. */

4143

4144

static reg_errcode_t

4145

internal_function __attribute_warn_unused_result__

4146

extend_buffers (re_match_context_t *mctx)

4147

{

4148

reg_errcode_t ret;

4149

re_string_t *pstr = &mctx->input;

4150

4151

/* Avoid overflow. */

4152

if (BE (SIZE_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))

4153

return REG_ESPACE;

4154

4155

/* Double the lengthes of the buffers. */

4156

ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);

4157

if (BE (ret != REG_NOERROR, 0))

4158

return ret;

4159

4160

if (mctx->state_log != NULL)

4161

{

4162

/* And double the length of state_log. */

4163

/* XXX We have no indication of the size of this buffer. If this

4164

allocation fail we have no indication that the state_log array

4165

does not have the right size. */

4166

re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,

4167

pstr->bufs_len + 1);

4168

if (BE (new_array == NULL, 0))

4169

return REG_ESPACE;

4170

mctx->state_log = new_array;

4171

}

4172

4173

/* Then reconstruct the buffers. */

4174

if (pstr->icase)

4175

{

4176

#ifdef RE_ENABLE_I18N

4177

if (pstr->mb_cur_max > 1)

4178

{

4179

ret = build_wcs_upper_buffer (pstr);

4180

if (BE (ret != REG_NOERROR, 0))

4181

return ret;

4182

}

4183

else

4184

#endif /* RE_ENABLE_I18N */

4185

build_upper_buffer (pstr);

4186

}

4187

else

4188

{

4189

#ifdef RE_ENABLE_I18N

4190

if (pstr->mb_cur_max > 1)

4191

build_wcs_buffer (pstr);

4192

else

4193

#endif /* RE_ENABLE_I18N */

4194

{

4195

if (pstr->trans != NULL)

4196

re_string_translate_buffer (pstr);

4197

}

4198

}

4199

return REG_NOERROR;

4200

}

4201

4202

4203

/* Functions for matching context. */

4204

4205

/* Initialize MCTX. */

4206

4207

static reg_errcode_t

4208

internal_function __attribute_warn_unused_result__

4209

match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)

4210

{

4211

mctx->eflags = eflags;

4212

mctx->match_last = REG_MISSING;

4213

if (n > 0)

4214

{

4215

/* Avoid overflow. */

4216

size_t max_object_size =

4217

MAX (sizeof (struct re_backref_cache_entry),

4218

sizeof (re_sub_match_top_t *));

4219

if (BE (SIZE_MAX / max_object_size < n, 0))

4220

return REG_ESPACE;

4221

4222

mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);

4223

mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);

4224

if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))

4225

return REG_ESPACE;

4226

}

4227

/* Already zero-ed by the caller.

4228

else

4229

mctx->bkref_ents = NULL;

4230

mctx->nbkref_ents = 0;

4231

mctx->nsub_tops = 0; */

4232

mctx->abkref_ents = n;

4233

mctx->max_mb_elem_len = 1;

4234

mctx->asub_tops = n;

4235

return REG_NOERROR;

4236

}

4237

4238

/* Clean the entries which depend on the current input in MCTX.

4239

This function must be invoked when the matcher changes the start index

4240

of the input, or changes the input string. */

4241

4242

static void

4243

internal_function

4244

match_ctx_clean (re_match_context_t *mctx)

4245

{

4246

Idx st_idx;

4247

for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)

4248

{

4249

Idx sl_idx;

4250

re_sub_match_top_t *top = mctx->sub_tops[st_idx];

4251

for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)

4252

{

4253

re_sub_match_last_t *last = top->lasts[sl_idx];

4254

re_free (last->path.array);

4255

re_free (last);

4256

}

4257

re_free (top->lasts);

4258

if (top->path)

4259

{

4260

re_free (top->path->array);

4261

re_free (top->path);

4262

}

4263

free (top);

4264

}

4265

4266

mctx->nsub_tops = 0;

4267

mctx->nbkref_ents = 0;

4268

}

4269

4270

/* Free all the memory associated with MCTX. */

4271

4272

static void

4273

internal_function

4274

match_ctx_free (re_match_context_t *mctx)

4275

{

4276

/* First, free all the memory associated with MCTX->SUB_TOPS. */

4277

match_ctx_clean (mctx);

4278

re_free (mctx->sub_tops);

4279

re_free (mctx->bkref_ents);

4280

}

4281

4282

/* Add a new backreference entry to MCTX.

4283

Note that we assume that caller never call this function with duplicate

4284

entry, and call with STR_IDX which isn't smaller than any existing entry.

4285

4286

4287

static reg_errcode_t

4288

internal_function __attribute_warn_unused_result__

4289

match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,

4290

Idx to)

4291

{

4292

if (mctx->nbkref_ents >= mctx->abkref_ents)

4293

{

4294

struct re_backref_cache_entry* new_entry;

4295

new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,

4296

mctx->abkref_ents * 2);

4297

if (BE (new_entry == NULL, 0))

4298

{

4299

re_free (mctx->bkref_ents);

4300

return REG_ESPACE;

4301

}

4302

mctx->bkref_ents = new_entry;

4303

memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',

4304

sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);

4305

mctx->abkref_ents *= 2;

4306

}

4307

if (mctx->nbkref_ents > 0

4308

&& mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)

4309

mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;

4310

4311

mctx->bkref_ents[mctx->nbkref_ents].node = node;

4312

mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;

4313

mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;

4314

mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;

4315

4316

/* This is a cache that saves negative results of check_dst_limits_calc_pos.

4317

If bit N is clear, means that this entry won't epsilon-transition to

4318

an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If

4319

it is set, check_dst_limits_calc_pos_1 will recurse and try to find one

4320

such node.

4321

4322

A backreference does not epsilon-transition unless it is empty, so set

4323

to all zeros if FROM != TO. */

4324

mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map

4325

= (from == to ? -1 : 0);

4326

4327

mctx->bkref_ents[mctx->nbkref_ents++].more = 0;

4328

if (mctx->max_mb_elem_len < to - from)

4329

mctx->max_mb_elem_len = to - from;

4330

return REG_NOERROR;

4331

}

4332

4333

/* Return the first entry with the same str_idx, or REG_MISSING if none is

4334

found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */

4335

4336

static Idx

4337

internal_function

4338

search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)

4339

{

4340

Idx left, right, mid, last;

4341

last = right = mctx->nbkref_ents;

4342

for (left = 0; left < right;)

4343

{

4344

mid = (left + right) / 2;

4345

if (mctx->bkref_ents[mid].str_idx < str_idx)

4346

left = mid + 1;

4347

else

4348

right = mid;

4349

}

4350

if (left < last && mctx->bkref_ents[left].str_idx == str_idx)

4351

return left;

4352

else

4353

return REG_MISSING;

4354

}

4355

4356

/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches

4357

at STR_IDX. */

4358

4359

static reg_errcode_t

4360

internal_function __attribute_warn_unused_result__

4361

match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)

4362

{

4363

#ifdef DEBUG

4364

assert (mctx->sub_tops != NULL);

4365

assert (mctx->asub_tops > 0);

4366

#endif

4367

if (BE (mctx->nsub_tops == mctx->asub_tops, 0))

4368

{

4369

Idx new_asub_tops = mctx->asub_tops * 2;

4370

re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,

4371

re_sub_match_top_t *,

4372

new_asub_tops);

4373

if (BE (new_array == NULL, 0))

4374

return REG_ESPACE;

4375

mctx->sub_tops = new_array;

4376

mctx->asub_tops = new_asub_tops;

4377

}

4378

mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));

4379

if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))

4380

return REG_ESPACE;

4381

mctx->sub_tops[mctx->nsub_tops]->node = node;

4382

mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;

4383

return REG_NOERROR;

4384

}

4385

4386

/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches

4387

at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */

4388

4389

static re_sub_match_last_t *

4390

internal_function

4391

match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)

4392

{

4393

re_sub_match_last_t *new_entry;

4394

if (BE (subtop->nlasts == subtop->alasts, 0))

4395

{

4396

Idx new_alasts = 2 * subtop->alasts + 1;

4397

re_sub_match_last_t **new_array = re_realloc (subtop->lasts,

4398

re_sub_match_last_t *,

4399

new_alasts);

4400

if (BE (new_array == NULL, 0))

4401

return NULL;

4402

subtop->lasts = new_array;

4403

subtop->alasts = new_alasts;

4404

}

4405

new_entry = calloc (1, sizeof (re_sub_match_last_t));

4406

if (BE (new_entry != NULL, 1))

4407

{

4408

subtop->lasts[subtop->nlasts] = new_entry;

4409

new_entry->node = node;

4410

new_entry->str_idx = str_idx;

4411

++subtop->nlasts;

4412

}

4413

return new_entry;

4414

}

4415

4416

static void

4417

internal_function

4418

sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,

4419

re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)

4420

{

4421

sctx->sifted_states = sifted_sts;

4422

sctx->limited_states = limited_sts;

4423

sctx->last_node = last_node;

4424

sctx->last_str_idx = last_str_idx;

4425

re_node_set_init_empty (&sctx->limits);

4426

}

Older »