~vcs-imports/gawk/master

« back to all changes in this revision

Viewing changes to regexec.c

Committer: Arnold D. Robbins
Date: 2014-01-22 19:54:07 UTC
mto: (408.8.1) (509.1.1) (522.1.1) (731.1.8)
mto: This revision was merged to the branch mainline in revision 502.
Revision ID: git-v1:51038bda2cde5e5a5a4fa1fd54626d4cb107e7ef

Update README.solaris.

files added:
README.cvs

README_d/README.os2

awklib/eg/data/BBS-list

awklib/eg/misc/arraymax.awk

awklib/eg/misc/arraymax.data

bisonfix.awk

config.guess

config.rpath

config.sub

depcomp

dfa.c

dfa.h

doc/api-figure1.eps

doc/api-figure1.fig

doc/api-figure1.pdf

doc/api-figure1.png

doc/api-figure1.txt

doc/api-figure2.eps

doc/api-figure2.fig

doc/api-figure2.pdf

doc/api-figure2.png

doc/api-figure2.txt

doc/api-figure3.eps

doc/api-figure3.fig

doc/api-figure3.pdf

doc/api-figure3.png

doc/api-figure3.txt

doc/gawktexi.in

doc/general-program.eps

doc/general-program.fig

doc/general-program.pdf

doc/general-program.png

doc/general-program.txt

doc/process-flow.eps

doc/process-flow.fig

doc/process-flow.pdf

doc/process-flow.png

doc/process-flow.txt

doc/sidebar.awk

doc/statist.eps

doc/statist.jpg

doc/statist.pdf

extension/build-aux

extension/build-aux/ChangeLog

extension/build-aux/ar-lib

extension/build-aux/config.guess

extension/build-aux/config.rpath

extension/build-aux/config.sub

extension/build-aux/depcomp

extension/build-aux/install-sh

extension/build-aux/ltmain.sh

extension/build-aux/missing

extension/m4

extension/m4/ChangeLog

extension/m4/dirfd.m4

extension/m4/gettext.m4

extension/m4/iconv.m4

extension/m4/intlmacosx.m4

extension/m4/lib-ld.m4

extension/m4/lib-link.m4

extension/m4/lib-prefix.m4

extension/m4/libtool.m4

extension/m4/ltoptions.m4

extension/m4/ltsugar.m4

extension/m4/ltversion.m4

extension/m4/lt~obsolete.m4

extension/m4/nls.m4

extension/m4/po.m4

extension/m4/progtest.m4

extension/rwarray0.c

getopt.c

getopt.h

getopt1.c

getopt_int.h

install-sh

m4/isc-posix.m4

m4/libsigsegv.m4

mbsupport.h

missing

missing_d/gawkbool.h

mkinstalldirs

po/fix-merge.awk

random.c

random.h

regcomp.c

regex.c

regex.h

regex_internal.c

regex_internal.h

regexec.c

test/Gentests.vms

test/longdbl.awk

test/longdbl.in

test/longdbl.ok

xalloc.h

ylwrap

files removed:
ChangeLog.1

NEWS.1

README_d/ChangeLog.1

README_d/README.aix

README_d/README.alpine

README_d/README.development

README_d/README.freebsd

README_d/README.macosx

README_d/README.rpm

README_d/README.zLinux

TODO

awklib/ChangeLog.1

awklib/eg/data/mail-list

awklib/eg/lib/have_mpfr.awk

awklib/eg/lib/intdiv0.awk

awklib/eg/lib/isnumeric.awk

awklib/eg/lib/ns_passwd.awk

awklib/eg/lib/processarray.awk

awklib/eg/lib/shellquote.awk

awklib/eg/lib/tocsv.awk

awklib/eg/misc/arrayorder.awk

awklib/eg/misc/arrayorder.data

awklib/eg/misc/sample.csv

awklib/eg/misc/test-csv.awk

awklib/eg/network/catpipeclient.awk

awklib/eg/network/catpipeserver.awk

awklib/eg/network/daytimeclient.awk

awklib/eg/network/daytimeserver.awk

awklib/eg/network/mailpopclient.awk

awklib/eg/prog/pi.awk

awklib/eg/test-programs

awklib/eg/test-programs/gen-float-table.awk

awklib/eg/test-programs/gen-float-table.c

awklib/eg/test-programs/gen-float-table.py

build-aux

build-aux/ChangeLog

build-aux/ar-lib

build-aux/compile

build-aux/config.guess

build-aux/config.rpath

build-aux/config.sub

build-aux/depcomp

build-aux/install-sh

build-aux/ltmain.sh

build-aux/missing

build-aux/texinfo.tex

build-aux/ylwrap

doc/ChangeLog.1

doc/gawk_api-figure1.eps

doc/gawk_api-figure1.fig

doc/gawk_api-figure1.pdf

doc/gawk_api-figure1.png

doc/gawk_api-figure1.txt

doc/gawk_api-figure2.eps

doc/gawk_api-figure2.fig

doc/gawk_api-figure2.pdf

doc/gawk_api-figure2.png

doc/gawk_api-figure2.txt

doc/gawk_api-figure3.eps

doc/gawk_api-figure3.fig

doc/gawk_api-figure3.pdf

doc/gawk_api-figure3.png

doc/gawk_api-figure3.txt

doc/gawk_array-elements.eps

doc/gawk_array-elements.fig

doc/gawk_array-elements.pdf

doc/gawk_array-elements.png

doc/gawk_array-elements.txt

doc/gawk_general-program.eps

doc/gawk_general-program.fig

doc/gawk_general-program.pdf

doc/gawk_general-program.png

doc/gawk_general-program.txt

doc/gawk_process-flow.eps

doc/gawk_process-flow.fig

doc/gawk_process-flow.pdf

doc/gawk_process-flow.png

doc/gawk_process-flow.txt

doc/gawk_statist.eps

doc/gawk_statist.jpg

doc/gawk_statist.pdf

doc/gawk_statist.txt

doc/gawkbug.1

doc/gawkworkflow.info

doc/gawkworkflow.texi

doc/implementation-notes.txt

doc/it

doc/it/ChangeLog

doc/it/README.txt

doc/it/compila_gawk.sh

doc/it/compila_pma.sh

doc/it/epsf.tex

doc/it/gawk-api-figura1.eps

doc/it/gawk-api-figura1.fig

doc/it/gawk-api-figura1.pdf

doc/it/gawk-api-figura1.png

doc/it/gawk-api-figura1.txt

doc/it/gawk-api-figura2.eps

doc/it/gawk-api-figura2.fig

doc/it/gawk-api-figura2.pdf

doc/it/gawk-api-figura2.png

doc/it/gawk-api-figura2.txt

doc/it/gawk-api-figura3.eps

doc/it/gawk-api-figura3.fig

doc/it/gawk-api-figura3.pdf

doc/it/gawk-api-figura3.png

doc/it/gawk-api-figura3.txt

doc/it/gawk-flusso-elaborazione.eps

doc/it/gawk-flusso-elaborazione.fig

doc/it/gawk-flusso-elaborazione.pdf

doc/it/gawk-flusso-elaborazione.png

doc/it/gawk-flusso-elaborazione.txt

doc/it/gawk-programma-generico.eps

doc/it/gawk-programma-generico.fig

doc/it/gawk-programma-generico.pdf

doc/it/gawk-programma-generico.png

doc/it/gawk-programma-generico.txt

doc/it/gawk-vettore-elementi.eps

doc/it/gawk-vettore-elementi.fig

doc/it/gawk-vettore-elementi.pdf

doc/it/gawk-vettore-elementi.png

doc/it/gawk-vettore-elementi.txt

doc/it/gawk.1

doc/it/gawk.texi

doc/it/gawkbug.1

doc/it/gendocs.sh

doc/it/gendocs_template

doc/it/genera_formati.sh

doc/it/lflashlight-small.xpic

doc/it/lflashlight.eps

doc/it/lflashlight.pdf

doc/it/margini.texi

doc/it/pm-gawk.1

doc/it/pm-gawk.texi

doc/it/rflashlight-small.xpic

doc/it/rflashlight.eps

doc/it/rflashlight.pdf

doc/it/texinfo.tex

doc/it/txi-it.tex

doc/pm-gawk.1

doc/pm-gawk.info

doc/pm-gawk.texi

doc/wordlist

doc/wordlist2

doc/wordlist3

doc/wordlist4

doc/wordlist5

doc/wordlist6

extension/ChangeLog.1

extension/ext_custom.h

extension/intdiv.c

extension/po

extension/po/.gitignore

extension/po/ChangeLog

extension/po/Makefile.in.in

extension/po/Makevars

extension/po/Makevars.template

extension/po/POTFILES.in

extension/po/Rules-quot

extension/po/boldquot.sed

extension/po/en@boldquot.header

extension/po/en@quot.header

extension/po/gawk-extensions.pot

extension/po/insert-header.sin

extension/po/quot.sed

extension/po/remove-potcdate.sin

extension/po/stamp-po

extras

extras/ChangeLog

extras/Makefile.am

extras/Makefile.in

extras/gawk.csh

extras/gawk.sh

gawkbug.in

helpers/changed-files.awk

helpers/chlistref.awk

helpers/find-utf.sh

helpers/fix-merge.awk

helpers/genbin.awk

helpers/gendec.c

helpers/mb_cur_max.c

helpers/pull-all.sh

helpers/quoteconvert2.sh

helpers/scanfmt.c

helpers/test-build.sh

helpers/timeformat.c

helpers/tryfmt.c

helpers/update-aux.sh

helpers/update-branches.sh

helpers/update-support.sh

m4/ChangeLog.1

m4/ax_check_compile_flag.m4

m4/c-bool.m4

m4/dirfd.m4

m4/extern-inline.m4

m4/fcntl-o.m4

m4/host-cpu-c-abi.m4

m4/libtool.m4

m4/ltoptions.m4

m4/ltsugar.m4

m4/ltversion.m4

m4/lt~obsolete.m4

m4/pma.m4

m4/threadlib.m4

m4/triplet-transformation.m4

missing_d/ChangeLog.1

missing_d/mktime-internal.h

missing_d/strptime.c

missing_d/strsignal.c

missing_d/timegm.c

old-extension/notes.mbx

pc/ChangeLog.1

pc/GenMakefileTst.awk

pc/Makefile.tst.prologue

pc/langinfo.h

pc/make-config.sh

po/ChangeLog.1

po/bg.gmo

po/bg.po

po/ca.gmo

po/id.gmo

po/ka.gmo

po/ka.po

po/ko.gmo

po/ko.po

po/pt.gmo

po/pt.po

po/pt_BR.gmo

po/ro.gmo

po/sr.gmo

po/sr.po

po/uk.gmo

po/uk.po

po/xhe.po

po/zh_CN.gmo

posix/ChangeLog.1

printf.c

support

support/ChangeLog

support/ChangeLog.0

support/Makefile.am

support/Makefile.in

support/agpl-3.0.txt

support/attribute.h

support/cdefs.h

support/dfa.c

support/dfa.h

support/dynarray.h

support/flexmember.h

support/getopt.c

support/getopt.h

support/getopt1.c

support/getopt_int.h

support/idx.h

support/intprops-internal.h

support/intprops.h

support/libc-config.h

support/localeinfo.c

support/localeinfo.h

support/malloc

support/malloc/dynarray-skeleton.c

support/malloc/dynarray.h

support/malloc/dynarray_at_failure.c

support/malloc/dynarray_emplace_enlarge.c

support/malloc/dynarray_finalize.c

support/malloc/dynarray_resize.c

support/malloc/dynarray_resize_clear.c

support/pma.c

support/pma.h

support/random.c

support/random.h

support/regcomp.c

support/regex.c

support/regex.h

support/regex_internal.c

support/regex_internal.h

support/regexec.c

support/verify.h

support/xalloc.h

test/ChangeLog.1

test/anchor.awk

test/anchor.in

test/anchor.ok

test/apiterm.awk

test/apiterm.in

test/apiterm.ok

test/argcasfile.awk

test/argcasfile.in

test/argcasfile.ok

test/arrayind1.awk

test/arrayind1.in

test/arrayind1.ok

test/arrayind2.awk

test/arrayind2.ok

test/arrayind3.awk

test/arrayind3.ok

test/arraysort2.awk

test/arraysort2.ok

test/arraytype-mpfr.ok

test/arraytype.awk

test/arraytype.ok

test/arrdbg.awk

test/aryprm9.awk

test/aryprm9.ok

test/aryunasgn.awk

test/aryunasgn.ok

test/asortbool.awk

test/asortbool.ok

test/asortsymtab-mpfr.ok

test/asortsymtab.awk

test/asortsymtab.ok

test/assignnumfield.awk

test/assignnumfield.in

test/assignnumfield.ok

test/assignnumfield2.awk

test/assignnumfield2.ok

test/badbuild.awk

test/badbuild.in

test/badbuild.ok

test/callparam.awk

test/callparam.ok

test/check_retest.awk

test/checknegtime.awk

test/clos1way2.awk

test/clos1way2.in

test/clos1way2.ok

test/clos1way3.awk

test/clos1way3.ok

test/clos1way4.awk

test/clos1way4.ok

test/clos1way5.awk

test/clos1way5.ok

test/clos1way6.awk

test/clos1way6.ok

test/close_status.awk

test/close_status.ok

test/cmdlinefsbacknl.ok

test/cmdlinefsbacknl.sh

test/cmdlinefsbacknl2.ok

test/cmdlinefsbacknl2.sh

test/commas.awk

test/commas.ok

test/concat5.awk

test/concat5.ok

test/crlf.awk

test/crlf.ok

test/csv1.awk

test/csv1.in

test/csv1.ok

test/csv2.awk

test/csv2.ok

test/csv3.awk

test/csv3.in

test/csv3.ok

test/csvodd.awk

test/csvodd.in

test/csvodd.ok

test/dbugarray1.awk

test/dbugarray1.in

test/dbugarray1.ok

test/dbugarray2.awk

test/dbugarray2.in

test/dbugarray2.ok

test/dbugarray3.awk

test/dbugarray3.in

test/dbugarray3.ok

test/dbugarray4.awk

test/dbugarray4.in

test/dbugarray4.ok

test/dbugeval.in

test/dbugeval.ok

test/dbugeval2.awk

test/dbugeval2.in

test/dbugeval2.ok

test/dbugeval3.awk

test/dbugeval3.in

test/dbugeval3.ok

test/dbugeval4.awk

test/dbugeval4.in

test/dbugeval4.ok

test/dbugtypedre1.awk

test/dbugtypedre1.in

test/dbugtypedre1.ok

test/dbugtypedre2.awk

test/dbugtypedre2.in

test/dbugtypedre2.ok

test/dfacheck1.awk

test/dfacheck1.in

test/dfacheck1.ok

test/dfacheck2.awk

test/dfacheck2.in

test/dfacheck2.ok

test/divzero.awk

test/divzero.ok

test/divzero2.awk

test/divzero2.ok

test/elemnew1.awk

test/elemnew1.ok

test/elemnew2.awk

test/elemnew2.ok

test/elemnew3.awk

test/elemnew3.ok

test/eofsrc1.ok

test/eofsrc1a.awk

test/eofsrc1b.awk

test/errno.awk

test/errno.in

test/errno.ok

test/escapebrace.awk

test/escapebrace.in

test/escapebrace.ok

test/exitval3.awk

test/exitval3.ok

test/fieldassign.awk

test/fieldassign.in

test/fieldassign.ok

test/fix-fmtspcl.awk

test/fldterm.awk

test/fldterm.in

test/fldterm.ok

test/forcenum.awk

test/forcenum.ok

test/fpat4.awk

test/fpat4.ok

test/fpat5.awk

test/fpat5.in

test/fpat5.ok

test/fpat6.awk

test/fpat6.in

test/fpat6.ok

test/fpat7.awk

test/fpat7.in

test/fpat7.ok

test/fpat8.awk

test/fpat8.in

test/fpat8.ok

test/fpat9.awk

test/fpat9.in

test/fpat9.ok

test/fr

test/fr/LC_MESSAGES

test/fr/LC_MESSAGES/nlstringtest.mo

test/fscaret.awk

test/fscaret.in

test/fscaret.ok

test/fsnul1.awk

test/fsnul1.in

test/fsnul1.ok

test/functab5.awk

test/functab5.ok

test/functab6.awk

test/functab6.ok

test/fwtest4.awk

test/fwtest4.in

test/fwtest4.ok

test/fwtest5.awk

test/fwtest5.in

test/fwtest5.ok

test/fwtest6.awk

test/fwtest6.in

test/fwtest6.ok

test/fwtest7.awk

test/fwtest7.in

test/fwtest7.ok

test/fwtest8.awk

test/fwtest8.in

test/fwtest8.ok

test/genpot.awk

test/genpot.ok

test/gensub3.awk

test/gensub3.in

test/gensub3.ok

test/gensub4.awk

test/gensub4.ok

test/getfile.awk

test/getfile.ok

test/getlnfa.awk

test/getlnfa.ok

test/gsubind.awk

test/gsubind.ok

test/gsubnulli18n.awk

test/gsubnulli18n.ok

test/hex2.awk

test/hex2.in

test/hex2.ok

test/ignrcas3.awk

test/ignrcas3.ok

test/ignrcas4.awk

test/ignrcas4.ok

test/indirectbuiltin.awk

test/indirectbuiltin.ok

test/indirectbuiltin2.awk

test/indirectbuiltin2.ok

test/indirectcall2.awk

test/indirectcall2.ok

test/indirectcall3.awk

test/indirectcall3.ok

test/inf-nan-torture.awk

test/inf-nan-torture.in

test/inf-nan-torture.ok

test/inplace2bcomp.1.ok

test/inplace2bcomp.1.orig.ok

test/inplace2bcomp.2.ok

test/inplace2bcomp.2.orig.ok

test/inplace2bcomp.ok

test/inplace3bcomp.1.ok

test/inplace3bcomp.1.orig.ok

test/inplace3bcomp.2.ok

test/inplace3bcomp.2.orig.ok

test/inplace3bcomp.ok

test/inpref.awk

test/inpref.in

test/inpref.ok

test/intarray.awk

test/intarray.ok

test/iolint.awk

test/iolint.ok

test/isarrayunset.awk

test/isarrayunset.ok

test/lintexp.awk

test/lintexp.ok

test/lintindex.awk

test/lintindex.ok

test/lintint.awk

test/lintint.ok

test/lintlength.awk

test/lintlength.ok

test/lintplus.awk

test/lintplus.ok

test/lintplus2.awk

test/lintplus2.ok

test/lintplus3.awk

test/lintplus3.in

test/lintplus3.ok

test/lintset.awk

test/lintset.ok

test/litoct.in

test/makepmafile.c

test/match4.awk

test/match4.ok

test/mbprintf5.awk

test/mbprintf5.in

test/mbprintf5.ok

test/mbstr2.awk

test/mbstr2.in

test/mbstr2.ok

test/mdim1.awk

test/mdim1.ok

test/mdim2.awk

test/mdim2.ok

test/mdim3.awk

test/mdim3.ok

test/mdim4.awk

test/mdim4.in

test/mdim4.ok

test/mdim5.awk

test/mdim5.ok

test/mdim6.awk

test/mdim6.ok

test/mdim7.awk

test/mdim7.ok

test/mdim8.awk

test/mdim8.in

test/mdim8.ok

test/memleak.awk

test/memleak.ok

test/mktime.awk

test/mktime.in

test/mktime.ok

test/mmap8k.awk

test/mmap8k.ok

test/modifiers.ok

test/modifiers.sh

test/mpfranswer42.awk

test/mpfranswer42.ok

test/mpfrbigint2.awk

test/mpfrbigint2.in

test/mpfrbigint2.ok

test/mpfrcase.awk

test/mpfrcase.in

test/mpfrcase.ok

test/mpfrcase2.awk

test/mpfrcase2.in

test/mpfrcase2.ok

test/mpfrfield.awk

test/mpfrfield.in

test/mpfrfield.ok

test/mpfrmemok1.awk

test/mpfrmemok1.ok

test/mpfrnegzero2.awk

test/mpfrnegzero2.ok

test/mpfrnonum.awk

test/mpfrnonum.in

test/mpfrnonum.ok

test/mpfrrem.awk

test/mpfrrem.ok

test/mpfrrndeval.awk

test/mpfrrndeval.ok

test/mpfrsqrt.awk

test/mpfrsqrt.ok

test/mpfrstrtonum.awk

test/mpfrstrtonum.ok

test/mpfruplus.ok

test/mpgforcenum.awk

test/mpgforcenum.ok

test/mtchi18n2.awk

test/mtchi18n2.ok

test/muldimposix.awk

test/muldimposix.ok

test/negtime.awk

test/negtime.ok

test/nlstringtest-nogettext.ok

test/nlstringtest.awk

test/nlstringtest.ok

test/nlstringtest.po

test/nonfatal1.awk

test/nonfatal1.ok

test/nonfatal2.awk

test/nonfatal2.ok

test/nonfatal3.awk

test/nonfatal3.ok

test/nonl.in

test/nsawk1.awk

test/nsawk1a.ok

test/nsawk1b.ok

test/nsawk1c.ok

test/nsawk2.awk

test/nsawk2a.ok

test/nsawk2b.ok

test/nsbad.awk

test/nsbad.ok

test/nsbad2.awk

test/nsbad2.ok

test/nsbad3.awk

test/nsbad3.ok

test/nsbad_cmd.ok

test/nsforloop.awk

test/nsforloop.ok

test/nsfuncrecurse.awk

test/nsfuncrecurse.ok

test/nsidentifier.awk

test/nsidentifier.ok

test/nsindirect1.awk

test/nsindirect1.ok

test/nsindirect2.awk

test/nsindirect2.ok

test/nsprof1.awk

test/nsprof1.ok

test/nsprof2.awk

test/nsprof2.ok

test/nsprof3.awk

test/nsprof3.ok

test/nulinsrc.awk

test/nulinsrc.ok

test/numrange-mpfr.ok

test/numrange.awk

test/numrange.ok

test/numstr1.awk

test/numstr1.ok

test/octdec.awk

test/octdec.ok

test/ofmtstrnum.awk

test/ofmtstrnum.ok

test/paramasfunc1.awk

test/paramasfunc1.ok

test/paramasfunc2.awk

test/paramasfunc2.ok

test/pma.ok

test/posix_compare.awk

test/posix_compare.ok

test/printf-corners-mpfr.ok

test/printf-corners.awk

test/printf-corners.ok

test/printfbad4.awk

test/printfbad4.ok

test/printfchar.awk

test/printfchar.ok

test/printhuge.awk

test/printhuge.ok

test/profile0.awk

test/profile0.in

test/profile0.ok

test/profile10.awk

test/profile10.ok

test/profile11.awk

test/profile11.ok

test/profile12.awk

test/profile12.in

test/profile12.ok

test/profile13.awk

test/profile13.ok

test/profile14.awk

test/profile14.ok

test/profile15.awk

test/profile15.ok

test/profile16.awk

test/profile16.ok

test/profile17.awk

test/profile17.ok

test/profile6.awk

test/profile6.ok

test/profile7.awk

test/profile7.ok

test/profile8.awk

test/profile8.ok

test/profile9.awk

test/profile9.ok

test/pty2.awk

test/pty2.ok

test/randtest.ok

test/randtest.sh

test/range2.awk

test/range2.ok

test/re_test.awk

test/re_test.ok

test/readall.ok

test/readall1.awk

test/readall2.awk

test/readbuf.awk

test/readbuf.ok

test/readdir_retest.awk

test/readfile2.awk

test/readfile2.ok

test/rebrackloc.awk

test/rebrackloc.in

test/rebrackloc.ok

test/rebuild.awk

test/rebuild.in

test/rebuild.ok

test/regex3minus.awk

test/regex3minus.ok

test/regexpbad.awk

test/regexpbad.ok

test/regexpbrack.awk

test/regexpbrack.in

test/regexpbrack.ok

test/regexpbrack2.awk

test/regexpbrack2.in

test/regexpbrack2.ok

test/regexsub.awk

test/regexsub.ok

test/regnul1.awk

test/regnul1.ok

test/regnul2.awk

test/regnul2.ok

test/rscompat.awk

test/rscompat.in

test/rscompat.ok

test/rsgetline.awk

test/rsgetline.in

test/rsgetline.ok

test/rsglstdin.ok

test/rsnullre.awk

test/rsnullre.in

test/rsnullre.ok

test/rsnulw.awk

test/rsnulw.in

test/rsnulw.ok

test/rsstart2.in

test/sandbox1.awk

test/sandbox1.ok

test/setrec0.awk

test/setrec0.in

test/setrec0.ok

test/setrec1.awk

test/setrec1.ok

test/shadowbuiltin.awk

test/shadowbuiltin.ok

test/sigpipe1.awk

test/sigpipe1.ok

test/sortfor2.awk

test/sortfor2.in

test/sortfor2.ok

test/sortglos.awk

test/sortglos.in

test/sortglos.ok

test/sourcesplit.ok

test/spacere.awk

test/spacere.ok

test/status-close.awk

test/status-close.ok

test/strfieldnum.awk

test/strfieldnum.in

test/strfieldnum.ok

test/strftfld.awk

test/strftfld.in

test/strftfld.ok

test/strnum2.awk

test/strnum2.ok

test/strsubscript.awk

test/strsubscript.ok

test/strtonum1.awk

test/strtonum1.ok

test/stupid1.awk

test/stupid1.ok

test/stupid2.awk

test/stupid2.ok

test/stupid3.awk

test/stupid3.ok

test/stupid4.awk

test/stupid4.ok

test/stupid5.awk

test/stupid5.ok

test/subback.awk

test/subback.in

test/subback.ok

test/symtab10.awk

test/symtab10.ok

test/symtab11.awk

test/symtab11.ok

test/symtab12.awk

test/symtab12.ok

test/synerr3.awk

test/synerr3.ok

test/tailrecurse.awk

test/tailrecurse.ok

test/testext-mpfr.ok

test/timeout.awk

test/timeout.ok

test/trailbs.awk

test/trailbs.in

test/trailbs.ok

test/typedregex1.awk

test/typedregex1.ok

test/typedregex2.awk

test/typedregex2.ok

test/typedregex3.awk

test/typedregex3.ok

test/typedregex4.awk

test/typedregex4.ok

test/typedregex5.awk

test/typedregex5.in

test/typedregex5.ok

test/typedregex6.awk

test/typedregex6.in

test/typedregex6.ok

test/typeof1.awk

test/typeof1.ok

test/typeof2.awk

test/typeof2.ok

test/typeof3.awk

test/typeof3.ok

test/typeof4.awk

test/typeof4.ok

test/typeof5.awk

test/typeof5.in

test/typeof5.ok

test/typeof6.awk

test/typeof6.ok

test/typeof7.awk

test/typeof7.ok

test/typeof8.awk

test/typeof8.ok

test/unicode1.awk

test/unicode1.ok

test/uplus.awk

test/uplus.ok

test/valgrind.awk

test/watchpoint1.awk

test/watchpoint1.in

test/watchpoint1.ok

test/watchpoint1.script

vms/ChangeLog.1

vms/vms_sort.com

files modified:
.gitignore

ABOUT-NLS

ChangeLog

ChangeLog.0

Checklist

INSTALL

Makefile.am

Makefile.in

NEWS

POSIX.STD

README

README.git

README_d/ChangeLog

README_d/README.VMS

README_d/README.mpfr

README_d/README.pc

aclocal.m4

array.c

awk.h

awkgram.c

awkgram.y

awklib/.gitignore

awklib/ChangeLog

awklib/ChangeLog.0

awklib/Makefile.am

awklib/Makefile.in

awklib/eg/lib/assert.awk

awklib/eg/lib/bits2str.awk

awklib/eg/lib/ctime.awk

awklib/eg/lib/ftrans.awk

awklib/eg/lib/getopt.awk

awklib/eg/lib/grcat.c

awklib/eg/lib/groupawk.in

awklib/eg/lib/inplace.awk

awklib/eg/lib/noassign.awk

awklib/eg/lib/pwcat.c

awklib/eg/lib/quicksort.awk

awklib/eg/lib/readable.awk

awklib/eg/lib/strtonum.awk

awklib/eg/misc/findpat.awk

awklib/eg/network/coreserv.awk

awklib/eg/network/eliza.awk

awklib/eg/network/fingerclient.awk

awklib/eg/network/remconf.awk

awklib/eg/network/statist.awk

awklib/eg/network/stoxpred.awk

awklib/eg/network/webgrab.awk

awklib/eg/prog/alarm.awk

awklib/eg/prog/anagram.awk

awklib/eg/prog/cut.awk

awklib/eg/prog/egrep.awk

awklib/eg/prog/extract.awk

awklib/eg/prog/id.awk

awklib/eg/prog/igawk.sh

awklib/eg/prog/indirectcall.awk

awklib/eg/prog/labels.awk

awklib/eg/prog/split.awk

awklib/eg/prog/tee.awk

awklib/eg/prog/testbits.awk

awklib/eg/prog/translate.awk

awklib/eg/prog/uniq.awk

awklib/eg/prog/wc.awk

awklib/extract.awk

builtin.c

cint_array.c

cmd.h

command.c

command.y

configh.in

configure

configure.ac

custom.h

debug.c

doc/ChangeLog

doc/Makefile.am

doc/Makefile.in

doc/ad.block

doc/awkcard.in

doc/bc_notes

doc/gawk.1

doc/gawk.info

doc/gawk.texi

doc/gawkinet.info

doc/gawkinet.texi

doc/texinfo.tex

eval.c

ext.c

extension/.gitignore

extension/ABOUT-NLS

extension/ChangeLog

extension/INSTALL

extension/Makefile.am

extension/Makefile.in

extension/aclocal.m4

extension/configh.in

extension/configure

extension/configure.ac

extension/filefuncs.3am

extension/filefuncs.c

extension/fnmatch.3am

extension/fnmatch.c

extension/fork.3am

extension/fork.c

extension/gawkfts.c

extension/gawkfts.h

extension/inplace.3am

extension/inplace.c

extension/ordchr.3am

extension/ordchr.c

extension/readdir.3am

extension/readdir.c

extension/readfile.3am

extension/readfile.c

extension/revoutput.3am

extension/revoutput.c

extension/revtwoway.3am

extension/revtwoway.c

extension/rwarray.3am

extension/rwarray.c

extension/stack.c

extension/testext.c

extension/time.3am

extension/time.c

field.c

floatcomp.c

floatmagic.h

gawkapi.c

gawkapi.h

gawkmisc.c

gettext.h

helpers/ChangeLog

helpers/fixdump.awk

helpers/testdfa.c

helpers/testnet.c

int_array.c

interpret.h

io.c

m4/ChangeLog

m4/arch.m4

m4/codeset.m4

m4/gettext.m4

m4/glibc2.m4

m4/glibc21.m4

m4/iconv.m4

m4/intdiv0.m4

m4/intl.m4

m4/intldir.m4

m4/intlmacosx.m4

m4/intmax.m4

m4/inttypes-pri.m4

m4/inttypes_h.m4

m4/lcmessage.m4

m4/lib-ld.m4

m4/lib-link.m4

m4/lib-prefix.m4

m4/lock.m4

m4/longlong.m4

m4/mpfr.m4

m4/nls.m4

m4/po.m4

m4/printf-posix.m4

m4/progtest.m4

m4/readline.m4

m4/size_max.m4

m4/stdint_h.m4

m4/uintmax_t.m4

m4/visibility.m4

m4/wchar_t.m4

m4/wint_t.m4

m4/xsize.m4

main.c

missing_d/ChangeLog

missing_d/fnmatch.c

missing_d/getaddrinfo.c

missing_d/getaddrinfo.h

missing_d/mktime.c

missing_d/setenv.c

missing_d/snprintf.c

missing_d/strerror.c

missing_d/strncasecmp.c

missing_d/system.c

mpfr.c

msg.c

node.c

nonposix.h

old-extension/ChangeLog

old-extension/bindarr.c

old-extension/fileop.c

old-extension/sparr.c

old-extension/spec_array.c

pc/ChangeLog

pc/ChangeLog.0

pc/Makefile

pc/Makefile.ext

pc/Makefile.tst

pc/config.h

pc/config.sed

pc/dlfcn.h

pc/gawkmisc.pc

pc/getid.c

pc/make-config.bat *

pc/popen.c

pc/popen.h

pc/socket.h

pc/testoutcmp.awk

po/ChangeLog

po/LINGUAS

po/Makefile.in.in

po/Makevars

po/Makevars.template

po/POTFILES.in

po/Rules-quot

po/ca.po

po/da.gmo

po/da.po

po/de.gmo

po/de.po

po/en@boldquot.header

po/en@quot.header

po/es.gmo

po/es.po

po/fi.gmo

po/fi.po

po/fr.gmo

po/fr.po

po/gawk.pot

po/id.po

po/insert-header.sin

po/it.gmo

po/it.po

po/ja.gmo

po/ja.po

po/ms.gmo

po/ms.po

po/nl.gmo

po/nl.po

po/pl.gmo

po/pl.po

po/pt_BR.po

po/remove-potcdate.sin

po/ro.po

po/sv.gmo

po/sv.po

po/tr.po

po/vi.gmo

po/vi.po

po/zh_CN.po

posix/ChangeLog

posix/ChangeLog.0

posix/gawkmisc.c

profile.c

protos.h

re.c

replace.c

str_array.c

symbol.c

test/ChangeLog

test/ChangeLog.0

test/Gentests

test/Makefile.am

test/Makefile.in

test/Maketests

test/README

test/aadelete1.awk

test/aadelete1.ok

test/aasorti.awk

test/argarray.ok

test/back89.ok

test/badargs.ok

test/beginfile2.ok

test/charasbytes.awk *

test/clobber.awk

test/colonwarn.ok

test/delarpm2.awk

test/delarpm2.ok

test/dumpvars.ok

test/exit.sh

test/filefuncs.awk

test/fmtspcl.awk

test/fmtspcl.tok

test/fork.awk

test/fork2.awk

test/fts.awk

test/functab4.awk

test/functab4.ok

test/funsmnam.ok

test/funstack.ok

test/fwtest3.awk

test/fwtest3.in

test/fwtest3.ok

test/gensub2.ok

test/gsubtst3.awk

test/gsubtst5.ok

test/hsprint.awk

test/hsprint.ok

test/id.awk

test/id.ok

test/incdupe4.ok

test/incdupe5.ok

test/incdupe6.ok

test/incdupe7.ok

test/indirectcall.awk

test/inftest.ok

test/inplace1.ok

test/inplace2.ok

test/inplace3.ok

test/intest.awk

test/leaddig.awk

test/lintold.awk

test/lintold.ok

test/lintwarn.awk

test/lintwarn.ok

test/localenl.sh

test/mbprintf4.awk

test/mbprintf4.ok

test/mbstr1.ok

test/mixed1.ok

test/nlstrina.awk

test/noeffect.awk

test/noeffect.ok

test/nofile.ok

test/nondec2.awk

test/noparms.ok

test/ofs1.awk *

test/ofs1.ok

test/ordchr.awk

test/ordchr.ok

test/paramres.ok

test/parseme.ok

test/patsplit.ok

test/poundbang.awk *

test/profile2.ok

test/profile3.ok

test/profile4.ok

test/profile5.ok

test/rand.ok

test/range1.awk *

test/readdir0.awk

test/regrange.ok

test/rwarray.awk

test/sort1.awk

test/sort1.ok

test/space.ok

test/strftime.awk

test/symtab6.ok

test/symtab7.ok

test/symtab8.ok

test/testext.ok

test/time.awk

test/time.ok

test/tweakfld.awk

test/unterm.ok

test/uparrfs.awk

test/xref.awk

vms/ChangeLog

vms/ChangeLog.0

vms/backup_gawk_src.com

vms/build_gawk_pcsi_desc.com

vms/compare_gawk_source.com

vms/config_h.com

vms/descrip.mms

vms/gawk_alias_setup.com

vms/gawk_release_note_start.txt

vms/gawk_verb.com

vms/gawkmisc.vms

vms/generate_config_vms_h_gawk.com *

vms/pcsi_gawk_file_list.txt

vms/pcsi_product_gawk.com

vms/redirect.h

vms/remove_old_gawk.com

vms/stage_gawk_install.com

vms/vax/ChangeLog

vms/vms.h

vms/vms_args.c

vms/vms_cli.c

vms/vms_fwrite.c

vms/vms_gawk.c

vms/vms_misc.c

vms/vms_popen.c

vms/vmsbuild.com

vms/vmstest.com

Show diffs side-by-side

added added

removed removed

regexec.c

/* Extended regular expression matching and search library.

This file is part of the GNU C Library.

Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.

The GNU C Library is free software; you can redistribute it and/or

modify it under the terms of the GNU Lesser General Public

License as published by the Free Software Foundation; either

version 2.1 of the License, or (at your option) any later version.

The GNU C Library is distributed in the hope that it will be useful,

but WITHOUT ANY WARRANTY; without even the implied warranty of

MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public

License along with the GNU C Library; if not, see

<http://www.gnu.org/licenses/>. */

static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,

int n) internal_function;

static void match_ctx_clean (re_match_context_t *mctx) internal_function;

static void match_ctx_free (re_match_context_t *cache) internal_function;

static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,

int str_idx, int from, int to)

internal_function;

static int search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)

internal_function;

static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,

int str_idx) internal_function;

static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,

int node, int str_idx)

internal_function;

static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,

re_dfastate_t **limited_sts, int last_node,

int last_str_idx)

internal_function;

static reg_errcode_t re_search_internal (const regex_t *preg,

const char *string, int length,

int start, int range, int stop,

size_t nmatch, regmatch_t pmatch[],

int eflags) internal_function;

static int re_search_2_stub (struct re_pattern_buffer *bufp,

const char *string1, int length1,

const char *string2, int length2,

int start, int range, struct re_registers *regs,

int stop, int ret_len) internal_function;

static int re_search_stub (struct re_pattern_buffer *bufp,

const char *string, int length, int start,

int range, int stop, struct re_registers *regs,

int ret_len) internal_function;

static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,

int nregs, int regs_allocated) internal_function;

static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)

internal_function;

static int check_matching (re_match_context_t *mctx, int fl_longest_match,

int *p_match_first) internal_function;

static int check_halt_state_context (const re_match_context_t *mctx,

const re_dfastate_t *state, int idx)

internal_function;

static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,

regmatch_t *prev_idx_match, int cur_node,

int cur_idx, int nmatch) internal_function;

static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,

int str_idx, int dest_node, int nregs,

regmatch_t *regs,

re_node_set *eps_via_nodes)

internal_function;

static reg_errcode_t set_regs (const regex_t *preg,

const re_match_context_t *mctx,

size_t nmatch, regmatch_t *pmatch,

int fl_backtrack) internal_function;

static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)

internal_function;

#ifdef RE_ENABLE_I18N

static int sift_states_iter_mb (const re_match_context_t *mctx,

re_sift_context_t *sctx,

int node_idx, int str_idx, int max_str_idx)

internal_function;

#endif /* RE_ENABLE_I18N */

static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,

re_sift_context_t *sctx)

internal_function;

static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,

re_sift_context_t *sctx, int str_idx,

re_node_set *cur_dest)

internal_function;

static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,

re_sift_context_t *sctx,

int str_idx,

re_node_set *dest_nodes)

internal_function;

static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,

re_node_set *dest_nodes,

const re_node_set *candidates)

internal_function;

static int check_dst_limits (const re_match_context_t *mctx,

re_node_set *limits,

int dst_node, int dst_idx, int src_node,

int src_idx) internal_function;

static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,

int boundaries, int subexp_idx,

int from_node, int bkref_idx)

internal_function;

static int check_dst_limits_calc_pos (const re_match_context_t *mctx,

int limit, int subexp_idx,

int node, int str_idx,

int bkref_idx) internal_function;

static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,

re_node_set *dest_nodes,

const re_node_set *candidates,

re_node_set *limits,

struct re_backref_cache_entry *bkref_ents,

int str_idx) internal_function;

static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,

re_sift_context_t *sctx,

int str_idx, const re_node_set *candidates)

internal_function;

static reg_errcode_t merge_state_array (const re_dfa_t *dfa,

re_dfastate_t **dst,

re_dfastate_t **src, int num)

internal_function;

static re_dfastate_t *find_recover_state (reg_errcode_t *err,

re_match_context_t *mctx) internal_function;

126

static re_dfastate_t *transit_state (reg_errcode_t *err,

127

re_match_context_t *mctx,

128

re_dfastate_t *state) internal_function;

129

static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,

130

re_match_context_t *mctx,

131

re_dfastate_t *next_state)

132

internal_function;

133

static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,

134

re_node_set *cur_nodes,

135

int str_idx) internal_function;

136

#if 0

137

static re_dfastate_t *transit_state_sb (reg_errcode_t *err,

138

re_match_context_t *mctx,

139

re_dfastate_t *pstate)

140

internal_function;

141

#endif

142

#ifdef RE_ENABLE_I18N

143

static reg_errcode_t transit_state_mb (re_match_context_t *mctx,

144

re_dfastate_t *pstate)

145

internal_function;

146

#endif /* RE_ENABLE_I18N */

147

static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,

148

const re_node_set *nodes)

149

internal_function;

150

static reg_errcode_t get_subexp (re_match_context_t *mctx,

151

int bkref_node, int bkref_str_idx)

152

internal_function;

153

static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,

154

const re_sub_match_top_t *sub_top,

155

re_sub_match_last_t *sub_last,

156

int bkref_node, int bkref_str)

157

internal_function;

158

static int find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,

159

int subexp_idx, int type) internal_function;

160

static reg_errcode_t check_arrival (re_match_context_t *mctx,

161

state_array_t *path, int top_node,

162

int top_str, int last_node, int last_str,

163

int type) internal_function;

164

static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,

165

int str_idx,

166

re_node_set *cur_nodes,

167

re_node_set *next_nodes)

168

internal_function;

169

static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,

170

re_node_set *cur_nodes,

171

int ex_subexp, int type)

172

internal_function;

173

static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,

174

re_node_set *dst_nodes,

175

int target, int ex_subexp,

176

int type) internal_function;

177

static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,

178

re_node_set *cur_nodes, int cur_str,

179

int subexp_num, int type)

180

internal_function;

181

static int build_trtable (const re_dfa_t *dfa,

182

re_dfastate_t *state) internal_function;

183

#ifdef RE_ENABLE_I18N

184

static int check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,

185

const re_string_t *input, int idx)

186

internal_function;

187

# ifdef _LIBC

188

static unsigned int find_collation_sequence_value (const unsigned char *mbs,

189

size_t name_len)

190

internal_function;

191

# endif /* _LIBC */

192

#endif /* RE_ENABLE_I18N */

193

static int group_nodes_into_DFAstates (const re_dfa_t *dfa,

194

const re_dfastate_t *state,

195

re_node_set *states_node,

196

bitset_t *states_ch) internal_function;

197

static int check_node_accept (const re_match_context_t *mctx,

198

const re_token_t *node, int idx)

199

internal_function;

200

static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len)

201

internal_function;

202

203

#ifdef GAWK

204

#undef MIN /* safety */

205

/* Return the smaller of A and B.  This is a real function rather than a
   macro, so each argument is evaluated exactly once.  The result is
   converted to `int' on return, as the declared return type requires.  */
static int
MIN(size_t a, size_t b)
{
  if (a < b)
    return a;
  return b;
}

210

#endif

211

212

/* Entry point for POSIX code. */

213

214

/* regexec searches for a given pattern, specified by PREG, in the

215

string STRING.

216

217

If NMATCH is zero or REG_NOSUB was set in the cflags argument to

218

`regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at

219

least NMATCH elements, and we set them to the offsets of the

220

corresponding matched substrings.

221

222

EFLAGS specifies `execution flags' which affect matching: if

223

REG_NOTBOL is set, then ^ does not match at the beginning of the

224

string; if REG_NOTEOL is set, then $ does not match at the end.

225

226

We return 0 if we find a match and REG_NOMATCH if not. */

227

228

int

229

regexec (preg, string, nmatch, pmatch, eflags)

230

const regex_t *__restrict preg;

231

const char *__restrict string;

232

size_t nmatch;

233

regmatch_t pmatch[];

234

int eflags;

235

{

236

reg_errcode_t err;

237

int start, length;

238

239

if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))

240

return REG_BADPAT;

241

242

if (eflags & REG_STARTEND)

243

{

244

start = pmatch[0].rm_so;

245

length = pmatch[0].rm_eo;

246

}

247

else

248

{

249

start = 0;

250

length = strlen (string);

251

}

252

253

__libc_lock_lock (dfa->lock);

254

if (preg->no_sub)

255

err = re_search_internal (preg, string, length, start, length - start,

256

length, 0, NULL, eflags);

257

else

258

err = re_search_internal (preg, string, length, start, length - start,

259

length, nmatch, pmatch, eflags);

260

__libc_lock_unlock (dfa->lock);

261

return err != REG_NOERROR;

262

}

263

264

#ifdef _LIBC

265

# include <shlib-compat.h>

266

versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);

267

268

# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)

269

__typeof__ (__regexec) __compat_regexec;

270

271

int

272

attribute_compat_text_section

273

__compat_regexec (const regex_t *__restrict preg,

274

const char *__restrict string, size_t nmatch,

275

regmatch_t pmatch[], int eflags)

276

{

277

return regexec (preg, string, nmatch, pmatch,

278

eflags & (REG_NOTBOL | REG_NOTEOL));

279

}

280

compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);

281

# endif

282

#endif

283

284

/* Entry points for GNU code. */

285

286

/* re_match, re_search, re_match_2, re_search_2

287

288

The former two functions operate on STRING with length LENGTH,

289

while the latter two operate on the concatenation of STRING1 and STRING2

290

with lengths LENGTH1 and LENGTH2, respectively.

291

292

re_match() matches the compiled pattern in BUFP against the string,

293

starting at index START.

294

295

re_search() first tries matching at index START, then it tries to match

296

starting from index START + 1, and so on. The last start position tried

297

is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same

298

way as re_match().)

299

300

The parameter STOP of re_{match,search}_2 specifies that no match exceeding

301

the first STOP characters of the concatenation of the strings should be

302

concerned.

303

304

If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match

305

and all groups are stored in REGS. (For the "_2" variants, the offsets are

306

computed relative to the concatenation, not relative to the individual

307

strings.)

308

309

On success, re_match* functions return the length of the match, re_search*

310

return the position of the start of the match. Return value -1 means no

311

match was found and -2 indicates an internal error. */

312

313

/* Try to match BUFP against STRING (LENGTH bytes) at index START only.
   Forwards to re_search_stub with a range of 0, LENGTH as the stop
   position and RET_LEN set.  */
int
re_match (bufp, string, length, start, regs)
    struct re_pattern_buffer *bufp;
    const char *string;
    int length, start;
    struct re_registers *regs;
{
  const int no_range = 0;	/* Only position START is tried.  */
  const int want_length = 1;	/* re_match style return value.  */

  return re_search_stub (bufp, string, length, start, no_range, length,
			 regs, want_length);
}

322

#ifdef _LIBC

323

weak_alias (__re_match, re_match)

324

#endif

325

326

/* Search STRING (LENGTH bytes) for BUFP, trying start positions from
   START through START + RANGE.  Forwards to re_search_stub with LENGTH
   as the stop position and RET_LEN clear.  */
int
re_search (bufp, string, length, start, range, regs)
    struct re_pattern_buffer *bufp;
    const char *string;
    int length, start, range;
    struct re_registers *regs;
{
  const int want_length = 0;	/* re_search style return value.  */

  return re_search_stub (bufp, string, length, start, range, length,
			 regs, want_length);
}

335

#ifdef _LIBC

336

weak_alias (__re_search, re_search)

337

#endif

338

339

/* Two-string variant of re_match: forwards to re_search_2_stub with a
   range of 0 and RET_LEN set.  */
int
re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
    struct re_pattern_buffer *bufp;
    const char *string1, *string2;
    int length1, length2, start, stop;
    struct re_registers *regs;
{
  const int no_range = 0;	/* Only position START is tried.  */
  const int want_length = 1;	/* re_match style return value.  */

  return re_search_2_stub (bufp, string1, length1, string2, length2,
			   start, no_range, regs, stop, want_length);
}

349

#ifdef _LIBC

350

weak_alias (__re_match_2, re_match_2)

351

#endif

352

353

/* Two-string variant of re_search: forwards to re_search_2_stub with
   RET_LEN clear.  */
int
re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
    struct re_pattern_buffer *bufp;
    const char *string1, *string2;
    int length1, length2, start, range, stop;
    struct re_registers *regs;
{
  const int want_length = 0;	/* re_search style return value.  */

  return re_search_2_stub (bufp, string1, length1, string2, length2,
			   start, range, regs, stop, want_length);
}

363

#ifdef _LIBC

364

weak_alias (__re_search_2, re_search_2)

365

#endif

366

367

static int

368

re_search_2_stub (bufp, string1, length1, string2, length2, start, range, regs,

369

stop, ret_len)

370

struct re_pattern_buffer *bufp;

371

const char *string1, *string2;

372

int length1, length2, start, range, stop, ret_len;

373

struct re_registers *regs;

374

{

375

const char *str;

376

int rval;

377

int len = length1 + length2;

378

char *s = NULL;

379

380

if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0))

381

return -2;

382

383

/* Concatenate the strings. */

384

if (length2 > 0)

385

if (length1 > 0)

386

{

387

s = re_malloc (char, len);

388

389

if (BE (s == NULL, 0))

390

return -2;

391

#ifdef _LIBC

392

memcpy (__mempcpy (s, string1, length1), string2, length2);

393

#else

394

memcpy (s, string1, length1);

395

memcpy (s + length1, string2, length2);

396

#endif

397

str = s;

398

}

399

else

400

str = string2;

401

else

402

str = string1;

403

404

rval = re_search_stub (bufp, str, len, start, range, stop, regs, ret_len);

405

re_free (s);

406

return rval;

407

}

408

409

/* The parameters have the same meaning as those of re_search.

410

Additional parameters:

411

If RET_LEN is nonzero the length of the match is returned (re_match style);

412

otherwise the position of the match is returned. */

413

414

static int

415

re_search_stub (bufp, string, length, start, range, stop, regs, ret_len)

416

struct re_pattern_buffer *bufp;

417

const char *string;

418

int length, start, range, stop, ret_len;

419

struct re_registers *regs;

420

{

421

reg_errcode_t result;

422

regmatch_t *pmatch;

423

int nregs, rval;

424

int eflags = 0;

425

426

/* Check for out-of-range. */

427

if (BE (start < 0 || start > length, 0))

428

return -1;

429

if (BE (start + range > length, 0))

430

range = length - start;

431

else if (BE (start + range < 0, 0))

432

range = -start;

433

434

__libc_lock_lock (dfa->lock);

435

436

eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;

437

eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;

438

439

/* Compile fastmap if we haven't yet. */

440

if (range > 0 && bufp->fastmap != NULL && !bufp->fastmap_accurate)

441

re_compile_fastmap (bufp);

442

443

if (BE (bufp->no_sub, 0))

444

regs = NULL;

445

446

/* We need at least 1 register. */

447

if (regs == NULL)

448

nregs = 1;

449

else if (BE (bufp->regs_allocated == REGS_FIXED &&

450

regs->num_regs < bufp->re_nsub + 1, 0))

451

{

452

nregs = regs->num_regs;

453

if (BE (nregs < 1, 0))

454

{

455

/* Nothing can be copied to regs. */

456

regs = NULL;

457

nregs = 1;

458

}

459

}

460

else

461

nregs = bufp->re_nsub + 1;

462

pmatch = re_malloc (regmatch_t, nregs);

463

if (BE (pmatch == NULL, 0))

464

{

465

rval = -2;

466

goto out;

467

}

468

469

result = re_search_internal (bufp, string, length, start, range, stop,

470

nregs, pmatch, eflags);

471

472

rval = 0;

473

474

/* I hope we needn't fill ther regs with -1's when no match was found. */

475

if (result != REG_NOERROR)

476

rval = -1;

477

else if (regs != NULL)

478

{

479

/* If caller wants register contents data back, copy them. */

480

bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,

481

bufp->regs_allocated);

482

if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))

483

rval = -2;

484

}

485

486

if (BE (rval == 0, 1))

487

{

488

if (ret_len)

489

{

490

assert (pmatch[0].rm_so == start);

491

rval = pmatch[0].rm_eo - start;

492

}

493

else

494

rval = pmatch[0].rm_so;

495

}

496

re_free (pmatch);

497

out:

498

__libc_lock_unlock (dfa->lock);

499

return rval;

500

}

501

502

static unsigned

503

re_copy_regs (regs, pmatch, nregs, regs_allocated)

504

struct re_registers *regs;

505

regmatch_t *pmatch;

506

int nregs, regs_allocated;

507

{

508

int rval = REGS_REALLOCATE;

509

int i;

510

int need_regs = nregs + 1;

511

/* We need one extra element beyond `num_regs' for the `-1' marker GNU code

512

uses. */

513

514

/* Have the register data arrays been allocated? */

515

if (regs_allocated == REGS_UNALLOCATED)

516

{ /* No. So allocate them with malloc. */

517

regs->start = re_malloc (regoff_t, need_regs);

518

if (BE (regs->start == NULL, 0))

519

return REGS_UNALLOCATED;

520

regs->end = re_malloc (regoff_t, need_regs);

521

if (BE (regs->end == NULL, 0))

522

{

523

re_free (regs->start);

524

return REGS_UNALLOCATED;

525

}

526

regs->num_regs = need_regs;

527

}

528

else if (regs_allocated == REGS_REALLOCATE)

529

{ /* Yes. If we need more elements than were already

530

allocated, reallocate them. If we need fewer, just

531

leave it alone. */

532

if (BE (need_regs > regs->num_regs, 0))

533

{

534

regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);

535

regoff_t *new_end;

536

if (BE (new_start == NULL, 0))

537

return REGS_UNALLOCATED;

538

new_end = re_realloc (regs->end, regoff_t, need_regs);

539

if (BE (new_end == NULL, 0))

540

{

541

re_free (new_start);

542

return REGS_UNALLOCATED;

543

}

544

regs->start = new_start;

545

regs->end = new_end;

546

regs->num_regs = need_regs;

547

}

548

}

549

else

550

{

551

assert (regs_allocated == REGS_FIXED);

552

/* This function may not be called with REGS_FIXED and nregs too big. */

553

assert (regs->num_regs >= nregs);

554

rval = REGS_FIXED;

555

}

556

557

/* Copy the regs. */

558

for (i = 0; i < nregs; ++i)

559

{

560

regs->start[i] = pmatch[i].rm_so;

561

regs->end[i] = pmatch[i].rm_eo;

562

}

563

for ( ; i < regs->num_regs; ++i)

564

regs->start[i] = regs->end[i] = -1;

565

566

return rval;

567

}

568

569

/* Set REGS to hold NUM_REGS registers, storing them in STARTS and

570

ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use

571

this memory for recording register information. STARTS and ENDS

572

must be allocated using the malloc library routine, and must each

573

be at least NUM_REGS * sizeof (regoff_t) bytes long.

574

575

If NUM_REGS == 0, then subsequent matches should allocate their own
register data.

576

577

578

Unless this function is called, the first search or match using

579

PATTERN_BUFFER will allocate its own register data, without

580

freeing the old data. */

581

582

void

583

re_set_registers (bufp, regs, num_regs, starts, ends)

584

struct re_pattern_buffer *bufp;

585

struct re_registers *regs;

586

unsigned num_regs;

587

regoff_t *starts, *ends;

588

{

589

if (num_regs)

590

{

591

bufp->regs_allocated = REGS_REALLOCATE;

592

regs->num_regs = num_regs;

593

regs->start = starts;

594

regs->end = ends;

595

}

596

else

597

{

598

bufp->regs_allocated = REGS_UNALLOCATED;

599

regs->num_regs = 0;

600

regs->start = regs->end = (regoff_t *) 0;

601

}

602

}

603

#ifdef _LIBC

604

weak_alias (__re_set_registers, re_set_registers)

605

#endif

606

607

/* Entry points compatible with 4.2 BSD regex library. We don't define

608

them unless specifically requested. */

609

610

#if defined _REGEX_RE_COMP || defined _LIBC

611

int

612

# ifdef _LIBC

613

weak_function

614

# endif

615

re_exec (s)

616

const char *s;

617

{

618

return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);

619

}

620

#endif /* _REGEX_RE_COMP */

621

622

/* Internal entry point. */

623

624

/* Searches for a compiled pattern PREG in the string STRING, whose

625

length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same

626

mingings with regexec. START, and RANGE have the same meanings

627

with re_search.

628

Return REG_NOERROR if we find a match, and REG_NOMATCH if not,

629

otherwise return the error code.

630

Note: We assume front end functions already check ranges.

631

(START + RANGE >= 0 && START + RANGE <= LENGTH) */

632

633

static reg_errcode_t

634

__attribute_warn_unused_result__

635

re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,

636

eflags)

637

const regex_t *preg;

638

const char *string;

639

int length, start, range, stop, eflags;

640

size_t nmatch;

641

regmatch_t pmatch[];

642

{

643

reg_errcode_t err;

644

const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;

645

int left_lim, right_lim, incr;

646

int fl_longest_match, match_first, match_kind, match_last = -1;

647

int extra_nmatch;

648

int sb, ch;

649

#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)

650

re_match_context_t mctx = { .dfa = dfa };

651

#else

652

re_match_context_t mctx;

653

#endif

654

char *fastmap = (preg->fastmap != NULL && preg->fastmap_accurate

655

&& range && !preg->can_be_null) ? preg->fastmap : NULL;

656

RE_TRANSLATE_TYPE t = preg->translate;

657

658

#if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))

659

memset (&mctx, '\0', sizeof (re_match_context_t));

660

mctx.dfa = dfa;

661

#endif

662

663

extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;

664

nmatch -= extra_nmatch;

665

666

/* Check if the DFA haven't been compiled. */

667

if (BE (preg->used == 0 || dfa == NULL || dfa->init_state == NULL

668

|| dfa->init_state_word == NULL || dfa->init_state_nl == NULL

669

|| dfa->init_state_begbuf == NULL, 0))

670

return REG_NOMATCH;

671

672

#ifdef DEBUG

673

/* We assume front-end functions already check them. */

674

assert (start + range >= 0 && start + range <= length);

675

#endif

676

677

/* If initial states with non-begbuf contexts have no elements,

678

the regex must be anchored. If preg->newline_anchor is set,

679

we'll never use init_state_nl, so do not check it. */

680

if (dfa->init_state->nodes.nelem == 0

681

&& dfa->init_state_word->nodes.nelem == 0

682

&& (dfa->init_state_nl->nodes.nelem == 0

683

|| !preg->newline_anchor))

684

{

685

if (start != 0 && start + range != 0)

686

return REG_NOMATCH;

687

start = range = 0;

688

}

689

690

/* We must check the longest matching, if nmatch > 0. */

691

fl_longest_match = (nmatch != 0 || dfa->nbackref);

692

693

err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,

694

preg->translate, preg->syntax & RE_ICASE, dfa);

695

if (BE (err != REG_NOERROR, 0))

696

goto free_return;

697

mctx.input.stop = stop;

698

mctx.input.raw_stop = stop;

699

mctx.input.newline_anchor = preg->newline_anchor;

700

701

err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);

702

if (BE (err != REG_NOERROR, 0))

703

goto free_return;

704

705

/* We will log all the DFA states through which the dfa pass,

706

if nmatch > 1, or this dfa has "multibyte node", which is a

707

back-reference or a node which can accept multibyte character or

708

multi character collating element. */

709

if (nmatch > 1 || dfa->has_mb_node)

710

{

711

/* Avoid overflow. */

712

if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0))

713

{

714

err = REG_ESPACE;

715

goto free_return;

716

}

717

718

mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);

719

if (BE (mctx.state_log == NULL, 0))

720

{

721

err = REG_ESPACE;

722

goto free_return;

723

}

724

}

725

else

726

mctx.state_log = NULL;

727

728

match_first = start;

729

mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF

730

: CONTEXT_NEWLINE | CONTEXT_BEGBUF;

731

732

/* Check incrementally whether of not the input string match. */

733

incr = (range < 0) ? -1 : 1;

734

left_lim = (range < 0) ? start + range : start;

735

right_lim = (range < 0) ? start : start + range;

736

sb = dfa->mb_cur_max == 1;

737

match_kind =

738

(fastmap

739

? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)

740

| (range >= 0 ? 2 : 0)

741

| (t != NULL ? 1 : 0))

742

: 8);

743

744

for (;; match_first += incr)

745

{

746

err = REG_NOMATCH;

747

if (match_first < left_lim || right_lim < match_first)

748

goto free_return;

749

750

/* Advance as rapidly as possible through the string, until we

751

find a plausible place to start matching. This may be done

752

with varying efficiency, so there are various possibilities:

753

only the most common of them are specialized, in order to

754

save on code size. We use a switch statement for speed. */

755

switch (match_kind)

756

{

757

case 8:

758

/* No fastmap. */

759

break;

760

761

case 7:

762

/* Fastmap with single-byte translation, match forward. */

763

while (BE (match_first < right_lim, 1)

764

&& !fastmap[t[(unsigned char) string[match_first]]])

765

++match_first;

766

goto forward_match_found_start_or_reached_end;

767

768

case 6:

769

/* Fastmap without translation, match forward. */

770

while (BE (match_first < right_lim, 1)

771

&& !fastmap[(unsigned char) string[match_first]])

772

++match_first;

773

774

forward_match_found_start_or_reached_end:

775

if (BE (match_first == right_lim, 0))

776

{

777

ch = match_first >= length

778

? 0 : (unsigned char) string[match_first];

779

if (!fastmap[t ? t[ch] : ch])

780

goto free_return;

781

}

782

break;

783

784

case 4:

785

case 5:

786

/* Fastmap without multi-byte translation, match backwards. */

787

while (match_first >= left_lim)

788

{

789

ch = match_first >= length

790

? 0 : (unsigned char) string[match_first];

791

if (fastmap[t ? t[ch] : ch])

792

break;

793

--match_first;

794

}

795

if (match_first < left_lim)

796

goto free_return;

797

break;

798

799

default:

800

/* In this case, we can't determine easily the current byte,

801

since it might be a component byte of a multibyte

802

character. Then we use the constructed buffer instead. */

803

for (;;)

804

{

805

/* If MATCH_FIRST is out of the valid range, reconstruct the

806

buffers. */

807

unsigned int offset = match_first - mctx.input.raw_mbs_idx;

808

if (BE (offset >= (unsigned int) mctx.input.valid_raw_len, 0))

809

{

810

err = re_string_reconstruct (&mctx.input, match_first,

811

eflags);

812

if (BE (err != REG_NOERROR, 0))

813

goto free_return;

814

815

offset = match_first - mctx.input.raw_mbs_idx;

816

}

817

/* If MATCH_FIRST is out of the buffer, leave it as '\0'.

818

Note that MATCH_FIRST must not be smaller than 0. */

819

ch = (match_first >= length

820

? 0 : re_string_byte_at (&mctx.input, offset));

821

if (fastmap[ch])

822

break;

823

match_first += incr;

824

if (match_first < left_lim || match_first > right_lim)

825

{

826

err = REG_NOMATCH;

827

goto free_return;

828

}

829

}

830

break;

831

}

832

833

/* Reconstruct the buffers so that the matcher can assume that

834

the matching starts from the beginning of the buffer. */

835

err = re_string_reconstruct (&mctx.input, match_first, eflags);

836

if (BE (err != REG_NOERROR, 0))

837

goto free_return;

838

839

#ifdef RE_ENABLE_I18N

840

/* Don't consider this char as a possible match start if it part,

841

yet isn't the head, of a multibyte character. */

842

if (!sb && !re_string_first_byte (&mctx.input, 0))

843

continue;

844

#endif

845

846

/* It seems to be appropriate one, then use the matcher. */

847

/* We assume that the matching starts from 0. */

848

mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;

849

match_last = check_matching (&mctx, fl_longest_match,

850

range >= 0 ? &match_first : NULL);

851

if (match_last != -1)

852

{

853

if (BE (match_last == -2, 0))

854

{

855

err = REG_ESPACE;

856

goto free_return;

857

}

858

else

859

{

860

mctx.match_last = match_last;

861

if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)

862

{

863

re_dfastate_t *pstate = mctx.state_log[match_last];

864

mctx.last_node = check_halt_state_context (&mctx, pstate,

865

match_last);

866

}

867

if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)

868

|| dfa->nbackref)

869

{

870

err = prune_impossible_nodes (&mctx);

871

if (err == REG_NOERROR)

872

break;

873

if (BE (err != REG_NOMATCH, 0))

874

goto free_return;

875

match_last = -1;

876

}

877

else

878

break; /* We found a match. */

879

}

880

}

881

882

match_ctx_clean (&mctx);

883

}

884

885

#ifdef DEBUG

886

assert (match_last != -1);

887

assert (err == REG_NOERROR);

888

#endif

889

890

/* Set pmatch[] if we need. */

891

if (nmatch > 0)

892

{

893

int reg_idx;

894

895

/* Initialize registers. */

896

for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)

897

pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;

898

899

/* Set the points where matching start/end. */

900

pmatch[0].rm_so = 0;

901

pmatch[0].rm_eo = mctx.match_last;

902

903

if (!preg->no_sub && nmatch > 1)

904

{

905

err = set_regs (preg, &mctx, nmatch, pmatch,

906

dfa->has_plural_match && dfa->nbackref > 0);

907

if (BE (err != REG_NOERROR, 0))

908

goto free_return;

909

}

910

911

/* At last, add the offset to the each registers, since we slided

912

the buffers so that we could assume that the matching starts

913

from 0. */

914

for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)

915

if (pmatch[reg_idx].rm_so != -1)

916

{

917

#ifdef RE_ENABLE_I18N

918

if (BE (mctx.input.offsets_needed != 0, 0))

919

{

920

pmatch[reg_idx].rm_so =

921

(pmatch[reg_idx].rm_so == mctx.input.valid_len

922

? mctx.input.valid_raw_len

923

: mctx.input.offsets[pmatch[reg_idx].rm_so]);

924

pmatch[reg_idx].rm_eo =

925

(pmatch[reg_idx].rm_eo == mctx.input.valid_len

926

? mctx.input.valid_raw_len

927

: mctx.input.offsets[pmatch[reg_idx].rm_eo]);

928

}

929

#else

930

assert (mctx.input.offsets_needed == 0);

931

#endif

932

pmatch[reg_idx].rm_so += match_first;

933

pmatch[reg_idx].rm_eo += match_first;

934

}

935

for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)

936

{

937

pmatch[nmatch + reg_idx].rm_so = -1;

938

pmatch[nmatch + reg_idx].rm_eo = -1;

939

}

940

941

if (dfa->subexp_map)

942

for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)

943

if (dfa->subexp_map[reg_idx] != reg_idx)

944

{

945

pmatch[reg_idx + 1].rm_so

946

= pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;

947

pmatch[reg_idx + 1].rm_eo

948

= pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;

949

}

950

}

951

952

free_return:

953

re_free (mctx.state_log);

954

if (dfa->nbackref)

955

match_ctx_free (&mctx);

956

re_string_destruct (&mctx.input);

957

return err;

958

}

959

960

static reg_errcode_t

961

__attribute_warn_unused_result__

962

prune_impossible_nodes (mctx)

963

re_match_context_t *mctx;

964

{

965

const re_dfa_t *const dfa = mctx->dfa;

966

int halt_node, match_last;

967

reg_errcode_t ret;

968

re_dfastate_t **sifted_states;

969

re_dfastate_t **lim_states = NULL;

970

re_sift_context_t sctx;

971

#ifdef DEBUG

972

assert (mctx->state_log != NULL);

973

#endif

974

match_last = mctx->match_last;

975

halt_node = mctx->last_node;

976

977

/* Avoid overflow. */

978

if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0))

979

return REG_ESPACE;

980

981

sifted_states = re_malloc (re_dfastate_t *, match_last + 1);

982

if (BE (sifted_states == NULL, 0))

983

{

984

ret = REG_ESPACE;

985

goto free_return;

986

}

987

if (dfa->nbackref)

988

{

989

lim_states = re_malloc (re_dfastate_t *, match_last + 1);

990

if (BE (lim_states == NULL, 0))

991

{

992

ret = REG_ESPACE;

993

goto free_return;

994

}

995

while (1)

996

{

997

memset (lim_states, '\0',

998

sizeof (re_dfastate_t *) * (match_last + 1));

999

sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,

1000

match_last);

1001

ret = sift_states_backward (mctx, &sctx);

1002

re_node_set_free (&sctx.limits);

1003

if (BE (ret != REG_NOERROR, 0))

1004

goto free_return;

1005

if (sifted_states[0] != NULL || lim_states[0] != NULL)

1006

break;

1007

1008

{

1009

--match_last;

1010

if (match_last < 0)

1011

{

1012

ret = REG_NOMATCH;

1013

goto free_return;

1014

}

1015

} while (mctx->state_log[match_last] == NULL

1016

|| !mctx->state_log[match_last]->halt);

1017

halt_node = check_halt_state_context (mctx,

1018

mctx->state_log[match_last],

1019

match_last);

1020

}

1021

ret = merge_state_array (dfa, sifted_states, lim_states,

1022

match_last + 1);

1023

re_free (lim_states);

1024

lim_states = NULL;

1025

if (BE (ret != REG_NOERROR, 0))

1026

goto free_return;

1027

}

1028

else

1029

{

1030

sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);

1031

ret = sift_states_backward (mctx, &sctx);

1032

re_node_set_free (&sctx.limits);

1033

if (BE (ret != REG_NOERROR, 0))

1034

goto free_return;

1035

if (sifted_states[0] == NULL)

1036

{

1037

ret = REG_NOMATCH;

1038

goto free_return;

1039

}

1040

}

1041

re_free (mctx->state_log);

1042

mctx->state_log = sifted_states;

1043

sifted_states = NULL;

1044

mctx->last_node = halt_node;

1045

mctx->match_last = match_last;

1046

ret = REG_NOERROR;

1047

free_return:

1048

re_free (sifted_states);

1049

re_free (lim_states);

1050

return ret;

1051

}

1052

1053

/* Acquire an initial state and return it.

1054

We must select appropriate initial state depending on the context,

1055

since initial states may have constraints like "\<", "^", etc.. */

1056

1057

static inline re_dfastate_t *

1058

__attribute ((always_inline)) internal_function

1059

acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,

1060

int idx)

1061

{

1062

const re_dfa_t *const dfa = mctx->dfa;

1063

if (dfa->init_state->has_constraint)

1064

{

1065

unsigned int context;

1066

context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);

1067

if (IS_WORD_CONTEXT (context))

1068

return dfa->init_state_word;

1069

else if (IS_ORDINARY_CONTEXT (context))

1070

return dfa->init_state;

1071

else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))

1072

return dfa->init_state_begbuf;

1073

else if (IS_NEWLINE_CONTEXT (context))

1074

return dfa->init_state_nl;

1075

else if (IS_BEGBUF_CONTEXT (context))

1076

{

1077

/* It is relatively rare case, then calculate on demand. */

1078

return re_acquire_state_context (err, dfa,

1079

dfa->init_state->entrance_nodes,

1080

context);

1081

}

1082

else

1083

/* Must not happen? */

1084

return dfa->init_state;

1085

}

1086

else

1087

return dfa->init_state;

1088

}

1089

1090

/* Check whether the regular expression match input string INPUT or not,

1091

and return the index where the matching end, return -1 if not match,

1092

or return -2 in case of an error.

1093

FL_LONGEST_MATCH means we want the POSIX longest matching.

1094

If P_MATCH_FIRST is not NULL, and the match fails, it is set to the

1095

next place where we may want to try matching.

1096

Note that the matcher assumes that the matching starts from the current

1097

index of the buffer. */

1098

1099

static int

1100

internal_function __attribute_warn_unused_result__

1101

check_matching (re_match_context_t *mctx, int fl_longest_match,

1102

int *p_match_first)

1103

{

1104

const re_dfa_t *const dfa = mctx->dfa;

1105

reg_errcode_t err;

1106

int match = 0;

1107

int match_last = -1;

1108

int cur_str_idx = re_string_cur_idx (&mctx->input);

1109

re_dfastate_t *cur_state;

1110

int at_init_state = p_match_first != NULL;

1111

int next_start_idx = cur_str_idx;

1112

1113

err = REG_NOERROR;

1114

cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);

1115

/* An initial state must not be NULL (invalid). */

1116

if (BE (cur_state == NULL, 0))

1117

{

1118

assert (err == REG_ESPACE);

1119

return -2;

1120

}

1121

1122

if (mctx->state_log != NULL)

1123

{

1124

mctx->state_log[cur_str_idx] = cur_state;

1125

1126

/* Check OP_OPEN_SUBEXP in the initial state in case that we use them

1127

later. E.g. Processing back references. */

1128

if (BE (dfa->nbackref, 0))

1129

{

1130

at_init_state = 0;

1131

err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);

1132

if (BE (err != REG_NOERROR, 0))

1133

return err;

1134

1135

if (cur_state->has_backref)

1136

{

1137

err = transit_state_bkref (mctx, &cur_state->nodes);

1138

if (BE (err != REG_NOERROR, 0))

1139

return err;

1140

}

1141

}

1142

}

1143

1144

/* If the RE accepts NULL string. */

1145

if (BE (cur_state->halt, 0))

1146

{

1147

if (!cur_state->has_constraint

1148

|| check_halt_state_context (mctx, cur_state, cur_str_idx))

1149

{

1150

if (!fl_longest_match)

1151

return cur_str_idx;

1152

else

1153

{

1154

match_last = cur_str_idx;

1155

match = 1;

1156

}

1157

}

1158

}

1159

1160

while (!re_string_eoi (&mctx->input))

1161

{

1162

re_dfastate_t *old_state = cur_state;

1163

int next_char_idx = re_string_cur_idx (&mctx->input) + 1;

1164

1165

if ((BE (next_char_idx >= mctx->input.bufs_len, 0)

1166

&& mctx->input.bufs_len < mctx->input.len)

1167

|| (BE (next_char_idx >= mctx->input.valid_len, 0)

1168

&& mctx->input.valid_len < mctx->input.len))

1169

{

1170

err = extend_buffers (mctx, next_char_idx + 1);

1171

if (BE (err != REG_NOERROR, 0))

1172

{

1173

assert (err == REG_ESPACE);

1174

return -2;

1175

}

1176

}

1177

1178

cur_state = transit_state (&err, mctx, cur_state);

1179

if (mctx->state_log != NULL)

1180

cur_state = merge_state_with_log (&err, mctx, cur_state);

1181

1182

if (cur_state == NULL)

1183

{

1184

/* Reached the invalid state or an error. Try to recover a valid

1185

state using the state log, if available and if we have not

1186

already found a valid (even if not the longest) match. */

1187

if (BE (err != REG_NOERROR, 0))

1188

return -2;

1189

1190

if (mctx->state_log == NULL

1191

|| (match && !fl_longest_match)

1192

|| (cur_state = find_recover_state (&err, mctx)) == NULL)

1193

break;

1194

}

1195

1196

if (BE (at_init_state, 0))

1197

{

1198

if (old_state == cur_state)

1199

next_start_idx = next_char_idx;

1200

else

1201

at_init_state = 0;

1202

}

1203

1204

if (cur_state->halt)

1205

{

1206

/* Reached a halt state.

1207

Check the halt state can satisfy the current context. */

1208

if (!cur_state->has_constraint

1209

|| check_halt_state_context (mctx, cur_state,

1210

re_string_cur_idx (&mctx->input)))

1211

{

1212

/* We found an appropriate halt state. */

1213

match_last = re_string_cur_idx (&mctx->input);

1214

match = 1;

1215

1216

/* We found a match, do not modify match_first below. */

1217

p_match_first = NULL;

1218

if (!fl_longest_match)

1219

break;

1220

}

1221

}

1222

}

1223

1224

if (p_match_first)

1225

*p_match_first += next_start_idx;

1226

1227

return match_last;

1228

}

1229

1230

/* Check NODE match the current context. */

1231

1232

static int

1233

internal_function

1234

check_halt_node_context (const re_dfa_t *dfa, int node, unsigned int context)

1235

{

1236

re_token_type_t type = dfa->nodes[node].type;

1237

unsigned int constraint = dfa->nodes[node].constraint;

1238

if (type != END_OF_RE)

1239

return 0;

1240

if (!constraint)

1241

return 1;

1242

if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))

1243

return 0;

1244

return 1;

1245

}

1246

1247

/* Check the halt state STATE match the current context.

1248

Return 0 if not match, if the node, STATE has, is a halt node and

1249

match the context, return the node. */

1250

1251

static int

1252

internal_function

1253

check_halt_state_context (const re_match_context_t *mctx,

1254

const re_dfastate_t *state, int idx)

1255

{

1256

int i;

1257

unsigned int context;

1258

#ifdef DEBUG

1259

assert (state->halt);

1260

#endif

1261

context = re_string_context_at (&mctx->input, idx, mctx->eflags);

1262

for (i = 0; i < state->nodes.nelem; ++i)

1263

if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))

1264

return state->nodes.elems[i];

1265

return 0;

1266

}

1267

1268

/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA

1269

corresponding to the DFA).

1270

Return the destination node, and update EPS_VIA_NODES, return -1 in case

1271

of errors. */

1272

1273

static int

1274

internal_function

1275

proceed_next_node (const re_match_context_t *mctx, int nregs, regmatch_t *regs,

1276

int *pidx, int node, re_node_set *eps_via_nodes,

1277

struct re_fail_stack_t *fs)

1278

{

1279

const re_dfa_t *const dfa = mctx->dfa;

1280

int i, err;

1281

if (IS_EPSILON_NODE (dfa->nodes[node].type))

1282

{

1283

re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;

1284

re_node_set *edests = &dfa->edests[node];

1285

int dest_node;

1286

err = re_node_set_insert (eps_via_nodes, node);

1287

if (BE (err < 0, 0))

1288

return -2;

1289

/* Pick up a valid destination, or return -1 if none is found. */

1290

for (dest_node = -1, i = 0; i < edests->nelem; ++i)

1291

{

1292

int candidate = edests->elems[i];

1293

if (!re_node_set_contains (cur_nodes, candidate))

1294

continue;

1295

if (dest_node == -1)

1296

dest_node = candidate;

1297

1298

else

1299

{

1300

/* In order to avoid infinite loop like "(a*)*", return the second

1301

epsilon-transition if the first was already considered. */

1302

if (re_node_set_contains (eps_via_nodes, dest_node))

1303

return candidate;

1304

1305

/* Otherwise, push the second epsilon-transition on the fail stack. */

1306

else if (fs != NULL

1307

&& push_fail_stack (fs, *pidx, candidate, nregs, regs,

1308

eps_via_nodes))

1309

return -2;

1310

1311

/* We know we are going to exit. */

1312

break;

1313

}

1314

}

1315

return dest_node;

1316

}

1317

else

1318

{

1319

int naccepted = 0;

1320

re_token_type_t type = dfa->nodes[node].type;

1321

1322

#ifdef RE_ENABLE_I18N

1323

if (dfa->nodes[node].accept_mb)

1324

naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);

1325

else

1326

#endif /* RE_ENABLE_I18N */

1327

if (type == OP_BACK_REF)

1328

{

1329

int subexp_idx = dfa->nodes[node].opr.idx + 1;

1330

naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;

1331

if (fs != NULL)

1332

{

1333

if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)

1334

return -1;

1335

else if (naccepted)

1336

{

1337

char *buf = (char *) re_string_get_buffer (&mctx->input);

1338

if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,

1339

naccepted) != 0)

1340

return -1;

1341

}

1342

}

1343

1344

if (naccepted == 0)

1345

{

1346

int dest_node;

1347

err = re_node_set_insert (eps_via_nodes, node);

1348

if (BE (err < 0, 0))

1349

return -2;

1350

dest_node = dfa->edests[node].elems[0];

1351

if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,

1352

dest_node))

1353

return dest_node;

1354

}

1355

}

1356

1357

if (naccepted != 0

1358

|| check_node_accept (mctx, dfa->nodes + node, *pidx))

1359

{

1360

int dest_node = dfa->nexts[node];

1361

*pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;

1362

if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL

1363

|| !re_node_set_contains (&mctx->state_log[*pidx]->nodes,

1364

dest_node)))

1365

return -1;

1366

re_node_set_empty (eps_via_nodes);

1367

return dest_node;

1368

}

1369

}

1370

return -1;

1371

}

1372

1373

static reg_errcode_t

1374

internal_function __attribute_warn_unused_result__

1375

push_fail_stack (struct re_fail_stack_t *fs, int str_idx, int dest_node,

1376

int nregs, regmatch_t *regs, re_node_set *eps_via_nodes)

1377

{

1378

reg_errcode_t err;

1379

int num = fs->num++;

1380

if (fs->num == fs->alloc)

1381

{

1382

struct re_fail_stack_ent_t *new_array;

1383

new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)

1384

* fs->alloc * 2));

1385

if (new_array == NULL)

1386

return REG_ESPACE;

1387

fs->alloc *= 2;

1388

fs->stack = new_array;

1389

}

1390

fs->stack[num].idx = str_idx;

1391

fs->stack[num].node = dest_node;

1392

fs->stack[num].regs = re_malloc (regmatch_t, nregs);

1393

if (fs->stack[num].regs == NULL)

1394

return REG_ESPACE;

1395

memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);

1396

err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);

1397

return err;

1398

}

1399

1400

static int

1401

internal_function

1402

pop_fail_stack (struct re_fail_stack_t *fs, int *pidx, int nregs,

1403

regmatch_t *regs, re_node_set *eps_via_nodes)

1404

{

1405

int num = --fs->num;

1406

assert (num >= 0);

1407

*pidx = fs->stack[num].idx;

1408

memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);

1409

re_node_set_free (eps_via_nodes);

1410

re_free (fs->stack[num].regs);

1411

*eps_via_nodes = fs->stack[num].eps_via_nodes;

1412

return fs->stack[num].node;

1413

}

1414

1415

/* Set the positions where the subexpressions are starts/ends to registers

1416

PMATCH.

1417

Note: We assume that pmatch[0] is already set, and

1418

pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */

1419

1420

static reg_errcode_t

1421

internal_function __attribute_warn_unused_result__

1422

set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,

1423

regmatch_t *pmatch, int fl_backtrack)

1424

{

1425

const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;

1426

int idx, cur_node;

1427

re_node_set eps_via_nodes;

1428

struct re_fail_stack_t *fs;

1429

struct re_fail_stack_t fs_body = { 0, 2, NULL };

1430

regmatch_t *prev_idx_match;

1431

int prev_idx_match_malloced = 0;

1432

1433

#ifdef DEBUG

1434

assert (nmatch > 1);

1435

assert (mctx->state_log != NULL);

1436

#endif

1437

if (fl_backtrack)

1438

{

1439

fs = &fs_body;

1440

fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);

1441

if (fs->stack == NULL)

1442

return REG_ESPACE;

1443

}

1444

else

1445

fs = NULL;

1446

1447

cur_node = dfa->init_node;

1448

re_node_set_init_empty (&eps_via_nodes);

1449

1450

#ifdef HAVE_ALLOCA

1451

if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))

1452

prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));

1453

else

1454

#endif

1455

{

1456

prev_idx_match = re_malloc (regmatch_t, nmatch);

1457

if (prev_idx_match == NULL)

1458

{

1459

free_fail_stack_return (fs);

1460

return REG_ESPACE;

1461

}

1462

prev_idx_match_malloced = 1;

1463

}

1464

memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);

1465

1466

for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)

1467

{

1468

update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);

1469

1470

if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)

1471

{

1472

int reg_idx;

1473

if (fs)

1474

{

1475

for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)

1476

if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)

1477

break;

1478

if (reg_idx == nmatch)

1479

{

1480

re_node_set_free (&eps_via_nodes);

1481

if (prev_idx_match_malloced)

1482

re_free (prev_idx_match);

1483

return free_fail_stack_return (fs);

1484

}

1485

cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,

1486

&eps_via_nodes);

1487

}

1488

else

1489

{

1490

re_node_set_free (&eps_via_nodes);

1491

if (prev_idx_match_malloced)

1492

re_free (prev_idx_match);

1493

return REG_NOERROR;

1494

}

1495

}

1496

1497

/* Proceed to next node. */

1498

cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,

1499

&eps_via_nodes, fs);

1500

1501

if (BE (cur_node < 0, 0))

1502

{

1503

if (BE (cur_node == -2, 0))

1504

{

1505

re_node_set_free (&eps_via_nodes);

1506

if (prev_idx_match_malloced)

1507

re_free (prev_idx_match);

1508

free_fail_stack_return (fs);

1509

return REG_ESPACE;

1510

}

1511

if (fs)

1512

cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,

1513

&eps_via_nodes);

1514

else

1515

{

1516

re_node_set_free (&eps_via_nodes);

1517

if (prev_idx_match_malloced)

1518

re_free (prev_idx_match);

1519

return REG_NOMATCH;

1520

}

1521

}

1522

}

1523

re_node_set_free (&eps_via_nodes);

1524

if (prev_idx_match_malloced)

1525

re_free (prev_idx_match);

1526

return free_fail_stack_return (fs);

1527

}

1528

1529

static reg_errcode_t

1530

internal_function

1531

free_fail_stack_return (struct re_fail_stack_t *fs)

1532

{

1533

if (fs)

1534

{

1535

int fs_idx;

1536

for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)

1537

{

1538

re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);

1539

re_free (fs->stack[fs_idx].regs);

1540

}

1541

re_free (fs->stack);

1542

}

1543

return REG_NOERROR;

1544

}

1545

1546

static void

1547

internal_function

1548

update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,

1549

regmatch_t *prev_idx_match, int cur_node, int cur_idx, int nmatch)

1550

{

1551

int type = dfa->nodes[cur_node].type;

1552

if (type == OP_OPEN_SUBEXP)

1553

{

1554

int reg_num = dfa->nodes[cur_node].opr.idx + 1;

1555

1556

/* We are at the first node of this sub expression. */

1557

if (reg_num < nmatch)

1558

{

1559

pmatch[reg_num].rm_so = cur_idx;

1560

pmatch[reg_num].rm_eo = -1;

1561

}

1562

}

1563

else if (type == OP_CLOSE_SUBEXP)

1564

{

1565

int reg_num = dfa->nodes[cur_node].opr.idx + 1;

1566

if (reg_num < nmatch)

1567

{

1568

/* We are at the last node of this sub expression. */

1569

if (pmatch[reg_num].rm_so < cur_idx)

1570

{

1571

pmatch[reg_num].rm_eo = cur_idx;

1572

/* This is a non-empty match or we are not inside an optional

1573

subexpression. Accept this right away. */

1574

memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);

1575

}

1576

else

1577

{

1578

if (dfa->nodes[cur_node].opt_subexp

1579

&& prev_idx_match[reg_num].rm_so != -1)

1580

/* We transited through an empty match for an optional

1581

subexpression, like (a?)*, and this is not the subexp's

1582

first match. Copy back the old content of the registers

1583

so that matches of an inner subexpression are undone as

1584

well, like in ((a?))*. */

1585

memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);

1586

else

1587

/* We completed a subexpression, but it may be part of

1588

an optional one, so do not update PREV_IDX_MATCH. */

1589

pmatch[reg_num].rm_eo = cur_idx;

1590

}

1591

}

1592

}

1593

}

1594

1595

/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0

1596

and sift the nodes in each states according to the following rules.

1597

The updated state log is written back to STATE_LOG.

1598

1599

Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...

1600

1. When STR_IDX == MATCH_LAST(the last index in the state_log):

1601

If `a' isn't the LAST_NODE and `a' can't epsilon transit to

1602

the LAST_NODE, we throw away the node `a'.

1603

2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts

1604

string `s' and transit to `b':

1605

i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw

1606

away the node `a'.

1607

ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is

1608

thrown away, we throw away the node `a'.

1609

3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':

1610

i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the

1611

node `a'.

1612

ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,

1613

we throw away the node `a'. */

1614

1615

#define STATE_NODE_CONTAINS(state,node) \

1616

((state) != NULL && re_node_set_contains (&(state)->nodes, node))

1617

1618

static reg_errcode_t

1619

internal_function

1620

sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)

1621

{

1622

reg_errcode_t err;

1623

int null_cnt = 0;

1624

int str_idx = sctx->last_str_idx;

1625

re_node_set cur_dest;

1626

1627

#ifdef DEBUG

1628

assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);

1629

#endif

1630

1631

/* Build sifted state_log[str_idx]. It has the nodes which can epsilon

1632

transit to the last_node and the last_node itself. */

1633

err = re_node_set_init_1 (&cur_dest, sctx->last_node);

1634

if (BE (err != REG_NOERROR, 0))

1635

return err;

1636

err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);

1637

if (BE (err != REG_NOERROR, 0))

1638

goto free_return;

1639

1640

/* Then check each states in the state_log. */

1641

while (str_idx > 0)

1642

{

1643

/* Update counters. */

1644

null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;

1645

if (null_cnt > mctx->max_mb_elem_len)

1646

{

1647

memset (sctx->sifted_states, '\0',

1648

sizeof (re_dfastate_t *) * str_idx);

1649

re_node_set_free (&cur_dest);

1650

return REG_NOERROR;

1651

}

1652

re_node_set_empty (&cur_dest);

1653

--str_idx;

1654

1655

if (mctx->state_log[str_idx])

1656

{

1657

err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);

1658

if (BE (err != REG_NOERROR, 0))

1659

goto free_return;

1660

}

1661

1662

/* Add all the nodes which satisfy the following conditions:

1663

- It can epsilon transit to a node in CUR_DEST.

1664

- It is in CUR_SRC.

1665

And update state_log. */

1666

err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);

1667

if (BE (err != REG_NOERROR, 0))

1668

goto free_return;

1669

}

1670

err = REG_NOERROR;

1671

free_return:

1672

re_node_set_free (&cur_dest);

1673

return err;

1674

}

1675

1676

static reg_errcode_t

1677

internal_function __attribute_warn_unused_result__

1678

build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,

1679

int str_idx, re_node_set *cur_dest)

1680

{

1681

const re_dfa_t *const dfa = mctx->dfa;

1682

const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;

1683

int i;

1684

1685

/* Then build the next sifted state.

1686

We build the next sifted state on `cur_dest', and update

1687

`sifted_states[str_idx]' with `cur_dest'.

1688

Note:

1689

`cur_dest' is the sifted state from `state_log[str_idx + 1]'.

1690

`cur_src' points the node_set of the old `state_log[str_idx]'

1691

(with the epsilon nodes pre-filtered out). */

1692

for (i = 0; i < cur_src->nelem; i++)

1693

{

1694

int prev_node = cur_src->elems[i];

1695

int naccepted = 0;

1696

int ret;

1697

1698

#ifdef DEBUG

1699

re_token_type_t type = dfa->nodes[prev_node].type;

1700

assert (!IS_EPSILON_NODE (type));

1701

#endif

1702

#ifdef RE_ENABLE_I18N

1703

/* If the node may accept `multi byte'. */

1704

if (dfa->nodes[prev_node].accept_mb)

1705

naccepted = sift_states_iter_mb (mctx, sctx, prev_node,

1706

str_idx, sctx->last_str_idx);

1707

#endif /* RE_ENABLE_I18N */

1708

1709

/* We don't check backreferences here.

1710

See update_cur_sifted_state(). */

1711

if (!naccepted

1712

&& check_node_accept (mctx, dfa->nodes + prev_node, str_idx)

1713

&& STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],

1714

dfa->nexts[prev_node]))

1715

naccepted = 1;

1716

1717

if (naccepted == 0)

1718

continue;

1719

1720

if (sctx->limits.nelem)

1721

{

1722

int to_idx = str_idx + naccepted;

1723

if (check_dst_limits (mctx, &sctx->limits,

1724

dfa->nexts[prev_node], to_idx,

1725

prev_node, str_idx))

1726

continue;

1727

}

1728

ret = re_node_set_insert (cur_dest, prev_node);

1729

if (BE (ret == -1, 0))

1730

return REG_ESPACE;

1731

}

1732

1733

return REG_NOERROR;

1734

}

1735

1736

/* Helper functions. */

1737

1738

static reg_errcode_t

1739

internal_function

1740

clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)

1741

{

1742

int top = mctx->state_log_top;

1743

1744

if ((next_state_log_idx >= mctx->input.bufs_len

1745

&& mctx->input.bufs_len < mctx->input.len)

1746

|| (next_state_log_idx >= mctx->input.valid_len

1747

&& mctx->input.valid_len < mctx->input.len))

1748

{

1749

reg_errcode_t err;

1750

err = extend_buffers (mctx, next_state_log_idx + 1);

1751

if (BE (err != REG_NOERROR, 0))

1752

return err;

1753

}

1754

1755

if (top < next_state_log_idx)

1756

{

1757

memset (mctx->state_log + top + 1, '\0',

1758

sizeof (re_dfastate_t *) * (next_state_log_idx - top));

1759

mctx->state_log_top = next_state_log_idx;

1760

}

1761

return REG_NOERROR;

1762

}

1763

1764

static reg_errcode_t

1765

internal_function

1766

merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,

1767

re_dfastate_t **src, int num)

1768

{

1769

int st_idx;

1770

reg_errcode_t err;

1771

for (st_idx = 0; st_idx < num; ++st_idx)

1772

{

1773

if (dst[st_idx] == NULL)

1774

dst[st_idx] = src[st_idx];

1775

else if (src[st_idx] != NULL)

1776

{

1777

re_node_set merged_set;

1778

err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,

1779

&src[st_idx]->nodes);

1780

if (BE (err != REG_NOERROR, 0))

1781

return err;

1782

dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);

1783

re_node_set_free (&merged_set);

1784

if (BE (err != REG_NOERROR, 0))

1785

return err;

1786

}

1787

}

1788

return REG_NOERROR;

1789

}

1790

1791

static reg_errcode_t

1792

internal_function

1793

update_cur_sifted_state (const re_match_context_t *mctx,

1794

re_sift_context_t *sctx, int str_idx,

1795

re_node_set *dest_nodes)

1796

{

1797

const re_dfa_t *const dfa = mctx->dfa;

1798

reg_errcode_t err = REG_NOERROR;

1799

const re_node_set *candidates;

1800

candidates = ((mctx->state_log[str_idx] == NULL) ? NULL

1801

: &mctx->state_log[str_idx]->nodes);

1802

1803

if (dest_nodes->nelem == 0)

1804

sctx->sifted_states[str_idx] = NULL;

1805

else

1806

{

1807

if (candidates)

1808

{

1809

/* At first, add the nodes which can epsilon transit to a node in

1810

DEST_NODE. */

1811

err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);

1812

if (BE (err != REG_NOERROR, 0))

1813

return err;

1814

1815

/* Then, check the limitations in the current sift_context. */

1816

if (sctx->limits.nelem)

1817

{

1818

err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,

1819

mctx->bkref_ents, str_idx);

1820

if (BE (err != REG_NOERROR, 0))

1821

return err;

1822

}

1823

}

1824

1825

sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);

1826

if (BE (err != REG_NOERROR, 0))

1827

return err;

1828

}

1829

1830

if (candidates && mctx->state_log[str_idx]->has_backref)

1831

{

1832

err = sift_states_bkref (mctx, sctx, str_idx, candidates);

1833

if (BE (err != REG_NOERROR, 0))

1834

return err;

1835

}

1836

return REG_NOERROR;

1837

}

1838

1839

static reg_errcode_t

1840

internal_function __attribute_warn_unused_result__

1841

add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,

1842

const re_node_set *candidates)

1843

{

1844

reg_errcode_t err = REG_NOERROR;

1845

int i;

1846

1847

re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);

1848

if (BE (err != REG_NOERROR, 0))

1849

return err;

1850

1851

if (!state->inveclosure.alloc)

1852

{

1853

err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);

1854

if (BE (err != REG_NOERROR, 0))

1855

return REG_ESPACE;

1856

for (i = 0; i < dest_nodes->nelem; i++)

1857

{

1858

err = re_node_set_merge (&state->inveclosure,

1859

dfa->inveclosures + dest_nodes->elems[i]);

1860

if (BE (err != REG_NOERROR, 0))

1861

return REG_ESPACE;

1862

}

1863

}

1864

return re_node_set_add_intersect (dest_nodes, candidates,

1865

&state->inveclosure);

1866

}

1867

1868

static reg_errcode_t

1869

internal_function

1870

sub_epsilon_src_nodes (const re_dfa_t *dfa, int node, re_node_set *dest_nodes,

1871

const re_node_set *candidates)

1872

{

1873

int ecl_idx;

1874

reg_errcode_t err;

1875

re_node_set *inv_eclosure = dfa->inveclosures + node;

1876

re_node_set except_nodes;

1877

re_node_set_init_empty (&except_nodes);

1878

for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)

1879

{

1880

int cur_node = inv_eclosure->elems[ecl_idx];

1881

if (cur_node == node)

1882

continue;

1883

if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))

1884

{

1885

int edst1 = dfa->edests[cur_node].elems[0];

1886

int edst2 = ((dfa->edests[cur_node].nelem > 1)

1887

? dfa->edests[cur_node].elems[1] : -1);

1888

if ((!re_node_set_contains (inv_eclosure, edst1)

1889

&& re_node_set_contains (dest_nodes, edst1))

1890

|| (edst2 > 0

1891

&& !re_node_set_contains (inv_eclosure, edst2)

1892

&& re_node_set_contains (dest_nodes, edst2)))

1893

{

1894

err = re_node_set_add_intersect (&except_nodes, candidates,

1895

dfa->inveclosures + cur_node);

1896

if (BE (err != REG_NOERROR, 0))

1897

{

1898

re_node_set_free (&except_nodes);

1899

return err;

1900

}

1901

}

1902

}

1903

}

1904

for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)

1905

{

1906

int cur_node = inv_eclosure->elems[ecl_idx];

1907

if (!re_node_set_contains (&except_nodes, cur_node))

1908

{

1909

int idx = re_node_set_contains (dest_nodes, cur_node) - 1;

1910

re_node_set_remove_at (dest_nodes, idx);

1911

}

1912

}

1913

re_node_set_free (&except_nodes);

1914

return REG_NOERROR;

1915

}

1916

1917

static int

1918

internal_function

1919

check_dst_limits (const re_match_context_t *mctx, re_node_set *limits,

1920

int dst_node, int dst_idx, int src_node, int src_idx)

1921

{

1922

const re_dfa_t *const dfa = mctx->dfa;

1923

int lim_idx, src_pos, dst_pos;

1924

1925

int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);

1926

int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);

1927

for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)

1928

{

1929

int subexp_idx;

1930

struct re_backref_cache_entry *ent;

1931

ent = mctx->bkref_ents + limits->elems[lim_idx];

1932

subexp_idx = dfa->nodes[ent->node].opr.idx;

1933

1934

dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],

1935

subexp_idx, dst_node, dst_idx,

1936

dst_bkref_idx);

1937

src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],

1938

subexp_idx, src_node, src_idx,

1939

src_bkref_idx);

1940

1941

/* In case of:

1942

<src> <dst> ( <subexp> )

1943

( <subexp> ) <src> <dst>

1944

( <subexp1> <src> <subexp2> <dst> <subexp3> ) */

1945

if (src_pos == dst_pos)

1946

continue; /* This is unrelated limitation. */

1947

else

1948

return 1;

1949

}

1950

return 0;

1951

}

1952

1953

static int

1954

internal_function

1955

check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,

1956

int subexp_idx, int from_node, int bkref_idx)

1957

{

1958

const re_dfa_t *const dfa = mctx->dfa;

1959

const re_node_set *eclosures = dfa->eclosures + from_node;

1960

int node_idx;

1961

1962

/* Else, we are on the boundary: examine the nodes on the epsilon

1963

closure. */

1964

for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)

1965

{

1966

int node = eclosures->elems[node_idx];

1967

switch (dfa->nodes[node].type)

1968

{

1969

case OP_BACK_REF:

1970

if (bkref_idx != -1)

1971

{

1972

struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;

1973

1974

{

1975

int dst, cpos;

1976

1977

if (ent->node != node)

1978

continue;

1979

1980

if (subexp_idx < BITSET_WORD_BITS

1981

&& !(ent->eps_reachable_subexps_map

1982

& ((bitset_word_t) 1 << subexp_idx)))

1983

continue;

1984

1985

/* Recurse trying to reach the OP_OPEN_SUBEXP and

1986

OP_CLOSE_SUBEXP cases below. But, if the

1987

destination node is the same node as the source

1988

node, don't recurse because it would cause an

1989

infinite loop: a regex that exhibits this behavior

1990

is ()\1*\1* */

1991

dst = dfa->edests[node].elems[0];

1992

if (dst == from_node)

1993

{

1994

if (boundaries & 1)

1995

return -1;

1996

else /* if (boundaries & 2) */

1997

return 0;

1998

}

1999

2000

cpos =

2001

check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,

2002

dst, bkref_idx);

2003

if (cpos == -1 /* && (boundaries & 1) */)

2004

return -1;

2005

if (cpos == 0 && (boundaries & 2))

2006

return 0;

2007

2008

if (subexp_idx < BITSET_WORD_BITS)

2009

ent->eps_reachable_subexps_map

2010

&= ~((bitset_word_t) 1 << subexp_idx);

2011

}

2012

while (ent++->more);

2013

}

2014

break;

2015

2016

case OP_OPEN_SUBEXP:

2017

if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)

2018

return -1;

2019

break;

2020

2021

case OP_CLOSE_SUBEXP:

2022

if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)

2023

return 0;

2024

break;

2025

2026

default:

2027

break;

2028

}

2029

}

2030

2031

return (boundaries & 2) ? 1 : 0;

2032

}

2033

2034

static int

2035

internal_function

2036

check_dst_limits_calc_pos (const re_match_context_t *mctx, int limit,

2037

int subexp_idx, int from_node, int str_idx,

2038

int bkref_idx)

2039

{

2040

struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;

2041

int boundaries;

2042

2043

/* If we are outside the range of the subexpression, return -1 or 1. */

2044

if (str_idx < lim->subexp_from)

2045

return -1;

2046

2047

if (lim->subexp_to < str_idx)

2048

return 1;

2049

2050

/* If we are within the subexpression, return 0. */

2051

boundaries = (str_idx == lim->subexp_from);

2052

boundaries |= (str_idx == lim->subexp_to) << 1;

2053

if (boundaries == 0)

2054

return 0;

2055

2056

/* Else, examine epsilon closure. */

2057

return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,

2058

from_node, bkref_idx);

2059

}

2060

2061

/* Check the limitations of sub expressions LIMITS, and remove the nodes

2062

which are against limitations from DEST_NODES. */

2063

2064

static reg_errcode_t

2065

internal_function

2066

check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,

2067

const re_node_set *candidates, re_node_set *limits,

2068

struct re_backref_cache_entry *bkref_ents, int str_idx)

2069

{

2070

reg_errcode_t err;

2071

int node_idx, lim_idx;

2072

2073

for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)

2074

{

2075

int subexp_idx;

2076

struct re_backref_cache_entry *ent;

2077

ent = bkref_ents + limits->elems[lim_idx];

2078

2079

if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)

2080

continue; /* This is unrelated limitation. */

2081

2082

subexp_idx = dfa->nodes[ent->node].opr.idx;

2083

if (ent->subexp_to == str_idx)

2084

{

2085

int ops_node = -1;

2086

int cls_node = -1;

2087

for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)

2088

{

2089

int node = dest_nodes->elems[node_idx];

2090

re_token_type_t type = dfa->nodes[node].type;

2091

if (type == OP_OPEN_SUBEXP

2092

&& subexp_idx == dfa->nodes[node].opr.idx)

2093

ops_node = node;

2094

else if (type == OP_CLOSE_SUBEXP

2095

&& subexp_idx == dfa->nodes[node].opr.idx)

2096

cls_node = node;

2097

}

2098

2099

/* Check the limitation of the open subexpression. */

2100

/* Note that (ent->subexp_to = str_idx != ent->subexp_from). */

2101

if (ops_node >= 0)

2102

{

2103

err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,

2104

candidates);

2105

if (BE (err != REG_NOERROR, 0))

2106

return err;

2107

}

2108

2109

/* Check the limitation of the close subexpression. */

2110

if (cls_node >= 0)

2111

for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)

2112

{

2113

int node = dest_nodes->elems[node_idx];

2114

if (!re_node_set_contains (dfa->inveclosures + node,

2115

cls_node)

2116

&& !re_node_set_contains (dfa->eclosures + node,

2117

cls_node))

2118

{

2119

/* It is against this limitation.

2120

Remove it form the current sifted state. */

2121

err = sub_epsilon_src_nodes (dfa, node, dest_nodes,

2122

candidates);

2123

if (BE (err != REG_NOERROR, 0))

2124

return err;

2125

--node_idx;

2126

}

2127

}

2128

}

2129

else /* (ent->subexp_to != str_idx) */

2130

{

2131

for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)

2132

{

2133

int node = dest_nodes->elems[node_idx];

2134

re_token_type_t type = dfa->nodes[node].type;

2135

if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)

2136

{

2137

if (subexp_idx != dfa->nodes[node].opr.idx)

2138

continue;

2139

/* It is against this limitation.

2140

Remove it form the current sifted state. */

2141

err = sub_epsilon_src_nodes (dfa, node, dest_nodes,

2142

candidates);

2143

if (BE (err != REG_NOERROR, 0))

2144

return err;

2145

}

2146

}

2147

}

2148

}

2149

return REG_NOERROR;

2150

}

2151

2152

static reg_errcode_t

2153

internal_function __attribute_warn_unused_result__

2154

sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,

2155

int str_idx, const re_node_set *candidates)

2156

{

2157

const re_dfa_t *const dfa = mctx->dfa;

2158

reg_errcode_t err;

2159

int node_idx, node;

2160

re_sift_context_t local_sctx;

2161

int first_idx = search_cur_bkref_entry (mctx, str_idx);

2162

2163

if (first_idx == -1)

2164

return REG_NOERROR;

2165

2166

local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */

2167

2168

for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)

2169

{

2170

int enabled_idx;

2171

re_token_type_t type;

2172

struct re_backref_cache_entry *entry;

2173

node = candidates->elems[node_idx];

2174

type = dfa->nodes[node].type;

2175

/* Avoid infinite loop for the REs like "()\1+". */

2176

if (node == sctx->last_node && str_idx == sctx->last_str_idx)

2177

continue;

2178

if (type != OP_BACK_REF)

2179

continue;

2180

2181

entry = mctx->bkref_ents + first_idx;

2182

enabled_idx = first_idx;

2183

2184

{

2185

int subexp_len;

2186

int to_idx;

2187

int dst_node;

2188

int ret;

2189

re_dfastate_t *cur_state;

2190

2191

if (entry->node != node)

2192

continue;

2193

subexp_len = entry->subexp_to - entry->subexp_from;

2194

to_idx = str_idx + subexp_len;

2195

dst_node = (subexp_len ? dfa->nexts[node]

2196

: dfa->edests[node].elems[0]);

2197

2198

if (to_idx > sctx->last_str_idx

2199

|| sctx->sifted_states[to_idx] == NULL

2200

|| !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)

2201

|| check_dst_limits (mctx, &sctx->limits, node,

2202

str_idx, dst_node, to_idx))

2203

continue;

2204

2205

if (local_sctx.sifted_states == NULL)

2206

{

2207

local_sctx = *sctx;

2208

err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);

2209

if (BE (err != REG_NOERROR, 0))

2210

goto free_return;

2211

}

2212

local_sctx.last_node = node;

2213

local_sctx.last_str_idx = str_idx;

2214

ret = re_node_set_insert (&local_sctx.limits, enabled_idx);

2215

if (BE (ret < 0, 0))

2216

{

2217

err = REG_ESPACE;

2218

goto free_return;

2219

}

2220

cur_state = local_sctx.sifted_states[str_idx];

2221

err = sift_states_backward (mctx, &local_sctx);

2222

if (BE (err != REG_NOERROR, 0))

2223

goto free_return;

2224

if (sctx->limited_states != NULL)

2225

{

2226

err = merge_state_array (dfa, sctx->limited_states,

2227

local_sctx.sifted_states,

2228

str_idx + 1);

2229

if (BE (err != REG_NOERROR, 0))

2230

goto free_return;

2231

}

2232

local_sctx.sifted_states[str_idx] = cur_state;

2233

re_node_set_remove (&local_sctx.limits, enabled_idx);

2234

2235

/* mctx->bkref_ents may have changed, reload the pointer. */

2236

entry = mctx->bkref_ents + enabled_idx;

2237

}

2238

while (enabled_idx++, entry++->more);

2239

}

2240

err = REG_NOERROR;

2241

free_return:

2242

if (local_sctx.sifted_states != NULL)

2243

{

2244

re_node_set_free (&local_sctx.limits);

2245

}

2246

2247

return err;

2248

}

2249

2250

2251

#ifdef RE_ENABLE_I18N

2252

static int

2253

internal_function

2254

sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,

2255

int node_idx, int str_idx, int max_str_idx)

2256

{

2257

const re_dfa_t *const dfa = mctx->dfa;

2258

int naccepted;

2259

/* Check the node can accept `multi byte'. */

2260

naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);

2261

if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&

2262

!STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],

2263

dfa->nexts[node_idx]))

2264

/* The node can't accept the `multi byte', or the

2265

destination was already thrown away, then the node

2266

could't accept the current input `multi byte'. */

2267

naccepted = 0;

2268

/* Otherwise, it is sure that the node could accept

2269

`naccepted' bytes input. */

2270

return naccepted;

2271

}

2272

#endif /* RE_ENABLE_I18N */

2273

2274

2275

/* Functions for state transition. */

2276

2277

/* Return the next state to which the current state STATE will transit by

2278

accepting the current input byte, and update STATE_LOG if necessary.

2279

If STATE can accept a multibyte char/collating element/back reference

2280

update the destination of STATE_LOG. */

2281

2282

static re_dfastate_t *

2283

internal_function __attribute_warn_unused_result__

2284

transit_state (reg_errcode_t *err, re_match_context_t *mctx,

2285

re_dfastate_t *state)

2286

{

2287

re_dfastate_t **trtable;

2288

unsigned char ch;

2289

2290

#ifdef RE_ENABLE_I18N

2291

/* If the current state can accept multibyte. */

2292

if (BE (state->accept_mb, 0))

2293

{

2294

*err = transit_state_mb (mctx, state);

2295

if (BE (*err != REG_NOERROR, 0))

2296

return NULL;

2297

}

2298

#endif /* RE_ENABLE_I18N */

2299

2300

/* Then decide the next state with the single byte. */

2301

#if 0

2302

if (0)

2303

/* don't use transition table */

2304

return transit_state_sb (err, mctx, state);

2305

#endif

2306

2307

/* Use transition table */

2308

ch = re_string_fetch_byte (&mctx->input);

2309

for (;;)

2310

{

2311

trtable = state->trtable;

2312

if (BE (trtable != NULL, 1))

2313

return trtable[ch];

2314

2315

trtable = state->word_trtable;

2316

if (BE (trtable != NULL, 1))

2317

{

2318

unsigned int context;

2319

context

2320

= re_string_context_at (&mctx->input,

2321

re_string_cur_idx (&mctx->input) - 1,

2322

mctx->eflags);

2323

if (IS_WORD_CONTEXT (context))

2324

return trtable[ch + SBC_MAX];

2325

else

2326

return trtable[ch];

2327

}

2328

2329

if (!build_trtable (mctx->dfa, state))

2330

{

2331

*err = REG_ESPACE;

2332

return NULL;

2333

}

2334

2335

/* Retry, we now have a transition table. */

2336

}

2337

}

2338

2339

/* Update the state_log if we need */

2340

re_dfastate_t *

2341

internal_function

2342

merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,

2343

re_dfastate_t *next_state)

2344

{

2345

const re_dfa_t *const dfa = mctx->dfa;

2346

int cur_idx = re_string_cur_idx (&mctx->input);

2347

2348

if (cur_idx > mctx->state_log_top)

2349

{

2350

mctx->state_log[cur_idx] = next_state;

2351

mctx->state_log_top = cur_idx;

2352

}

2353

else if (mctx->state_log[cur_idx] == 0)

2354

{

2355

mctx->state_log[cur_idx] = next_state;

2356

}

2357

else

2358

{

2359

re_dfastate_t *pstate;

2360

unsigned int context;

2361

re_node_set next_nodes, *log_nodes, *table_nodes = NULL;

2362

/* If (state_log[cur_idx] != 0), it implies that cur_idx is

2363

the destination of a multibyte char/collating element/

2364

back reference. Then the next state is the union set of

2365

these destinations and the results of the transition table. */

2366

pstate = mctx->state_log[cur_idx];

2367

log_nodes = pstate->entrance_nodes;

2368

if (next_state != NULL)

2369

{

2370

table_nodes = next_state->entrance_nodes;

2371

*err = re_node_set_init_union (&next_nodes, table_nodes,

2372

log_nodes);

2373

if (BE (*err != REG_NOERROR, 0))

2374

return NULL;

2375

}

2376

else

2377

next_nodes = *log_nodes;

2378

/* Note: We already add the nodes of the initial state,

2379

then we don't need to add them here. */

2380

2381

context = re_string_context_at (&mctx->input,

2382

re_string_cur_idx (&mctx->input) - 1,

2383

mctx->eflags);

2384

next_state = mctx->state_log[cur_idx]

2385

= re_acquire_state_context (err, dfa, &next_nodes, context);

2386

/* We don't need to check errors here, since the return value of

2387

this function is next_state and ERR is already set. */

2388

2389

if (table_nodes != NULL)

2390

re_node_set_free (&next_nodes);

2391

}

2392

2393

if (BE (dfa->nbackref, 0) && next_state != NULL)

2394

{

2395

/* Check OP_OPEN_SUBEXP in the current state in case that we use them

2396

later. We must check them here, since the back references in the

2397

next state might use them. */

2398

*err = check_subexp_matching_top (mctx, &next_state->nodes,

2399

cur_idx);

2400

if (BE (*err != REG_NOERROR, 0))

2401

return NULL;

2402

2403

/* If the next state has back references. */

2404

if (next_state->has_backref)

2405

{

2406

*err = transit_state_bkref (mctx, &next_state->nodes);

2407

if (BE (*err != REG_NOERROR, 0))

2408

return NULL;

2409

next_state = mctx->state_log[cur_idx];

2410

}

2411

}

2412

2413

return next_state;

2414

}

2415

2416

/* Skip bytes in the input that correspond to part of a

2417

multi-byte match, then look in the log for a state

2418

from which to restart matching. */

2419

re_dfastate_t *

2420

internal_function

2421

find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)

2422

{

2423

re_dfastate_t *cur_state;

2424

2425

{

2426

int max = mctx->state_log_top;

2427

int cur_str_idx = re_string_cur_idx (&mctx->input);

2428

2429

2430

{

2431

if (++cur_str_idx > max)

2432

return NULL;

2433

re_string_skip_bytes (&mctx->input, 1);

2434

}

2435

while (mctx->state_log[cur_str_idx] == NULL);

2436

2437

cur_state = merge_state_with_log (err, mctx, NULL);

2438

}

2439

while (*err == REG_NOERROR && cur_state == NULL);

2440

return cur_state;

2441

}

2442

2443

/* Helper functions for transit_state. */

2444

2445

/* From the node set CUR_NODES, pick up the nodes whose types are

2446

OP_OPEN_SUBEXP and which have corresponding back references in the regular

2447

expression. And register them to use them later for evaluating the

2448

corresponding back references. */

2449

2450

static reg_errcode_t

2451

internal_function

2452

check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,

2453

int str_idx)

2454

{

2455

const re_dfa_t *const dfa = mctx->dfa;

2456

int node_idx;

2457

reg_errcode_t err;

2458

2459

/* TODO: This isn't efficient.

2460

Because there might be more than one nodes whose types are

2461

OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all

2462

nodes.

2463

E.g. RE: (a){2} */

2464

for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)

2465

{

2466

int node = cur_nodes->elems[node_idx];

2467

if (dfa->nodes[node].type == OP_OPEN_SUBEXP

2468

&& dfa->nodes[node].opr.idx < BITSET_WORD_BITS

2469

&& (dfa->used_bkref_map

2470

& ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))

2471

{

2472

err = match_ctx_add_subtop (mctx, node, str_idx);

2473

if (BE (err != REG_NOERROR, 0))

2474

return err;

2475

}

2476

}

2477

return REG_NOERROR;

2478

}

2479

2480

#if 0

2481

/* Return the next state to which the current state STATE will transit by

2482

accepting the current input byte. */

2483

2484

static re_dfastate_t *

2485

transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,

2486

re_dfastate_t *state)

2487

{

2488

const re_dfa_t *const dfa = mctx->dfa;

2489

re_node_set next_nodes;

2490

re_dfastate_t *next_state;

2491

int node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);

2492

unsigned int context;

2493

2494

*err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);

2495

if (BE (*err != REG_NOERROR, 0))

2496

return NULL;

2497

for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)

2498

{

2499

int cur_node = state->nodes.elems[node_cnt];

2500

if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))

2501

{

2502

*err = re_node_set_merge (&next_nodes,

2503

dfa->eclosures + dfa->nexts[cur_node]);

2504

if (BE (*err != REG_NOERROR, 0))

2505

{

2506

re_node_set_free (&next_nodes);

2507

return NULL;

2508

}

2509

}

2510

}

2511

context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);

2512

next_state = re_acquire_state_context (err, dfa, &next_nodes, context);

2513

/* We don't need to check errors here, since the return value of

2514

this function is next_state and ERR is already set. */

2515

2516

re_node_set_free (&next_nodes);

2517

re_string_skip_bytes (&mctx->input, 1);

2518

return next_state;

2519

}

2520

#endif

2521

2522

#ifdef RE_ENABLE_I18N

2523

static reg_errcode_t

2524

internal_function

2525

transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)

2526

{

2527

const re_dfa_t *const dfa = mctx->dfa;

2528

reg_errcode_t err;

2529

int i;

2530

2531

for (i = 0; i < pstate->nodes.nelem; ++i)

2532

{

2533

re_node_set dest_nodes, *new_nodes;

2534

int cur_node_idx = pstate->nodes.elems[i];

2535

int naccepted, dest_idx;

2536

unsigned int context;

2537

re_dfastate_t *dest_state;

2538

2539

if (!dfa->nodes[cur_node_idx].accept_mb)

2540

continue;

2541

2542

if (dfa->nodes[cur_node_idx].constraint)

2543

{

2544

context = re_string_context_at (&mctx->input,

2545

re_string_cur_idx (&mctx->input),

2546

mctx->eflags);

2547

if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,

2548

context))

2549

continue;

2550

}

2551

2552

/* How many bytes the node can accept? */

2553

naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,

2554

re_string_cur_idx (&mctx->input));

2555

if (naccepted == 0)

2556

continue;

2557

2558

/* The node can accepts `naccepted' bytes. */

2559

dest_idx = re_string_cur_idx (&mctx->input) + naccepted;

2560

mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted

2561

: mctx->max_mb_elem_len);

2562

err = clean_state_log_if_needed (mctx, dest_idx);

2563

if (BE (err != REG_NOERROR, 0))

2564

return err;

2565

#ifdef DEBUG

2566

assert (dfa->nexts[cur_node_idx] != -1);

2567

#endif

2568

new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];

2569

2570

dest_state = mctx->state_log[dest_idx];

2571

if (dest_state == NULL)

2572

dest_nodes = *new_nodes;

2573

else

2574

{

2575

err = re_node_set_init_union (&dest_nodes,

2576

dest_state->entrance_nodes, new_nodes);

2577

if (BE (err != REG_NOERROR, 0))

2578

return err;

2579

}

2580

context = re_string_context_at (&mctx->input, dest_idx - 1,

2581

mctx->eflags);

2582

mctx->state_log[dest_idx]

2583

= re_acquire_state_context (&err, dfa, &dest_nodes, context);

2584

if (dest_state != NULL)

2585

re_node_set_free (&dest_nodes);

2586

if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))

2587

return err;

2588

}

2589

return REG_NOERROR;

2590

}

2591

#endif /* RE_ENABLE_I18N */

2592

2593

static reg_errcode_t

2594

internal_function

2595

transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)

2596

{

2597

const re_dfa_t *const dfa = mctx->dfa;

2598

reg_errcode_t err;

2599

int i;

2600

int cur_str_idx = re_string_cur_idx (&mctx->input);

2601

2602

for (i = 0; i < nodes->nelem; ++i)

2603

{

2604

int dest_str_idx, prev_nelem, bkc_idx;

2605

int node_idx = nodes->elems[i];

2606

unsigned int context;

2607

const re_token_t *node = dfa->nodes + node_idx;

2608

re_node_set *new_dest_nodes;

2609

2610

/* Check whether `node' is a backreference or not. */

2611

if (node->type != OP_BACK_REF)

2612

continue;

2613

2614

if (node->constraint)

2615

{

2616

context = re_string_context_at (&mctx->input, cur_str_idx,

2617

mctx->eflags);

2618

if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))

2619

continue;

2620

}

2621

2622

/* `node' is a backreference.

2623

Check the substring which the substring matched. */

2624

bkc_idx = mctx->nbkref_ents;

2625

err = get_subexp (mctx, node_idx, cur_str_idx);

2626

if (BE (err != REG_NOERROR, 0))

2627

goto free_return;

2628

2629

/* And add the epsilon closures (which is `new_dest_nodes') of

2630

the backreference to appropriate state_log. */

2631

#ifdef DEBUG

2632

assert (dfa->nexts[node_idx] != -1);

2633

#endif

2634

for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)

2635

{

2636

int subexp_len;

2637

re_dfastate_t *dest_state;

2638

struct re_backref_cache_entry *bkref_ent;

2639

bkref_ent = mctx->bkref_ents + bkc_idx;

2640

if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)

2641

continue;

2642

subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;

2643

new_dest_nodes = (subexp_len == 0

2644

? dfa->eclosures + dfa->edests[node_idx].elems[0]

2645

: dfa->eclosures + dfa->nexts[node_idx]);

2646

dest_str_idx = (cur_str_idx + bkref_ent->subexp_to

2647

- bkref_ent->subexp_from);

2648

context = re_string_context_at (&mctx->input, dest_str_idx - 1,

2649

mctx->eflags);

2650

dest_state = mctx->state_log[dest_str_idx];

2651

prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0

2652

: mctx->state_log[cur_str_idx]->nodes.nelem);

2653

/* Add `new_dest_node' to state_log. */

2654

if (dest_state == NULL)

2655

{

2656

mctx->state_log[dest_str_idx]

2657

= re_acquire_state_context (&err, dfa, new_dest_nodes,

2658

context);

2659

if (BE (mctx->state_log[dest_str_idx] == NULL

2660

&& err != REG_NOERROR, 0))

2661

goto free_return;

2662

}

2663

else

2664

{

2665

re_node_set dest_nodes;

2666

err = re_node_set_init_union (&dest_nodes,

2667

dest_state->entrance_nodes,

2668

new_dest_nodes);

2669

if (BE (err != REG_NOERROR, 0))

2670

{

2671

re_node_set_free (&dest_nodes);

2672

goto free_return;

2673

}

2674

mctx->state_log[dest_str_idx]

2675

= re_acquire_state_context (&err, dfa, &dest_nodes, context);

2676

re_node_set_free (&dest_nodes);

2677

if (BE (mctx->state_log[dest_str_idx] == NULL

2678

&& err != REG_NOERROR, 0))

2679

goto free_return;

2680

}

2681

/* We need to check recursively if the backreference can epsilon

2682

transit. */

2683

if (subexp_len == 0

2684

&& mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)

2685

{

2686

err = check_subexp_matching_top (mctx, new_dest_nodes,

2687

cur_str_idx);

2688

if (BE (err != REG_NOERROR, 0))

2689

goto free_return;

2690

err = transit_state_bkref (mctx, new_dest_nodes);

2691

if (BE (err != REG_NOERROR, 0))

2692

goto free_return;

2693

}

2694

}

2695

}

2696

err = REG_NOERROR;

2697

free_return:

2698

return err;

2699

}

2700

2701

/* Enumerate all the candidates which the backreference BKREF_NODE can match

2702

at BKREF_STR_IDX, and register them by match_ctx_add_entry().

2703

Note that we might collect inappropriate candidates here.

2704

However, the cost of checking them strictly here is too high, then we

2705

delay these checking for prune_impossible_nodes(). */

2706

2707

static reg_errcode_t

2708

internal_function __attribute_warn_unused_result__

2709

get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)

2710

{

2711

const re_dfa_t *const dfa = mctx->dfa;

2712

int subexp_num, sub_top_idx;

2713

const char *buf = (const char *) re_string_get_buffer (&mctx->input);

2714

/* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */

2715

int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);

2716

if (cache_idx != -1)

2717

{

2718

const struct re_backref_cache_entry *entry

2719

= mctx->bkref_ents + cache_idx;

2720

2721

if (entry->node == bkref_node)

2722

return REG_NOERROR; /* We already checked it. */

2723

while (entry++->more);

2724

}

2725

2726

subexp_num = dfa->nodes[bkref_node].opr.idx;

2727

2728

/* For each sub expression */

2729

for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)

2730

{

2731

reg_errcode_t err;

2732

re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];

2733

re_sub_match_last_t *sub_last;

2734

int sub_last_idx, sl_str, bkref_str_off;

2735

2736

if (dfa->nodes[sub_top->node].opr.idx != subexp_num)

2737

continue; /* It isn't related. */

2738

2739

sl_str = sub_top->str_idx;

2740

bkref_str_off = bkref_str_idx;

2741

/* At first, check the last node of sub expressions we already

2742

evaluated. */

2743

for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)

2744

{

2745

int sl_str_diff;

2746

sub_last = sub_top->lasts[sub_last_idx];

2747

sl_str_diff = sub_last->str_idx - sl_str;

2748

/* The matched string by the sub expression match with the substring

2749

at the back reference? */

2750

if (sl_str_diff > 0)

2751

{

2752

if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))

2753

{

2754

/* Not enough chars for a successful match. */

2755

if (bkref_str_off + sl_str_diff > mctx->input.len)

2756

break;

2757

2758

err = clean_state_log_if_needed (mctx,

2759

bkref_str_off

2760

+ sl_str_diff);

2761

if (BE (err != REG_NOERROR, 0))

2762

return err;

2763

buf = (const char *) re_string_get_buffer (&mctx->input);

2764

}

2765

if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)

2766

/* We don't need to search this sub expression any more. */

2767

break;

2768

}

2769

bkref_str_off += sl_str_diff;

2770

sl_str += sl_str_diff;

2771

err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,

2772

bkref_str_idx);

2773

2774

/* Reload buf, since the preceding call might have reallocated

2775

the buffer. */

2776

buf = (const char *) re_string_get_buffer (&mctx->input);

2777

2778

if (err == REG_NOMATCH)

2779

continue;

2780

if (BE (err != REG_NOERROR, 0))

2781

return err;

2782

}

2783

2784

if (sub_last_idx < sub_top->nlasts)

2785

continue;

2786

if (sub_last_idx > 0)

2787

++sl_str;

2788

/* Then, search for the other last nodes of the sub expression. */

2789

for (; sl_str <= bkref_str_idx; ++sl_str)

2790

{

2791

int cls_node, sl_str_off;

2792

const re_node_set *nodes;

2793

sl_str_off = sl_str - sub_top->str_idx;

2794

/* The matched string by the sub expression match with the substring

2795

at the back reference? */

2796

if (sl_str_off > 0)

2797

{

2798

if (BE (bkref_str_off >= mctx->input.valid_len, 0))

2799

{

2800

/* If we are at the end of the input, we cannot match. */

2801

if (bkref_str_off >= mctx->input.len)

2802

break;

2803

2804

err = extend_buffers (mctx, bkref_str_off + 1);

2805

if (BE (err != REG_NOERROR, 0))

2806

return err;

2807

2808

buf = (const char *) re_string_get_buffer (&mctx->input);

2809

}

2810

if (buf [bkref_str_off++] != buf[sl_str - 1])

2811

break; /* We don't need to search this sub expression

2812

any more. */

2813

}

2814

if (mctx->state_log[sl_str] == NULL)

2815

continue;

2816

/* Does this state have a ')' of the sub expression? */

2817

nodes = &mctx->state_log[sl_str]->nodes;

2818

cls_node = find_subexp_node (dfa, nodes, subexp_num,

2819

OP_CLOSE_SUBEXP);

2820

if (cls_node == -1)

2821

continue; /* No. */

2822

if (sub_top->path == NULL)

2823

{

2824

sub_top->path = calloc (sizeof (state_array_t),

2825

sl_str - sub_top->str_idx + 1);

2826

if (sub_top->path == NULL)

2827

return REG_ESPACE;

2828

}

2829

/* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node

2830

in the current context? */

2831

err = check_arrival (mctx, sub_top->path, sub_top->node,

2832

sub_top->str_idx, cls_node, sl_str,

2833

OP_CLOSE_SUBEXP);

2834

if (err == REG_NOMATCH)

2835

continue;

2836

if (BE (err != REG_NOERROR, 0))

2837

return err;

2838

sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);

2839

if (BE (sub_last == NULL, 0))

2840

return REG_ESPACE;

2841

err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,

2842

bkref_str_idx);

2843

if (err == REG_NOMATCH)

2844

continue;

2845

}

2846

}

2847

return REG_NOERROR;

2848

}

2849

2850

/* Helper functions for get_subexp(). */

2851

2852

/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.

2853

If it can arrive, register the sub expression expressed with SUB_TOP

2854

and SUB_LAST. */

2855

2856

static reg_errcode_t

2857

internal_function

2858

get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,

2859

re_sub_match_last_t *sub_last, int bkref_node, int bkref_str)

2860

{

2861

reg_errcode_t err;

2862

int to_idx;

2863

/* Can the subexpression arrive the back reference? */

2864

err = check_arrival (mctx, &sub_last->path, sub_last->node,

2865

sub_last->str_idx, bkref_node, bkref_str,

2866

OP_OPEN_SUBEXP);

2867

if (err != REG_NOERROR)

2868

return err;

2869

err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,

2870

sub_last->str_idx);

2871

if (BE (err != REG_NOERROR, 0))

2872

return err;

2873

to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;

2874

return clean_state_log_if_needed (mctx, to_idx);

2875

}

2876

2877

/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.

2878

Search for nodes of type TYPE: OP_OPEN_SUBEXP for '(' or OP_CLOSE_SUBEXP for ')'.

2879

TODO: This function isn't efficient...

2880

Because there might be more than one nodes whose types are

2881

OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all

2882

nodes.

2883

E.g. RE: (a){2} */

2884

2885

static int

2886

internal_function

2887

find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,

2888

int subexp_idx, int type)

2889

{

2890

int cls_idx;

2891

for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)

2892

{

2893

int cls_node = nodes->elems[cls_idx];

2894

const re_token_t *node = dfa->nodes + cls_node;

2895

if (node->type == type

2896

&& node->opr.idx == subexp_idx)

2897

return cls_node;

2898

}

2899

return -1;

2900

}

2901

2902

/* Check whether the node TOP_NODE at TOP_STR can arrive to the node

2903

LAST_NODE at LAST_STR. We record the path onto PATH since it will be

2904

heavily reused.

2905

Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */

2906

2907

static reg_errcode_t

2908

internal_function __attribute_warn_unused_result__

2909

check_arrival (re_match_context_t *mctx, state_array_t *path, int top_node,

2910

int top_str, int last_node, int last_str, int type)

2911

{

2912

const re_dfa_t *const dfa = mctx->dfa;

2913

reg_errcode_t err = REG_NOERROR;

2914

int subexp_num, backup_cur_idx, str_idx, null_cnt;

2915

re_dfastate_t *cur_state = NULL;

2916

re_node_set *cur_nodes, next_nodes;

2917

re_dfastate_t **backup_state_log;

2918

unsigned int context;

2919

2920

subexp_num = dfa->nodes[top_node].opr.idx;

2921

/* Extend the buffer if we need. */

2922

if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))

2923

{

2924

re_dfastate_t **new_array;

2925

int old_alloc = path->alloc;

2926

path->alloc += last_str + mctx->max_mb_elem_len + 1;

2927

new_array = re_realloc (path->array, re_dfastate_t *, path->alloc);

2928

if (BE (new_array == NULL, 0))

2929

{

2930

path->alloc = old_alloc;

2931

return REG_ESPACE;

2932

}

2933

path->array = new_array;

2934

memset (new_array + old_alloc, '\0',

2935

sizeof (re_dfastate_t *) * (path->alloc - old_alloc));

2936

}

2937

2938

str_idx = path->next_idx ? path->next_idx : top_str;

2939

2940

/* Temporary modify MCTX. */

2941

backup_state_log = mctx->state_log;

2942

backup_cur_idx = mctx->input.cur_idx;

2943

mctx->state_log = path->array;

2944

mctx->input.cur_idx = str_idx;

2945

2946

/* Setup initial node set. */

2947

context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);

2948

if (str_idx == top_str)

2949

{

2950

err = re_node_set_init_1 (&next_nodes, top_node);

2951

if (BE (err != REG_NOERROR, 0))

2952

return err;

2953

err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);

2954

if (BE (err != REG_NOERROR, 0))

2955

{

2956

re_node_set_free (&next_nodes);

2957

return err;

2958

}

2959

}

2960

else

2961

{

2962

cur_state = mctx->state_log[str_idx];

2963

if (cur_state && cur_state->has_backref)

2964

{

2965

err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);

2966

if (BE (err != REG_NOERROR, 0))

2967

return err;

2968

}

2969

else

2970

re_node_set_init_empty (&next_nodes);

2971

}

2972

if (str_idx == top_str || (cur_state && cur_state->has_backref))

2973

{

2974

if (next_nodes.nelem)

2975

{

2976

err = expand_bkref_cache (mctx, &next_nodes, str_idx,

2977

subexp_num, type);

2978

if (BE (err != REG_NOERROR, 0))

2979

{

2980

re_node_set_free (&next_nodes);

2981

return err;

2982

}

2983

}

2984

cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);

2985

if (BE (cur_state == NULL && err != REG_NOERROR, 0))

2986

{

2987

re_node_set_free (&next_nodes);

2988

return err;

2989

}

2990

mctx->state_log[str_idx] = cur_state;

2991

}

2992

2993

for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)

2994

{

2995

re_node_set_empty (&next_nodes);

2996

if (mctx->state_log[str_idx + 1])

2997

{

2998

err = re_node_set_merge (&next_nodes,

2999

&mctx->state_log[str_idx + 1]->nodes);

3000

if (BE (err != REG_NOERROR, 0))

3001

{

3002

re_node_set_free (&next_nodes);

3003

return err;

3004

}

3005

}

3006

if (cur_state)

3007

{

3008

err = check_arrival_add_next_nodes (mctx, str_idx,

3009

&cur_state->non_eps_nodes,

3010

&next_nodes);

3011

if (BE (err != REG_NOERROR, 0))

3012

{

3013

re_node_set_free (&next_nodes);

3014

return err;

3015

}

3016

}

3017

++str_idx;

3018

if (next_nodes.nelem)

3019

{

3020

err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);

3021

if (BE (err != REG_NOERROR, 0))

3022

{

3023

re_node_set_free (&next_nodes);

3024

return err;

3025

}

3026

err = expand_bkref_cache (mctx, &next_nodes, str_idx,

3027

subexp_num, type);

3028

if (BE (err != REG_NOERROR, 0))

3029

{

3030

re_node_set_free (&next_nodes);

3031

return err;

3032

}

3033

}

3034

context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);

3035

cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);

3036

if (BE (cur_state == NULL && err != REG_NOERROR, 0))

3037

{

3038

re_node_set_free (&next_nodes);

3039

return err;

3040

}

3041

mctx->state_log[str_idx] = cur_state;

3042

null_cnt = cur_state == NULL ? null_cnt + 1 : 0;

3043

}

3044

re_node_set_free (&next_nodes);

3045

cur_nodes = (mctx->state_log[last_str] == NULL ? NULL

3046

: &mctx->state_log[last_str]->nodes);

3047

path->next_idx = str_idx;

3048

3049

/* Fix MCTX. */

3050

mctx->state_log = backup_state_log;

3051

mctx->input.cur_idx = backup_cur_idx;

3052

3053

/* Then check the current node set has the node LAST_NODE. */

3054

if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))

3055

return REG_NOERROR;

3056

3057

return REG_NOMATCH;

3058

}

3059

3060

/* Helper functions for check_arrival. */

3061

3062

/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them

3063

to NEXT_NODES.

3064

TODO: This function is similar to the functions transit_state*(),

3065

however this function has many additional works.

3066

Can't we unify them? */

3067

3068

static reg_errcode_t

3069

internal_function __attribute_warn_unused_result__

3070

check_arrival_add_next_nodes (re_match_context_t *mctx, int str_idx,

3071

re_node_set *cur_nodes, re_node_set *next_nodes)

3072

{

3073

const re_dfa_t *const dfa = mctx->dfa;

3074

int result;

3075

int cur_idx;

3076

#ifdef RE_ENABLE_I18N

3077

reg_errcode_t err = REG_NOERROR;

3078

#endif

3079

re_node_set union_set;

3080

re_node_set_init_empty (&union_set);

3081

for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)

3082

{

3083

int naccepted = 0;

3084

int cur_node = cur_nodes->elems[cur_idx];

3085

#ifdef DEBUG

3086

re_token_type_t type = dfa->nodes[cur_node].type;

3087

assert (!IS_EPSILON_NODE (type));

3088

#endif

3089

#ifdef RE_ENABLE_I18N

3090

/* If the node may accept `multi byte'. */

3091

if (dfa->nodes[cur_node].accept_mb)

3092

{

3093

naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,

3094

str_idx);

3095

if (naccepted > 1)

3096

{

3097

re_dfastate_t *dest_state;

3098

int next_node = dfa->nexts[cur_node];

3099

int next_idx = str_idx + naccepted;

3100

dest_state = mctx->state_log[next_idx];

3101

re_node_set_empty (&union_set);

3102

if (dest_state)

3103

{

3104

err = re_node_set_merge (&union_set, &dest_state->nodes);

3105

if (BE (err != REG_NOERROR, 0))

3106

{

3107

re_node_set_free (&union_set);

3108

return err;

3109

}

3110

}

3111

result = re_node_set_insert (&union_set, next_node);

3112

if (BE (result < 0, 0))

3113

{

3114

re_node_set_free (&union_set);

3115

return REG_ESPACE;

3116

}

3117

mctx->state_log[next_idx] = re_acquire_state (&err, dfa,

3118

&union_set);

3119

if (BE (mctx->state_log[next_idx] == NULL

3120

&& err != REG_NOERROR, 0))

3121

{

3122

re_node_set_free (&union_set);

3123

return err;

3124

}

3125

}

3126

}

3127

#endif /* RE_ENABLE_I18N */

3128

if (naccepted

3129

|| check_node_accept (mctx, dfa->nodes + cur_node, str_idx))

3130

{

3131

result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);

3132

if (BE (result < 0, 0))

3133

{

3134

re_node_set_free (&union_set);

3135

return REG_ESPACE;

3136

}

3137

}

3138

}

3139

re_node_set_free (&union_set);

3140

return REG_NOERROR;

3141

}

3142

3143

/* For all the nodes in CUR_NODES, add the epsilon closures of them to

3144

CUR_NODES, however exclude the nodes which are:

3145

- inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.

3146

- out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.

3147

3148

3149

static reg_errcode_t

3150

internal_function

3151

check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,

3152

int ex_subexp, int type)

3153

{

3154

reg_errcode_t err;

3155

int idx, outside_node;

3156

re_node_set new_nodes;

3157

#ifdef DEBUG

3158

assert (cur_nodes->nelem);

3159

#endif

3160

err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);

3161

if (BE (err != REG_NOERROR, 0))

3162

return err;

3163

/* Create a new node set NEW_NODES with the nodes which are epsilon

3164

closures of the node in CUR_NODES. */

3165

3166

for (idx = 0; idx < cur_nodes->nelem; ++idx)

3167

{

3168

int cur_node = cur_nodes->elems[idx];

3169

const re_node_set *eclosure = dfa->eclosures + cur_node;

3170

outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);

3171

if (outside_node == -1)

3172

{

3173

/* There are no problematic nodes, just merge them. */

3174

err = re_node_set_merge (&new_nodes, eclosure);

3175

if (BE (err != REG_NOERROR, 0))

3176

{

3177

re_node_set_free (&new_nodes);

3178

return err;

3179

}

3180

}

3181

else

3182

{

3183

/* There are problematic nodes, re-calculate incrementally. */

3184

err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,

3185

ex_subexp, type);

3186

if (BE (err != REG_NOERROR, 0))

3187

{

3188

re_node_set_free (&new_nodes);

3189

return err;

3190

}

3191

}

3192

}

3193

re_node_set_free (cur_nodes);

3194

*cur_nodes = new_nodes;

3195

return REG_NOERROR;

3196

}

3197

3198

/* Helper function for check_arrival_expand_ecl.

3199

Check incrementally the epsilon closure of TARGET, and if it isn't

3200

problematic append it to DST_NODES. */

3201

3202

static reg_errcode_t

3203

internal_function __attribute_warn_unused_result__

3204

check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,

3205

int target, int ex_subexp, int type)

3206

{

3207

int cur_node;

3208

for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)

3209

{

3210

int err;

3211

3212

if (dfa->nodes[cur_node].type == type

3213

&& dfa->nodes[cur_node].opr.idx == ex_subexp)

3214

{

3215

if (type == OP_CLOSE_SUBEXP)

3216

{

3217

err = re_node_set_insert (dst_nodes, cur_node);

3218

if (BE (err == -1, 0))

3219

return REG_ESPACE;

3220

}

3221

break;

3222

}

3223

err = re_node_set_insert (dst_nodes, cur_node);

3224

if (BE (err == -1, 0))

3225

return REG_ESPACE;

3226

if (dfa->edests[cur_node].nelem == 0)

3227

break;

3228

if (dfa->edests[cur_node].nelem == 2)

3229

{

3230

err = check_arrival_expand_ecl_sub (dfa, dst_nodes,

3231

dfa->edests[cur_node].elems[1],

3232

ex_subexp, type);

3233

if (BE (err != REG_NOERROR, 0))

3234

return err;

3235

}

3236

cur_node = dfa->edests[cur_node].elems[0];

3237

}

3238

return REG_NOERROR;

3239

}

3240

3241

3242

/* For all the back references in the current state, calculate the

3243

destination of the back references by the appropriate entry

3244

in MCTX->BKREF_ENTS. */

3245

3246

static reg_errcode_t

3247

internal_function __attribute_warn_unused_result__

3248

expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,

3249

int cur_str, int subexp_num, int type)

3250

{

3251

const re_dfa_t *const dfa = mctx->dfa;

3252

reg_errcode_t err;

3253

int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);

3254

struct re_backref_cache_entry *ent;

3255

3256

if (cache_idx_start == -1)

3257

return REG_NOERROR;

3258

3259

restart:

3260

ent = mctx->bkref_ents + cache_idx_start;

3261

3262

{

3263

int to_idx, next_node;

3264

3265

/* Is this entry ENT is appropriate? */

3266

if (!re_node_set_contains (cur_nodes, ent->node))

3267

continue; /* No. */

3268

3269

to_idx = cur_str + ent->subexp_to - ent->subexp_from;

3270

/* Calculate the destination of the back reference, and append it

3271

to MCTX->STATE_LOG. */

3272

if (to_idx == cur_str)

3273

{

3274

/* The backreference did epsilon transit, we must re-check all the

3275

node in the current state. */

3276

re_node_set new_dests;

3277

reg_errcode_t err2, err3;

3278

next_node = dfa->edests[ent->node].elems[0];

3279

if (re_node_set_contains (cur_nodes, next_node))

3280

continue;

3281

err = re_node_set_init_1 (&new_dests, next_node);

3282

err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);

3283

err3 = re_node_set_merge (cur_nodes, &new_dests);

3284

re_node_set_free (&new_dests);

3285

if (BE (err != REG_NOERROR || err2 != REG_NOERROR

3286

|| err3 != REG_NOERROR, 0))

3287

{

3288

err = (err != REG_NOERROR ? err

3289

: (err2 != REG_NOERROR ? err2 : err3));

3290

return err;

3291

}

3292

/* TODO: It is still inefficient... */

3293

goto restart;

3294

}

3295

else

3296

{

3297

re_node_set union_set;

3298

next_node = dfa->nexts[ent->node];

3299

if (mctx->state_log[to_idx])

3300

{

3301

int ret;

3302

if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,

3303

next_node))

3304

continue;

3305

err = re_node_set_init_copy (&union_set,

3306

&mctx->state_log[to_idx]->nodes);

3307

ret = re_node_set_insert (&union_set, next_node);

3308

if (BE (err != REG_NOERROR || ret < 0, 0))

3309

{

3310

re_node_set_free (&union_set);

3311

err = err != REG_NOERROR ? err : REG_ESPACE;

3312

return err;

3313

}

3314

}

3315

else

3316

{

3317

err = re_node_set_init_1 (&union_set, next_node);

3318

if (BE (err != REG_NOERROR, 0))

3319

return err;

3320

}

3321

mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);

3322

re_node_set_free (&union_set);

3323

if (BE (mctx->state_log[to_idx] == NULL

3324

&& err != REG_NOERROR, 0))

3325

return err;

3326

}

3327

}

3328

while (ent++->more);

3329

return REG_NOERROR;

3330

}

3331

3332

/* Build transition table for the state.

3333

Return 1 if succeeded, otherwise return NULL. */

3334

3335

static int

3336

internal_function

3337

build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)

3338

{

3339

reg_errcode_t err;

3340

int i, j, ch, need_word_trtable = 0;

3341

bitset_word_t elem, mask;

3342

bool dests_node_malloced = false;

3343

bool dest_states_malloced = false;

3344

int ndests; /* Number of the destination states from `state'. */

3345

re_dfastate_t **trtable;

3346

re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;

3347

re_node_set follows, *dests_node;

3348

bitset_t *dests_ch;

3349

bitset_t acceptable;

3350

3351

struct dests_alloc

3352

{

3353

re_node_set dests_node[SBC_MAX];

3354

bitset_t dests_ch[SBC_MAX];

3355

} *dests_alloc;

3356

3357

/* We build DFA states which corresponds to the destination nodes

3358

from `state'. `dests_node[i]' represents the nodes which i-th

3359

destination state contains, and `dests_ch[i]' represents the

3360

characters which i-th destination state accepts. */

3361

#ifdef HAVE_ALLOCA

3362

if (__libc_use_alloca (sizeof (struct dests_alloc)))

3363

dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));

3364

else

3365

#endif

3366

{

3367

dests_alloc = re_malloc (struct dests_alloc, 1);

3368

if (BE (dests_alloc == NULL, 0))

3369

return 0;

3370

dests_node_malloced = true;

3371

}

3372

dests_node = dests_alloc->dests_node;

3373

dests_ch = dests_alloc->dests_ch;

3374

3375

/* Initialize transiton table. */

3376

state->word_trtable = state->trtable = NULL;

3377

3378

/* At first, group all nodes belonging to `state' into several

3379

destinations. */

3380

ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);

3381

if (BE (ndests <= 0, 0))

3382

{

3383

if (dests_node_malloced)

3384

free (dests_alloc);

3385

/* Return 0 in case of an error, 1 otherwise. */

3386

if (ndests == 0)

3387

{

3388

state->trtable = (re_dfastate_t **)

3389

calloc (sizeof (re_dfastate_t *), SBC_MAX);

3390

if (BE (state->trtable == NULL, 0))

3391

return 0;

3392

return 1;

3393

}

3394

return 0;

3395

}

3396

3397

err = re_node_set_alloc (&follows, ndests + 1);

3398

if (BE (err != REG_NOERROR, 0))

3399

goto out_free;

3400

3401

/* Avoid arithmetic overflow in size calculation. */

3402

if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)

3403

/ (3 * sizeof (re_dfastate_t *)))

3404

< ndests),

3405

0))

3406

goto out_free;

3407

3408

#ifdef HAVE_ALLOCA

3409

if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX

3410

+ ndests * 3 * sizeof (re_dfastate_t *)))

3411

dest_states = (re_dfastate_t **)

3412

alloca (ndests * 3 * sizeof (re_dfastate_t *));

3413

else

3414

#endif

3415

{

3416

dest_states = (re_dfastate_t **)

3417

malloc (ndests * 3 * sizeof (re_dfastate_t *));

3418

if (BE (dest_states == NULL, 0))

3419

{

3420

out_free:

3421

if (dest_states_malloced)

3422

free (dest_states);

3423

re_node_set_free (&follows);

3424

for (i = 0; i < ndests; ++i)

3425

re_node_set_free (dests_node + i);

3426

if (dests_node_malloced)

3427

free (dests_alloc);

3428

return 0;

3429

}

3430

dest_states_malloced = true;

3431

}

3432

dest_states_word = dest_states + ndests;

3433

dest_states_nl = dest_states_word + ndests;

3434

bitset_empty (acceptable);

3435

3436

/* Then build the states for all destinations. */

3437

for (i = 0; i < ndests; ++i)

3438

{

3439

int next_node;

3440

re_node_set_empty (&follows);

3441

/* Merge the follows of this destination states. */

3442

for (j = 0; j < dests_node[i].nelem; ++j)

3443

{

3444

next_node = dfa->nexts[dests_node[i].elems[j]];

3445

if (next_node != -1)

3446

{

3447

err = re_node_set_merge (&follows, dfa->eclosures + next_node);

3448

if (BE (err != REG_NOERROR, 0))

3449

goto out_free;

3450

}

3451

}

3452

dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);

3453

if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))

3454

goto out_free;

3455

/* If the new state has context constraint,

3456

build appropriate states for these contexts. */

3457

if (dest_states[i]->has_constraint)

3458

{

3459

dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,

3460

CONTEXT_WORD);

3461

if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))

3462

goto out_free;

3463

3464

if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)

3465

need_word_trtable = 1;

3466

3467

dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,

3468

CONTEXT_NEWLINE);

3469

if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))

3470

goto out_free;

3471

}

3472

else

3473

{

3474

dest_states_word[i] = dest_states[i];

3475

dest_states_nl[i] = dest_states[i];

3476

}

3477

bitset_merge (acceptable, dests_ch[i]);

3478

}

3479

3480

if (!BE (need_word_trtable, 0))

3481

{

3482

/* We don't care about whether the following character is a word

3483

character, or we are in a single-byte character set so we can

3484

discern by looking at the character code: allocate a

3485

256-entry transition table. */

3486

trtable = state->trtable =

3487

(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);

3488

if (BE (trtable == NULL, 0))

3489

goto out_free;

3490

3491

/* For all characters ch...: */

3492

for (i = 0; i < BITSET_WORDS; ++i)

3493

for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;

3494

elem;

3495

mask <<= 1, elem >>= 1, ++ch)

3496

if (BE (elem & 1, 0))

3497

{

3498

/* There must be exactly one destination which accepts

3499

character ch. See group_nodes_into_DFAstates. */

3500

for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)

3501

;

3502

3503

/* j-th destination accepts the word character ch. */

3504

if (dfa->word_char[i] & mask)

3505

trtable[ch] = dest_states_word[j];

3506

else

3507

trtable[ch] = dest_states[j];

3508

}

3509

}

3510

else

3511

{

3512

/* We care about whether the following character is a word

3513

character, and we are in a multi-byte character set: discern

3514

by looking at the character code: build two 256-entry

3515

transition tables, one starting at trtable[0] and one

3516

starting at trtable[SBC_MAX]. */

3517

trtable = state->word_trtable =

3518

(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);

3519

if (BE (trtable == NULL, 0))

3520

goto out_free;

3521

3522

/* For all characters ch...: */

3523

for (i = 0; i < BITSET_WORDS; ++i)

3524

for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;

3525

elem;

3526

mask <<= 1, elem >>= 1, ++ch)

3527

if (BE (elem & 1, 0))

3528

{

3529

/* There must be exactly one destination which accepts

3530

character ch. See group_nodes_into_DFAstates. */

3531

for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)

3532

;

3533

3534

/* j-th destination accepts the word character ch. */

3535

trtable[ch] = dest_states[j];

3536

trtable[ch + SBC_MAX] = dest_states_word[j];

3537

}

3538

}

3539

3540

/* new line */

3541

if (bitset_contain (acceptable, NEWLINE_CHAR))

3542

{

3543

/* The current state accepts newline character. */

3544

for (j = 0; j < ndests; ++j)

3545

if (bitset_contain (dests_ch[j], NEWLINE_CHAR))

3546

{

3547

/* k-th destination accepts newline character. */

3548

trtable[NEWLINE_CHAR] = dest_states_nl[j];

3549

if (need_word_trtable)

3550

trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];

3551

/* There must be only one destination which accepts

3552

newline. See group_nodes_into_DFAstates. */

3553

break;

3554

}

3555

}

3556

3557

if (dest_states_malloced)

3558

free (dest_states);

3559

3560

re_node_set_free (&follows);

3561

for (i = 0; i < ndests; ++i)

3562

re_node_set_free (dests_node + i);

3563

3564

if (dests_node_malloced)

3565

free (dests_alloc);

3566

3567

return 1;

3568

}

3569

3570

/* Group all nodes belonging to STATE into several destinations.

3571

Then for all destinations, set the nodes belonging to the destination

3572

to DESTS_NODE[i] and set the characters accepted by the destination

3573

to DEST_CH[i]. This function return the number of destinations. */

3574

3575

static int

3576

internal_function

3577

group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,

3578

re_node_set *dests_node, bitset_t *dests_ch)

3579

{

3580

reg_errcode_t err;

3581

int result;

3582

int i, j, k;

3583

int ndests; /* Number of the destinations from `state'. */

3584

bitset_t accepts; /* Characters a node can accept. */

3585

const re_node_set *cur_nodes = &state->nodes;

3586

bitset_empty (accepts);

3587

ndests = 0;

3588

3589

/* For all the nodes belonging to `state', */

3590

for (i = 0; i < cur_nodes->nelem; ++i)

3591

{

3592

re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];

3593

re_token_type_t type = node->type;

3594

unsigned int constraint = node->constraint;

3595

3596

/* Enumerate all single byte character this node can accept. */

3597

if (type == CHARACTER)

3598

bitset_set (accepts, node->opr.c);

3599

else if (type == SIMPLE_BRACKET)

3600

{

3601

bitset_merge (accepts, node->opr.sbcset);

3602

}

3603

else if (type == OP_PERIOD)

3604

{

3605

#ifdef RE_ENABLE_I18N

3606

if (dfa->mb_cur_max > 1)

3607

bitset_merge (accepts, dfa->sb_char);

3608

else

3609

#endif

3610

bitset_set_all (accepts);

3611

if (!(dfa->syntax & RE_DOT_NEWLINE))

3612

bitset_clear (accepts, '\n');

3613

if (dfa->syntax & RE_DOT_NOT_NULL)

3614

bitset_clear (accepts, '\0');

3615

}

3616

#ifdef RE_ENABLE_I18N

3617

else if (type == OP_UTF8_PERIOD)

3618

{

3619

memset (accepts, '\xff', sizeof (bitset_t) / 2);

3620

if (!(dfa->syntax & RE_DOT_NEWLINE))

3621

bitset_clear (accepts, '\n');

3622

if (dfa->syntax & RE_DOT_NOT_NULL)

3623

bitset_clear (accepts, '\0');

3624

}

3625

#endif

3626

else

3627

continue;

3628

3629

/* Check the `accepts' and sift the characters which are not

3630

match it the context. */

3631

if (constraint)

3632

{

3633

if (constraint & NEXT_NEWLINE_CONSTRAINT)

3634

{

3635

bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);

3636

bitset_empty (accepts);

3637

if (accepts_newline)

3638

bitset_set (accepts, NEWLINE_CHAR);

3639

else

3640

continue;

3641

}

3642

if (constraint & NEXT_ENDBUF_CONSTRAINT)

3643

{

3644

bitset_empty (accepts);

3645

continue;

3646

}

3647

3648

if (constraint & NEXT_WORD_CONSTRAINT)

3649

{

3650

bitset_word_t any_set = 0;

3651

if (type == CHARACTER && !node->word_char)

3652

{

3653

bitset_empty (accepts);

3654

continue;

3655

}

3656

#ifdef RE_ENABLE_I18N

3657

if (dfa->mb_cur_max > 1)

3658

for (j = 0; j < BITSET_WORDS; ++j)

3659

any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));

3660

else

3661

#endif

3662

for (j = 0; j < BITSET_WORDS; ++j)

3663

any_set |= (accepts[j] &= dfa->word_char[j]);

3664

if (!any_set)

3665

continue;

3666

}

3667

if (constraint & NEXT_NOTWORD_CONSTRAINT)

3668

{

3669

bitset_word_t any_set = 0;

3670

if (type == CHARACTER && node->word_char)

3671

{

3672

bitset_empty (accepts);

3673

continue;

3674

}

3675

#ifdef RE_ENABLE_I18N

3676

if (dfa->mb_cur_max > 1)

3677

for (j = 0; j < BITSET_WORDS; ++j)

3678

any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));

3679

else

3680

#endif

3681

for (j = 0; j < BITSET_WORDS; ++j)

3682

any_set |= (accepts[j] &= ~dfa->word_char[j]);

3683

if (!any_set)

3684

continue;

3685

}

3686

}

3687

3688

/* Then divide `accepts' into DFA states, or create a new

3689

state. Above, we make sure that accepts is not empty. */

3690

for (j = 0; j < ndests; ++j)

3691

{

3692

bitset_t intersec; /* Intersection sets, see below. */

3693

bitset_t remains;

3694

/* Flags, see below. */

3695

bitset_word_t has_intersec, not_subset, not_consumed;

3696

3697

/* Optimization, skip if this state doesn't accept the character. */

3698

if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))

3699

continue;

3700

3701

/* Enumerate the intersection set of this state and `accepts'. */

3702

has_intersec = 0;

3703

for (k = 0; k < BITSET_WORDS; ++k)

3704

has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];

3705

/* And skip if the intersection set is empty. */

3706

if (!has_intersec)

3707

continue;

3708

3709

/* Then check if this state is a subset of `accepts'. */

3710

not_subset = not_consumed = 0;

3711

for (k = 0; k < BITSET_WORDS; ++k)

3712

{

3713

not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];

3714

not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];

3715

}

3716

3717

/* If this state isn't a subset of `accepts', create a

3718

new group state, which has the `remains'. */

3719

if (not_subset)

3720

{

3721

bitset_copy (dests_ch[ndests], remains);

3722

bitset_copy (dests_ch[j], intersec);

3723

err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);

3724

if (BE (err != REG_NOERROR, 0))

3725

goto error_return;

3726

++ndests;

3727

}

3728

3729

/* Put the position in the current group. */

3730

result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);

3731

if (BE (result < 0, 0))

3732

goto error_return;

3733

3734

/* If all characters are consumed, go to next node. */

3735

if (!not_consumed)

3736

break;

3737

}

3738

/* Some characters remain, create a new group. */

3739

if (j == ndests)

3740

{

3741

bitset_copy (dests_ch[ndests], accepts);

3742

err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);

3743

if (BE (err != REG_NOERROR, 0))

3744

goto error_return;

3745

++ndests;

3746

bitset_empty (accepts);

3747

}

3748

}

3749

return ndests;

3750

error_return:

3751

for (j = 0; j < ndests; ++j)

3752

re_node_set_free (dests_node + j);

3753

return -1;

3754

}

3755

3756

#ifdef RE_ENABLE_I18N

3757

/* Check how many bytes the node `dfa->nodes[node_idx]' accepts.

3758

Return the number of the bytes the node accepts.

3759

STR_IDX is the current index of the input string.

3760

3761

This function handles the nodes which can accept one character, or

3762

one collating element like '.', '[a-z]', opposite to the other nodes

3763

can only accept one byte. */

3764

3765

static int

3766

internal_function

3767

check_node_accept_bytes (const re_dfa_t *dfa, int node_idx,

3768

const re_string_t *input, int str_idx)

3769

{

3770

const re_token_t *node = dfa->nodes + node_idx;

3771

int char_len, elem_len;

3772

int i;

3773

wint_t wc;

3774

3775

if (BE (node->type == OP_UTF8_PERIOD, 0))

3776

{

3777

unsigned char c = re_string_byte_at (input, str_idx), d;

3778

if (BE (c < 0xc2, 1))

3779

return 0;

3780

3781

if (str_idx + 2 > input->len)

3782

return 0;

3783

3784

d = re_string_byte_at (input, str_idx + 1);

3785

if (c < 0xe0)

3786

return (d < 0x80 || d > 0xbf) ? 0 : 2;

3787

else if (c < 0xf0)

3788

{

3789

char_len = 3;

3790

if (c == 0xe0 && d < 0xa0)

3791

return 0;

3792

}

3793

else if (c < 0xf8)

3794

{

3795

char_len = 4;

3796

if (c == 0xf0 && d < 0x90)

3797

return 0;

3798

}

3799

else if (c < 0xfc)

3800

{

3801

char_len = 5;

3802

if (c == 0xf8 && d < 0x88)

3803

return 0;

3804

}

3805

else if (c < 0xfe)

3806

{

3807

char_len = 6;

3808

if (c == 0xfc && d < 0x84)

3809

return 0;

3810

}

3811

else

3812

return 0;

3813

3814

if (str_idx + char_len > input->len)

3815

return 0;

3816

3817

for (i = 1; i < char_len; ++i)

3818

{

3819

d = re_string_byte_at (input, str_idx + i);

3820

if (d < 0x80 || d > 0xbf)

3821

return 0;

3822

}

3823

return char_len;

3824

}

3825

3826

char_len = re_string_char_size_at (input, str_idx);

3827

if (node->type == OP_PERIOD)

3828

{

3829

if (char_len <= 1)

3830

return 0;

3831

/* FIXME: I don't think this if is needed, as both '\n'

3832

and '\0' are char_len == 1. */

3833

/* '.' accepts any one character except the following two cases. */

3834

if ((!(dfa->syntax & RE_DOT_NEWLINE) &&

3835

re_string_byte_at (input, str_idx) == '\n') ||

3836

((dfa->syntax & RE_DOT_NOT_NULL) &&

3837

re_string_byte_at (input, str_idx) == '\0'))

3838

return 0;

3839

return char_len;

3840

}

3841

3842

elem_len = re_string_elem_size_at (input, str_idx);

3843

wc = __btowc(*(input->mbs+str_idx));

3844

if (((elem_len <= 1 && char_len <= 1) || char_len == 0) && (wc != WEOF && wc < SBC_MAX))

3845

return 0;

3846

3847

if (node->type == COMPLEX_BRACKET)

3848

{

3849

const re_charset_t *cset = node->opr.mbcset;

3850

# ifdef _LIBC

3851

const unsigned char *pin

3852

= ((const unsigned char *) re_string_get_buffer (input) + str_idx);

3853

int j;

3854

uint32_t nrules;

3855

# endif /* _LIBC */

3856

int match_len = 0;

3857

wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)

3858

? re_string_wchar_at (input, str_idx) : 0);

3859

3860

/* match with multibyte character? */

3861

for (i = 0; i < cset->nmbchars; ++i)

3862

if (wc == cset->mbchars[i])

3863

{

3864

match_len = char_len;

3865

goto check_node_accept_bytes_match;

3866

}

3867

/* match with character_class? */

3868

for (i = 0; i < cset->nchar_classes; ++i)

3869

{

3870

wctype_t wt = cset->char_classes[i];

3871

if (__iswctype (wc, wt))

3872

{

3873

match_len = char_len;

3874

goto check_node_accept_bytes_match;

3875

}

3876

}

3877

3878

# ifdef _LIBC

3879

nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);

3880

if (nrules != 0)

3881

{

3882

unsigned int in_collseq = 0;

3883

const int32_t *table, *indirect;

3884

const unsigned char *weights, *extra;

3885

const char *collseqwc;

3886

/* This #include defines a local function! */

3887

# include <locale/weight.h>

3888

3889

/* match with collating_symbol? */

3890

if (cset->ncoll_syms)

3891

extra = (const unsigned char *)

3892

_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);

3893

for (i = 0; i < cset->ncoll_syms; ++i)

3894

{

3895

const unsigned char *coll_sym = extra + cset->coll_syms[i];

3896

/* Compare the length of input collating element and

3897

the length of current collating element. */

3898

if (*coll_sym != elem_len)

3899

continue;

3900

/* Compare each bytes. */

3901

for (j = 0; j < *coll_sym; j++)

3902

if (pin[j] != coll_sym[1 + j])

3903

break;

3904

if (j == *coll_sym)

3905

{

3906

/* Match if every bytes is equal. */

3907

match_len = j;

3908

goto check_node_accept_bytes_match;

3909

}

3910

}

3911

3912

if (cset->nranges)

3913

{

3914

if (elem_len <= char_len)

3915

{

3916

collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);

3917

in_collseq = __collseq_table_lookup (collseqwc, wc);

3918

}

3919

else

3920

in_collseq = find_collation_sequence_value (pin, elem_len);

3921

}

3922

/* match with range expression? */

3923

for (i = 0; i < cset->nranges; ++i)

3924

if (cset->range_starts[i] <= in_collseq

3925

&& in_collseq <= cset->range_ends[i])

3926

{

3927

match_len = elem_len;

3928

goto check_node_accept_bytes_match;

3929

}

3930

3931

/* match with equivalence_class? */

3932

if (cset->nequiv_classes)

3933

{

3934

const unsigned char *cp = pin;

3935

table = (const int32_t *)

3936

_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);

3937

weights = (const unsigned char *)

3938

_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);

3939

extra = (const unsigned char *)

3940

_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);

3941

indirect = (const int32_t *)

3942

_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);

3943

int32_t idx = findidx (&cp, elem_len);

3944

if (idx > 0)

3945

for (i = 0; i < cset->nequiv_classes; ++i)

3946

{

3947

int32_t equiv_class_idx = cset->equiv_classes[i];

3948

size_t weight_len = weights[idx & 0xffffff];

3949

if (weight_len == weights[equiv_class_idx & 0xffffff]

3950

&& (idx >> 24) == (equiv_class_idx >> 24))

3951

{

3952

int cnt = 0;

3953

3954

idx &= 0xffffff;

3955

equiv_class_idx &= 0xffffff;

3956

3957

while (cnt <= weight_len

3958

&& (weights[equiv_class_idx + 1 + cnt]

3959

== weights[idx + 1 + cnt]))

3960

++cnt;

3961

if (cnt > weight_len)

3962

{

3963

match_len = elem_len;

3964

goto check_node_accept_bytes_match;

3965

}

3966

}

3967

}

3968

}

3969

}

3970

else

3971

# endif /* _LIBC */

3972

{

3973

/* match with range expression? */

3974

for (i = 0; i < cset->nranges; ++i)

3975

{

3976

if (cset->range_starts[i] <= wc

3977

&& wc <= cset->range_ends[i])

3978

{

3979

match_len = char_len;

3980

goto check_node_accept_bytes_match;

3981

}

3982

}

3983

}

3984

check_node_accept_bytes_match:

3985

if (!cset->non_match)

3986

return match_len;

3987

else

3988

{

3989

if (match_len > 0)

3990

return 0;

3991

else

3992

return (elem_len > char_len) ? elem_len : char_len;

3993

}

3994

}

3995

return 0;

3996

}

3997

3998

# ifdef _LIBC

3999

static unsigned int

4000

internal_function

4001

find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)

4002

{

4003

uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);

4004

if (nrules == 0)

4005

{

4006

if (mbs_len == 1)

4007

{

4008

/* No valid character. Match it as a single byte character. */

4009

const unsigned char *collseq = (const unsigned char *)

4010

_NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);

4011

return collseq[mbs[0]];

4012

}

4013

return UINT_MAX;

4014

}

4015

else

4016

{

4017

int32_t idx;

4018

const unsigned char *extra = (const unsigned char *)

4019

_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);

4020

int32_t extrasize = (const unsigned char *)

4021

_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;

4022

4023

for (idx = 0; idx < extrasize;)

4024

{

4025

int mbs_cnt, found = 0;

4026

int32_t elem_mbs_len;

4027

/* Skip the name of collating element name. */

4028

idx = idx + extra[idx] + 1;

4029

elem_mbs_len = extra[idx++];

4030

if (mbs_len == elem_mbs_len)

4031

{

4032

for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)

4033

if (extra[idx + mbs_cnt] != mbs[mbs_cnt])

4034

break;

4035

if (mbs_cnt == elem_mbs_len)

4036

/* Found the entry. */

4037

found = 1;

4038

}

4039

/* Skip the byte sequence of the collating element. */

4040

idx += elem_mbs_len;

4041

/* Adjust for the alignment. */

4042

idx = (idx + 3) & ~3;

4043

/* Skip the collation sequence value. */

4044

idx += sizeof (uint32_t);

4045

/* Skip the wide char sequence of the collating element. */

4046

idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);

4047

/* If we found the entry, return the sequence value. */

4048

if (found)

4049

return *(uint32_t *) (extra + idx);

4050

/* Skip the collation sequence value. */

4051

idx += sizeof (uint32_t);

4052

}

4053

return UINT_MAX;

4054

}

4055

}

4056

# endif /* _LIBC */

4057

#endif /* RE_ENABLE_I18N */

4058

4059

/* Check whether the node accepts the byte which is IDX-th

4060

byte of the INPUT. */

4061

4062

static int

4063

internal_function

4064

check_node_accept (const re_match_context_t *mctx, const re_token_t *node,

4065

int idx)

4066

{

4067

unsigned char ch;

4068

ch = re_string_byte_at (&mctx->input, idx);

4069

switch (node->type)

4070

{

4071

case CHARACTER:

4072

if (node->opr.c != ch)

4073

return 0;

4074

break;

4075

4076

case SIMPLE_BRACKET:

4077

if (!bitset_contain (node->opr.sbcset, ch))

4078

return 0;

4079

break;

4080

4081

#ifdef RE_ENABLE_I18N

4082

case OP_UTF8_PERIOD:

4083

if (ch >= 0x80)

4084

return 0;

4085

/* FALLTHROUGH */

4086

#endif

4087

case OP_PERIOD:

4088

if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))

4089

|| (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))

4090

return 0;

4091

break;

4092

4093

default:

4094

return 0;

4095

}

4096

4097

if (node->constraint)

4098

{

4099

/* The node has constraints. Check whether the current context

4100

satisfies the constraints. */

4101

unsigned int context = re_string_context_at (&mctx->input, idx,

4102

mctx->eflags);

4103

if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))

4104

return 0;

4105

}

4106

4107

return 1;

4108

}

4109

4110

/* Extend the buffers, if the buffers have run out. */

4111

4112

static reg_errcode_t

4113

internal_function __attribute_warn_unused_result__

4114

extend_buffers (re_match_context_t *mctx, int min_len)

4115

{

4116

reg_errcode_t ret;

4117

re_string_t *pstr = &mctx->input;

4118

4119

/* Avoid overflow. */

4120

if (BE (INT_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))

4121

return REG_ESPACE;

4122

4123

/* Double the lengthes of the buffers, but allocate at least MIN_LEN. */

4124

ret = re_string_realloc_buffers (pstr,

4125

MAX (min_len,

4126

MIN (pstr->len, pstr->bufs_len * 2)));

4127

if (BE (ret != REG_NOERROR, 0))

4128

return ret;

4129

4130

if (mctx->state_log != NULL)

4131

{

4132

/* And double the length of state_log. */

4133

/* XXX We have no indication of the size of this buffer. If this

4134

allocation fail we have no indication that the state_log array

4135

does not have the right size. */

4136

re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,

4137

pstr->bufs_len + 1);

4138

if (BE (new_array == NULL, 0))

4139

return REG_ESPACE;

4140

mctx->state_log = new_array;

4141

}

4142

4143

/* Then reconstruct the buffers. */

4144

if (pstr->icase)

4145

{

4146

#ifdef RE_ENABLE_I18N

4147

if (pstr->mb_cur_max > 1)

4148

{

4149

ret = build_wcs_upper_buffer (pstr);

4150

if (BE (ret != REG_NOERROR, 0))

4151

return ret;

4152

}

4153

else

4154

#endif /* RE_ENABLE_I18N */

4155

build_upper_buffer (pstr);

4156

}

4157

else

4158

{

4159

#ifdef RE_ENABLE_I18N

4160

if (pstr->mb_cur_max > 1)

4161

build_wcs_buffer (pstr);

4162

else

4163

#endif /* RE_ENABLE_I18N */

4164

{

4165

if (pstr->trans != NULL)

4166

re_string_translate_buffer (pstr);

4167

}

4168

}

4169

return REG_NOERROR;

4170

}

4171

4172

4173

/* Functions for matching context. */

4174

4175

/* Initialize MCTX. */

4176

4177

static reg_errcode_t

4178

internal_function __attribute_warn_unused_result__

4179

match_ctx_init (re_match_context_t *mctx, int eflags, int n)

4180

{

4181

mctx->eflags = eflags;

4182

mctx->match_last = -1;

4183

if (n > 0)

4184

{

4185

mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);

4186

mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);

4187

if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))

4188

return REG_ESPACE;

4189

}

4190

/* Already zero-ed by the caller.

4191

else

4192

mctx->bkref_ents = NULL;

4193

mctx->nbkref_ents = 0;

4194

mctx->nsub_tops = 0; */

4195

mctx->abkref_ents = n;

4196

mctx->max_mb_elem_len = 1;

4197

mctx->asub_tops = n;

4198

return REG_NOERROR;

4199

}

4200

4201

/* Clean the entries which depend on the current input in MCTX.

4202

This function must be invoked when the matcher changes the start index

4203

of the input, or changes the input string. */

4204

4205

static void

4206

internal_function

4207

match_ctx_clean (re_match_context_t *mctx)

4208

{

4209

int st_idx;

4210

for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)

4211

{

4212

int sl_idx;

4213

re_sub_match_top_t *top = mctx->sub_tops[st_idx];

4214

for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)

4215

{

4216

re_sub_match_last_t *last = top->lasts[sl_idx];

4217

re_free (last->path.array);

4218

re_free (last);

4219

}

4220

re_free (top->lasts);

4221

if (top->path)

4222

{

4223

re_free (top->path->array);

4224

re_free (top->path);

4225

}

4226

free (top);

4227

}

4228

4229

mctx->nsub_tops = 0;

4230

mctx->nbkref_ents = 0;

4231

}

4232

4233

/* Free all the memory associated with MCTX. */

4234

4235

static void

4236

internal_function

4237

match_ctx_free (re_match_context_t *mctx)

4238

{

4239

/* First, free all the memory associated with MCTX->SUB_TOPS. */

4240

match_ctx_clean (mctx);

4241

re_free (mctx->sub_tops);

4242

re_free (mctx->bkref_ents);

4243

}

4244

4245

/* Add a new backreference entry to MCTX.

4246

Note that we assume that caller never call this function with duplicate

4247

entry, and call with STR_IDX which isn't smaller than any existing entry.

4248

4249

4250

static reg_errcode_t

4251

internal_function __attribute_warn_unused_result__

4252

match_ctx_add_entry (re_match_context_t *mctx, int node, int str_idx, int from,

4253

int to)

4254

{

4255

if (mctx->nbkref_ents >= mctx->abkref_ents)

4256

{

4257

struct re_backref_cache_entry* new_entry;

4258

new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,

4259

mctx->abkref_ents * 2);

4260

if (BE (new_entry == NULL, 0))

4261

{

4262

re_free (mctx->bkref_ents);

4263

return REG_ESPACE;

4264

}

4265

mctx->bkref_ents = new_entry;

4266

memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',

4267

sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);

4268

mctx->abkref_ents *= 2;

4269

}

4270

if (mctx->nbkref_ents > 0

4271

&& mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)

4272

mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;

4273

4274

mctx->bkref_ents[mctx->nbkref_ents].node = node;

4275

mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;

4276

mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;

4277

mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;

4278

4279

/* This is a cache that saves negative results of check_dst_limits_calc_pos.

4280

If bit N is clear, means that this entry won't epsilon-transition to

4281

an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If

4282

it is set, check_dst_limits_calc_pos_1 will recurse and try to find one

4283

such node.

4284

4285

A backreference does not epsilon-transition unless it is empty, so set

4286

to all zeros if FROM != TO. */

4287

mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map

4288

= (from == to ? ~0 : 0);

4289

4290

mctx->bkref_ents[mctx->nbkref_ents++].more = 0;

4291

if (mctx->max_mb_elem_len < to - from)

4292

mctx->max_mb_elem_len = to - from;

4293

return REG_NOERROR;

4294

}

4295

4296

/* Search for the first entry which has the same str_idx, or -1 if none is

4297

found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */

4298

4299

static int

4300

internal_function

4301

search_cur_bkref_entry (const re_match_context_t *mctx, int str_idx)

4302

{

4303

int left, right, mid, last;

4304

last = right = mctx->nbkref_ents;

4305

for (left = 0; left < right;)

4306

{

4307

mid = (left + right) / 2;

4308

if (mctx->bkref_ents[mid].str_idx < str_idx)

4309

left = mid + 1;

4310

else

4311

right = mid;

4312

}

4313

if (left < last && mctx->bkref_ents[left].str_idx == str_idx)

4314

return left;

4315

else

4316

return -1;

4317

}

4318

4319

/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches

4320

at STR_IDX. */

4321

4322

static reg_errcode_t

4323

internal_function __attribute_warn_unused_result__

4324

match_ctx_add_subtop (re_match_context_t *mctx, int node, int str_idx)

4325

{

4326

#ifdef DEBUG

4327

assert (mctx->sub_tops != NULL);

4328

assert (mctx->asub_tops > 0);

4329

#endif

4330

if (BE (mctx->nsub_tops == mctx->asub_tops, 0))

4331

{

4332

int new_asub_tops = mctx->asub_tops * 2;

4333

re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,

4334

re_sub_match_top_t *,

4335

new_asub_tops);

4336

if (BE (new_array == NULL, 0))

4337

return REG_ESPACE;

4338

mctx->sub_tops = new_array;

4339

mctx->asub_tops = new_asub_tops;

4340

}

4341

mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));

4342

if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))

4343

return REG_ESPACE;

4344

mctx->sub_tops[mctx->nsub_tops]->node = node;

4345

mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;

4346

return REG_NOERROR;

4347

}

4348

4349

/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches

4350

at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */

4351

4352

static re_sub_match_last_t *

4353

internal_function

4354

match_ctx_add_sublast (re_sub_match_top_t *subtop, int node, int str_idx)

4355

{

4356

re_sub_match_last_t *new_entry;

4357

if (BE (subtop->nlasts == subtop->alasts, 0))

4358

{

4359

int new_alasts = 2 * subtop->alasts + 1;

4360

re_sub_match_last_t **new_array = re_realloc (subtop->lasts,

4361

re_sub_match_last_t *,

4362

new_alasts);

4363

if (BE (new_array == NULL, 0))

4364

return NULL;

4365

subtop->lasts = new_array;

4366

subtop->alasts = new_alasts;

4367

}

4368

new_entry = calloc (1, sizeof (re_sub_match_last_t));

4369

if (BE (new_entry != NULL, 1))

4370

{

4371

subtop->lasts[subtop->nlasts] = new_entry;

4372

new_entry->node = node;

4373

new_entry->str_idx = str_idx;

4374

++subtop->nlasts;

4375

}

4376

return new_entry;

4377

}

4378

4379

static void

4380

internal_function

4381

sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,

4382

re_dfastate_t **limited_sts, int last_node, int last_str_idx)

4383

{

4384

sctx->sifted_states = sifted_sts;

4385

sctx->limited_states = limited_sts;

4386

sctx->last_node = last_node;

4387

sctx->last_str_idx = last_str_idx;

4388

re_node_set_init_empty (&sctx->limits);

4389

}

Older »