/* Extended regular expression matching and search library.
   Copyright (C) 2002-2023 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
20
static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
22
static void match_ctx_clean (re_match_context_t *mctx);
23
static void match_ctx_free (re_match_context_t *cache);
24
static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
25
Idx str_idx, Idx from, Idx to);
26
static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx);
27
static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
29
static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
30
Idx node, Idx str_idx);
31
static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
32
re_dfastate_t **limited_sts, Idx last_node,
34
static reg_errcode_t re_search_internal (const regex_t *preg,
35
const char *string, Idx length,
36
Idx start, Idx last_start, Idx stop,
37
size_t nmatch, regmatch_t pmatch[],
39
static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
40
const char *string1, Idx length1,
41
const char *string2, Idx length2,
42
Idx start, regoff_t range,
43
struct re_registers *regs,
44
Idx stop, bool ret_len);
45
static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
46
const char *string, Idx length, Idx start,
47
regoff_t range, Idx stop,
48
struct re_registers *regs,
50
static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
51
Idx nregs, int regs_allocated);
52
static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx);
53
static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
55
static Idx check_halt_state_context (const re_match_context_t *mctx,
56
const re_dfastate_t *state, Idx idx);
57
static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
58
regmatch_t *prev_idx_match, Idx cur_node,
59
Idx cur_idx, Idx nmatch);
60
static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
61
Idx str_idx, Idx dest_node, Idx nregs,
62
regmatch_t *regs, regmatch_t *prevregs,
63
re_node_set *eps_via_nodes);
64
static reg_errcode_t set_regs (const regex_t *preg,
65
const re_match_context_t *mctx,
66
size_t nmatch, regmatch_t *pmatch,
68
static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
70
static int sift_states_iter_mb (const re_match_context_t *mctx,
71
re_sift_context_t *sctx,
72
Idx node_idx, Idx str_idx, Idx max_str_idx);
73
static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
74
re_sift_context_t *sctx);
75
static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
76
re_sift_context_t *sctx, Idx str_idx,
77
re_node_set *cur_dest);
78
static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
79
re_sift_context_t *sctx,
81
re_node_set *dest_nodes);
82
static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
83
re_node_set *dest_nodes,
84
const re_node_set *candidates);
85
static bool check_dst_limits (const re_match_context_t *mctx,
86
const re_node_set *limits,
87
Idx dst_node, Idx dst_idx, Idx src_node,
89
static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
90
int boundaries, Idx subexp_idx,
91
Idx from_node, Idx bkref_idx);
92
static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
93
Idx limit, Idx subexp_idx,
94
Idx node, Idx str_idx,
96
static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
97
re_node_set *dest_nodes,
98
const re_node_set *candidates,
100
struct re_backref_cache_entry *bkref_ents,
102
static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
103
re_sift_context_t *sctx,
104
Idx str_idx, const re_node_set *candidates);
105
static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
107
re_dfastate_t **src, Idx num);
108
static re_dfastate_t *find_recover_state (reg_errcode_t *err,
109
re_match_context_t *mctx);
110
static re_dfastate_t *transit_state (reg_errcode_t *err,
111
re_match_context_t *mctx,
112
re_dfastate_t *state);
113
static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
114
re_match_context_t *mctx,
115
re_dfastate_t *next_state);
116
static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
117
re_node_set *cur_nodes,
120
static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
121
re_match_context_t *mctx,
122
re_dfastate_t *pstate);
124
static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
125
re_dfastate_t *pstate);
126
static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
127
const re_node_set *nodes);
128
static reg_errcode_t get_subexp (re_match_context_t *mctx,
129
Idx bkref_node, Idx bkref_str_idx);
130
static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
131
const re_sub_match_top_t *sub_top,
132
re_sub_match_last_t *sub_last,
133
Idx bkref_node, Idx bkref_str);
134
static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
135
Idx subexp_idx, int type);
136
static reg_errcode_t check_arrival (re_match_context_t *mctx,
137
state_array_t *path, Idx top_node,
138
Idx top_str, Idx last_node, Idx last_str,
140
static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
142
re_node_set *cur_nodes,
143
re_node_set *next_nodes);
144
static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
145
re_node_set *cur_nodes,
146
Idx ex_subexp, int type);
147
static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
148
re_node_set *dst_nodes,
149
Idx target, Idx ex_subexp,
151
static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
152
re_node_set *cur_nodes, Idx cur_str,
153
Idx subexp_num, int type);
154
static bool build_trtable (const re_dfa_t *dfa, re_dfastate_t *state);
155
static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
156
const re_string_t *input, Idx idx);
158
static unsigned int find_collation_sequence_value (const unsigned char *mbs,
161
static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
162
const re_dfastate_t *state,
163
re_node_set *states_node,
164
bitset_t *states_ch);
165
static bool check_node_accept (const re_match_context_t *mctx,
166
const re_token_t *node, Idx idx);
167
static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
/* Entry point for POSIX code.  */

/* regexec searches for a given pattern, specified by PREG, in the
   string STRING.

   If NMATCH is zero or REG_NOSUB was set in the cflags argument to
   'regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
   least NMATCH elements, and we set them to the offsets of the
   corresponding matched substrings.

   EFLAGS specifies "execution flags" which affect matching: if
   REG_NOTBOL is set, then ^ does not match at the beginning of the
   string; if REG_NOTEOL is set, then $ does not match at the end.

   Return 0 if a match is found, REG_NOMATCH if not, REG_BADPAT if
   EFLAGS is invalid.  */
187
regexec (const regex_t *__restrict preg, const char *__restrict string,
188
size_t nmatch, regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags)
192
re_dfa_t *dfa = preg->buffer;
194
if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
197
if (eflags & REG_STARTEND)
199
start = pmatch[0].rm_so;
200
length = pmatch[0].rm_eo;
205
length = strlen (string);
208
lock_lock (dfa->lock);
210
err = re_search_internal (preg, string, length, start, length,
211
length, 0, NULL, eflags);
213
err = re_search_internal (preg, string, length, start, length,
214
length, nmatch, pmatch, eflags);
215
lock_unlock (dfa->lock);
216
return err != REG_NOERROR;
220
libc_hidden_def (__regexec)
222
# include <shlib-compat.h>
223
versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
225
# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
226
__typeof__ (__regexec) __compat_regexec;
229
attribute_compat_text_section
230
__compat_regexec (const regex_t *__restrict preg,
231
const char *__restrict string, size_t nmatch,
232
regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags)
234
return regexec (preg, string, nmatch, pmatch,
235
eflags & (REG_NOTBOL | REG_NOTEOL));
237
compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
/* Entry points for GNU code.  */

/* re_match, re_search, re_match_2, re_search_2

   The former two functions operate on STRING with length LENGTH,
   while the latter two operate on the concatenation of STRING1 and STRING2
   with lengths LENGTH1 and LENGTH2, respectively.

   re_match() matches the compiled pattern in BUFP against the string,
   starting at index START.

   re_search() first tries matching at index START, then it tries to match
   starting from index START + 1, and so on.  The last start position tried
   is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
   way as re_match().)

   The parameter STOP of re_{match,search}_2 specifies that no match exceeding
   the first STOP characters of the concatenation of the strings should be
   considered.

   If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
   and all groups are stored in REGS.  (For the "_2" variants, the offsets are
   computed relative to the concatenation, not relative to the individual
   strings.)

   On success, re_match* functions return the length of the match, re_search*
   return the position of the start of the match.  They return -1 on
   match failure, -2 on error.  */
271
re_match (struct re_pattern_buffer *bufp, const char *string, Idx length,
272
Idx start, struct re_registers *regs)
274
return re_search_stub (bufp, string, length, start, 0, length, regs, true);
277
weak_alias (__re_match, re_match)
281
re_search (struct re_pattern_buffer *bufp, const char *string, Idx length,
282
Idx start, regoff_t range, struct re_registers *regs)
284
return re_search_stub (bufp, string, length, start, range, length, regs,
288
weak_alias (__re_search, re_search)
292
re_match_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
293
const char *string2, Idx length2, Idx start,
294
struct re_registers *regs, Idx stop)
296
return re_search_2_stub (bufp, string1, length1, string2, length2,
297
start, 0, regs, stop, true);
300
weak_alias (__re_match_2, re_match_2)
304
re_search_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
305
const char *string2, Idx length2, Idx start, regoff_t range,
306
struct re_registers *regs, Idx stop)
308
return re_search_2_stub (bufp, string1, length1, string2, length2,
309
start, range, regs, stop, false);
312
weak_alias (__re_search_2, re_search_2)
316
re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1,
317
Idx length1, const char *string2, Idx length2, Idx start,
318
regoff_t range, struct re_registers *regs,
319
Idx stop, bool ret_len)
326
if (__glibc_unlikely ((length1 < 0 || length2 < 0 || stop < 0
327
|| INT_ADD_WRAPV (length1, length2, &len))))
330
/* Concatenate the strings. */
334
s = re_malloc (char, len);
336
if (__glibc_unlikely (s == NULL))
339
memcpy (__mempcpy (s, string1, length1), string2, length2);
341
memcpy (s, string1, length1);
342
memcpy (s + length1, string2, length2);
351
rval = re_search_stub (bufp, str, len, start, range, stop, regs,
/* The parameters have the same meaning as those of re_search.
   Additional parameters:
   If RET_LEN is true the length of the match is returned (re_match style);
   otherwise the position of the match is returned.  */
363
re_search_stub (struct re_pattern_buffer *bufp, const char *string, Idx length,
364
Idx start, regoff_t range, Idx stop, struct re_registers *regs,
367
reg_errcode_t result;
372
re_dfa_t *dfa = bufp->buffer;
373
Idx last_start = start + range;
375
/* Check for out-of-range. */
376
if (__glibc_unlikely (start < 0 || start > length))
378
if (__glibc_unlikely (length < last_start
379
|| (0 <= range && last_start < start)))
381
else if (__glibc_unlikely (last_start < 0
382
|| (range < 0 && start <= last_start)))
385
lock_lock (dfa->lock);
387
eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
388
eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
390
/* Compile fastmap if we haven't yet. */
391
if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
392
re_compile_fastmap (bufp);
394
if (__glibc_unlikely (bufp->no_sub))
397
/* We need at least 1 register. */
400
else if (__glibc_unlikely (bufp->regs_allocated == REGS_FIXED
401
&& regs->num_regs <= bufp->re_nsub))
403
nregs = regs->num_regs;
404
if (__glibc_unlikely (nregs < 1))
406
/* Nothing can be copied to regs. */
412
nregs = bufp->re_nsub + 1;
413
pmatch = re_malloc (regmatch_t, nregs);
414
if (__glibc_unlikely (pmatch == NULL))
420
result = re_search_internal (bufp, string, length, start, last_start, stop,
421
nregs, pmatch, eflags);
425
/* I hope we needn't fill their regs with -1's when no match was found. */
426
if (result != REG_NOERROR)
427
rval = result == REG_NOMATCH ? -1 : -2;
428
else if (regs != NULL)
430
/* If caller wants register contents data back, copy them. */
431
bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
432
bufp->regs_allocated);
433
if (__glibc_unlikely (bufp->regs_allocated == REGS_UNALLOCATED))
437
if (__glibc_likely (rval == 0))
441
DEBUG_ASSERT (pmatch[0].rm_so == start);
442
rval = pmatch[0].rm_eo - start;
445
rval = pmatch[0].rm_so;
449
lock_unlock (dfa->lock);
454
re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
457
int rval = REGS_REALLOCATE;
459
Idx need_regs = nregs + 1;
460
/* We need one extra element beyond 'num_regs' for the '-1' marker GNU code
463
/* Have the register data arrays been allocated? */
464
if (regs_allocated == REGS_UNALLOCATED)
465
{ /* No. So allocate them with malloc. */
466
regs->start = re_malloc (regoff_t, need_regs);
467
if (__glibc_unlikely (regs->start == NULL))
468
return REGS_UNALLOCATED;
469
regs->end = re_malloc (regoff_t, need_regs);
470
if (__glibc_unlikely (regs->end == NULL))
472
re_free (regs->start);
473
return REGS_UNALLOCATED;
475
regs->num_regs = need_regs;
477
else if (regs_allocated == REGS_REALLOCATE)
478
{ /* Yes. If we need more elements than were already
479
allocated, reallocate them. If we need fewer, just
481
if (__glibc_unlikely (need_regs > regs->num_regs))
483
regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
485
if (__glibc_unlikely (new_start == NULL))
486
return REGS_UNALLOCATED;
487
new_end = re_realloc (regs->end, regoff_t, need_regs);
488
if (__glibc_unlikely (new_end == NULL))
491
return REGS_UNALLOCATED;
493
regs->start = new_start;
495
regs->num_regs = need_regs;
500
DEBUG_ASSERT (regs_allocated == REGS_FIXED);
501
/* This function may not be called with REGS_FIXED and nregs too big. */
502
DEBUG_ASSERT (nregs <= regs->num_regs);
507
for (i = 0; i < nregs; ++i)
509
regs->start[i] = pmatch[i].rm_so;
510
regs->end[i] = pmatch[i].rm_eo;
512
for ( ; i < regs->num_regs; ++i)
513
regs->start[i] = regs->end[i] = -1;
/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
   ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
   this memory for recording register information.  STARTS and ENDS
   must be allocated using the malloc library routine, and must each
   be at least NUM_REGS * sizeof (regoff_t) bytes long.

   If NUM_REGS == 0, then subsequent matches should allocate their own
   register data.

   Unless this function is called, the first search or match using
   PATTERN_BUFFER will allocate its own register data, without
   freeing the old data.  */
532
re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
533
__re_size_t num_regs, regoff_t *starts, regoff_t *ends)
537
bufp->regs_allocated = REGS_REALLOCATE;
538
regs->num_regs = num_regs;
539
regs->start = starts;
544
bufp->regs_allocated = REGS_UNALLOCATED;
546
regs->start = regs->end = NULL;
550
weak_alias (__re_set_registers, re_set_registers)
/* Entry points compatible with 4.2 BSD regex library.  We don't define
   them unless specifically requested.  */
556
#if defined _REGEX_RE_COMP || defined _LIBC
561
re_exec (const char *s)
563
return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
565
#endif /* _REGEX_RE_COMP */
/* Internal entry point.  */

/* Searches for a compiled pattern PREG in the string STRING, whose
   length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
   meaning as with regexec.  LAST_START is START + RANGE, where
   START and RANGE have the same meaning as with re_search.
   Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
   otherwise return the error code.
   Note: We assume front end functions already check ranges.
   (0 <= LAST_START && LAST_START <= LENGTH)  */
579
__attribute_warn_unused_result__
580
re_search_internal (const regex_t *preg, const char *string, Idx length,
581
Idx start, Idx last_start, Idx stop, size_t nmatch,
582
regmatch_t pmatch[], int eflags)
585
const re_dfa_t *dfa = preg->buffer;
586
Idx left_lim, right_lim;
588
bool fl_longest_match;
595
re_match_context_t mctx = { .dfa = dfa };
596
char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
597
&& start != last_start && !preg->can_be_null)
598
? preg->fastmap : NULL);
599
RE_TRANSLATE_TYPE t = preg->translate;
601
extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
602
nmatch -= extra_nmatch;
604
/* Check if the DFA haven't been compiled. */
605
if (__glibc_unlikely (preg->used == 0 || dfa->init_state == NULL
606
|| dfa->init_state_word == NULL
607
|| dfa->init_state_nl == NULL
608
|| dfa->init_state_begbuf == NULL))
611
/* We assume front-end functions already check them. */
612
DEBUG_ASSERT (0 <= last_start && last_start <= length);
614
/* If initial states with non-begbuf contexts have no elements,
615
the regex must be anchored. If preg->newline_anchor is set,
616
we'll never use init_state_nl, so do not check it. */
617
if (dfa->init_state->nodes.nelem == 0
618
&& dfa->init_state_word->nodes.nelem == 0
619
&& (dfa->init_state_nl->nodes.nelem == 0
620
|| !preg->newline_anchor))
622
if (start != 0 && last_start != 0)
624
start = last_start = 0;
627
/* We must check the longest matching, if nmatch > 0. */
628
fl_longest_match = (nmatch != 0 || dfa->nbackref);
630
err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
631
preg->translate, (preg->syntax & RE_ICASE) != 0,
633
if (__glibc_unlikely (err != REG_NOERROR))
635
mctx.input.stop = stop;
636
mctx.input.raw_stop = stop;
637
mctx.input.newline_anchor = preg->newline_anchor;
639
err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
640
if (__glibc_unlikely (err != REG_NOERROR))
643
/* We will log all the DFA states through which the dfa pass,
644
if nmatch > 1, or this dfa has "multibyte node", which is a
645
back-reference or a node which can accept multibyte character or
646
multi character collating element. */
647
if (nmatch > 1 || dfa->has_mb_node)
649
/* Avoid overflow. */
650
if (__glibc_unlikely ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
651
<= mctx.input.bufs_len)))
657
mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
658
if (__glibc_unlikely (mctx.state_log == NULL))
666
mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
667
: CONTEXT_NEWLINE | CONTEXT_BEGBUF;
669
/* Check incrementally whether the input string matches. */
670
incr = (last_start < start) ? -1 : 1;
671
left_lim = (last_start < start) ? last_start : start;
672
right_lim = (last_start < start) ? start : last_start;
673
sb = dfa->mb_cur_max == 1;
676
? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
677
| (start <= last_start ? 2 : 0)
678
| (t != NULL ? 1 : 0))
681
for (;; match_first += incr)
684
if (match_first < left_lim || right_lim < match_first)
687
/* Advance as rapidly as possible through the string, until we
688
find a plausible place to start matching. This may be done
689
with varying efficiency, so there are various possibilities:
690
only the most common of them are specialized, in order to
691
save on code size. We use a switch statement for speed. */
699
/* Fastmap with single-byte translation, match forward. */
700
while (__glibc_likely (match_first < right_lim)
701
&& !fastmap[t[(unsigned char) string[match_first]]])
703
goto forward_match_found_start_or_reached_end;
706
/* Fastmap without translation, match forward. */
707
while (__glibc_likely (match_first < right_lim)
708
&& !fastmap[(unsigned char) string[match_first]])
711
forward_match_found_start_or_reached_end:
712
if (__glibc_unlikely (match_first == right_lim))
714
ch = match_first >= length
715
? 0 : (unsigned char) string[match_first];
716
if (!fastmap[t ? t[ch] : ch])
723
/* Fastmap without multi-byte translation, match backwards. */
724
while (match_first >= left_lim)
726
ch = match_first >= length
727
? 0 : (unsigned char) string[match_first];
728
if (fastmap[t ? t[ch] : ch])
732
if (match_first < left_lim)
737
/* In this case, we can't determine easily the current byte,
738
since it might be a component byte of a multibyte
739
character. Then we use the constructed buffer instead. */
742
/* If MATCH_FIRST is out of the valid range, reconstruct the
744
__re_size_t offset = match_first - mctx.input.raw_mbs_idx;
745
if (__glibc_unlikely (offset
746
>= (__re_size_t) mctx.input.valid_raw_len))
748
err = re_string_reconstruct (&mctx.input, match_first,
750
if (__glibc_unlikely (err != REG_NOERROR))
753
offset = match_first - mctx.input.raw_mbs_idx;
755
/* Use buffer byte if OFFSET is in buffer, otherwise '\0'. */
756
ch = (offset < mctx.input.valid_len
757
? re_string_byte_at (&mctx.input, offset) : 0);
761
if (match_first < left_lim || match_first > right_lim)
770
/* Reconstruct the buffers so that the matcher can assume that
771
the matching starts from the beginning of the buffer. */
772
err = re_string_reconstruct (&mctx.input, match_first, eflags);
773
if (__glibc_unlikely (err != REG_NOERROR))
776
/* Don't consider this char as a possible match start if it part,
777
yet isn't the head, of a multibyte character. */
778
if (!sb && !re_string_first_byte (&mctx.input, 0))
781
/* It seems to be appropriate one, then use the matcher. */
782
/* We assume that the matching starts from 0. */
783
mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
784
match_last = check_matching (&mctx, fl_longest_match,
785
start <= last_start ? &match_first : NULL);
786
if (match_last != -1)
788
if (__glibc_unlikely (match_last == -2))
795
mctx.match_last = match_last;
796
if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
798
re_dfastate_t *pstate = mctx.state_log[match_last];
799
mctx.last_node = check_halt_state_context (&mctx, pstate,
802
if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
805
err = prune_impossible_nodes (&mctx);
806
if (err == REG_NOERROR)
808
if (__glibc_unlikely (err != REG_NOMATCH))
813
break; /* We found a match. */
817
match_ctx_clean (&mctx);
820
DEBUG_ASSERT (match_last != -1);
821
DEBUG_ASSERT (err == REG_NOERROR);
823
/* Set pmatch[] if we need. */
828
/* Initialize registers. */
829
for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
830
pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
832
/* Set the points where matching start/end. */
834
pmatch[0].rm_eo = mctx.match_last;
835
/* FIXME: This function should fail if mctx.match_last exceeds
836
the maximum possible regoff_t value. We need a new error
837
code REG_OVERFLOW. */
839
if (!preg->no_sub && nmatch > 1)
841
err = set_regs (preg, &mctx, nmatch, pmatch,
842
dfa->has_plural_match && dfa->nbackref > 0);
843
if (__glibc_unlikely (err != REG_NOERROR))
847
/* At last, add the offset to each register, since we slid
848
the buffers so that we could assume that the matching starts
850
for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
851
if (pmatch[reg_idx].rm_so != -1)
853
if (__glibc_unlikely (mctx.input.offsets_needed != 0))
855
pmatch[reg_idx].rm_so =
856
(pmatch[reg_idx].rm_so == mctx.input.valid_len
857
? mctx.input.valid_raw_len
858
: mctx.input.offsets[pmatch[reg_idx].rm_so]);
859
pmatch[reg_idx].rm_eo =
860
(pmatch[reg_idx].rm_eo == mctx.input.valid_len
861
? mctx.input.valid_raw_len
862
: mctx.input.offsets[pmatch[reg_idx].rm_eo]);
864
pmatch[reg_idx].rm_so += match_first;
865
pmatch[reg_idx].rm_eo += match_first;
867
for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
869
pmatch[nmatch + reg_idx].rm_so = -1;
870
pmatch[nmatch + reg_idx].rm_eo = -1;
874
for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
875
if (dfa->subexp_map[reg_idx] != reg_idx)
877
pmatch[reg_idx + 1].rm_so
878
= pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
879
pmatch[reg_idx + 1].rm_eo
880
= pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
885
re_free (mctx.state_log);
887
match_ctx_free (&mctx);
888
re_string_destruct (&mctx.input);
893
__attribute_warn_unused_result__
894
prune_impossible_nodes (re_match_context_t *mctx)
896
const re_dfa_t *const dfa = mctx->dfa;
897
Idx halt_node, match_last;
899
re_dfastate_t **sifted_states;
900
re_dfastate_t **lim_states = NULL;
901
re_sift_context_t sctx;
902
DEBUG_ASSERT (mctx->state_log != NULL);
903
match_last = mctx->match_last;
904
halt_node = mctx->last_node;
906
/* Avoid overflow. */
907
if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
911
sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
912
if (__glibc_unlikely (sifted_states == NULL))
919
lim_states = re_malloc (re_dfastate_t *, match_last + 1);
920
if (__glibc_unlikely (lim_states == NULL))
927
memset (lim_states, '\0',
928
sizeof (re_dfastate_t *) * (match_last + 1));
929
sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
931
ret = sift_states_backward (mctx, &sctx);
932
re_node_set_free (&sctx.limits);
933
if (__glibc_unlikely (ret != REG_NOERROR))
935
if (sifted_states[0] != NULL || lim_states[0] != NULL)
945
} while (mctx->state_log[match_last] == NULL
946
|| !mctx->state_log[match_last]->halt);
947
halt_node = check_halt_state_context (mctx,
948
mctx->state_log[match_last],
951
ret = merge_state_array (dfa, sifted_states, lim_states,
953
re_free (lim_states);
955
if (__glibc_unlikely (ret != REG_NOERROR))
960
sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
961
ret = sift_states_backward (mctx, &sctx);
962
re_node_set_free (&sctx.limits);
963
if (__glibc_unlikely (ret != REG_NOERROR))
965
if (sifted_states[0] == NULL)
971
re_free (mctx->state_log);
972
mctx->state_log = sifted_states;
973
sifted_states = NULL;
974
mctx->last_node = halt_node;
975
mctx->match_last = match_last;
978
re_free (sifted_states);
979
re_free (lim_states);
/* Acquire an initial state and return it.
   We must select the appropriate initial state depending on the context,
   since initial states may have constraints like "\<", "^", etc.  */
987
static __always_inline re_dfastate_t *
988
acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
991
const re_dfa_t *const dfa = mctx->dfa;
992
if (dfa->init_state->has_constraint)
994
unsigned int context;
995
context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
996
if (IS_WORD_CONTEXT (context))
997
return dfa->init_state_word;
998
else if (IS_ORDINARY_CONTEXT (context))
999
return dfa->init_state;
1000
else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1001
return dfa->init_state_begbuf;
1002
else if (IS_NEWLINE_CONTEXT (context))
1003
return dfa->init_state_nl;
1004
else if (IS_BEGBUF_CONTEXT (context))
1006
/* It is relatively rare case, then calculate on demand. */
1007
return re_acquire_state_context (err, dfa,
1008
dfa->init_state->entrance_nodes,
1012
/* Must not happen? */
1013
return dfa->init_state;
1016
return dfa->init_state;
/* Check whether the regular expression matches the input string INPUT or not,
   and return the index where the matching ends.  Return -1 if
   there is no match, and return -2 in case of an error.
   FL_LONGEST_MATCH means we want the POSIX longest matching.
   If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
   next place where we may want to try matching.
   Note that the matcher assumes that the matching starts from the current
   index of the buffer.  */
1029
__attribute_warn_unused_result__
1030
check_matching (re_match_context_t *mctx, bool fl_longest_match,
1033
const re_dfa_t *const dfa = mctx->dfa;
1036
Idx match_last = -1;
1037
Idx cur_str_idx = re_string_cur_idx (&mctx->input);
1038
re_dfastate_t *cur_state;
1039
bool at_init_state = p_match_first != NULL;
1040
Idx next_start_idx = cur_str_idx;
1043
cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1044
/* An initial state must not be NULL (invalid). */
1045
if (__glibc_unlikely (cur_state == NULL))
1047
DEBUG_ASSERT (err == REG_ESPACE);
1051
if (mctx->state_log != NULL)
1053
mctx->state_log[cur_str_idx] = cur_state;
1055
/* Check OP_OPEN_SUBEXP in the initial state in case that we use them
1056
later. E.g. Processing back references. */
1057
if (__glibc_unlikely (dfa->nbackref))
1059
at_init_state = false;
1060
err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1061
if (__glibc_unlikely (err != REG_NOERROR))
1064
if (cur_state->has_backref)
1066
err = transit_state_bkref (mctx, &cur_state->nodes);
1067
if (__glibc_unlikely (err != REG_NOERROR))
1073
/* If the RE accepts NULL string. */
1074
if (__glibc_unlikely (cur_state->halt))
1076
if (!cur_state->has_constraint
1077
|| check_halt_state_context (mctx, cur_state, cur_str_idx))
1079
if (!fl_longest_match)
1083
match_last = cur_str_idx;
1089
while (!re_string_eoi (&mctx->input))
1091
re_dfastate_t *old_state = cur_state;
1092
Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1094
if ((__glibc_unlikely (next_char_idx >= mctx->input.bufs_len)
1095
&& mctx->input.bufs_len < mctx->input.len)
1096
|| (__glibc_unlikely (next_char_idx >= mctx->input.valid_len)
1097
&& mctx->input.valid_len < mctx->input.len))
1099
err = extend_buffers (mctx, next_char_idx + 1);
1100
if (__glibc_unlikely (err != REG_NOERROR))
1102
DEBUG_ASSERT (err == REG_ESPACE);
1107
cur_state = transit_state (&err, mctx, cur_state);
1108
if (mctx->state_log != NULL)
1109
cur_state = merge_state_with_log (&err, mctx, cur_state);
1111
if (cur_state == NULL)
1113
/* Reached the invalid state or an error. Try to recover a valid
1114
state using the state log, if available and if we have not
1115
already found a valid (even if not the longest) match. */
1116
if (__glibc_unlikely (err != REG_NOERROR))
1119
if (mctx->state_log == NULL
1120
|| (match && !fl_longest_match)
1121
|| (cur_state = find_recover_state (&err, mctx)) == NULL)
1125
if (__glibc_unlikely (at_init_state))
1127
if (old_state == cur_state)
1128
next_start_idx = next_char_idx;
1130
at_init_state = false;
1133
if (cur_state->halt)
1135
/* Reached a halt state.
1136
Check the halt state can satisfy the current context. */
1137
if (!cur_state->has_constraint
1138
|| check_halt_state_context (mctx, cur_state,
1139
re_string_cur_idx (&mctx->input)))
1141
/* We found an appropriate halt state. */
1142
match_last = re_string_cur_idx (&mctx->input);
1145
/* We found a match, do not modify match_first below. */
1146
p_match_first = NULL;
1147
if (!fl_longest_match)
1154
*p_match_first += next_start_idx;
/* Check whether NODE matches the current context.  */
check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
1164
re_token_type_t type = dfa->nodes[node].type;
1165
unsigned int constraint = dfa->nodes[node].constraint;
1166
if (type != END_OF_RE)
1170
if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1175
/* Check the halt state STATE match the current context.
1176
Return 0 if not match, if the node, STATE has, is a halt node and
1177
match the context, return the node. */
1180
check_halt_state_context (const re_match_context_t *mctx,
1181
const re_dfastate_t *state, Idx idx)
1184
unsigned int context;
1185
DEBUG_ASSERT (state->halt);
1186
context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1187
for (i = 0; i < state->nodes.nelem; ++i)
1188
if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1189
return state->nodes.elems[i];
1193
/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
1194
corresponding to the DFA).
1195
Return the destination node, and update EPS_VIA_NODES;
1196
return -1 on match failure, -2 on error. */
1199
proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
1200
regmatch_t *prevregs,
1201
Idx *pidx, Idx node, re_node_set *eps_via_nodes,
1202
struct re_fail_stack_t *fs)
1204
const re_dfa_t *const dfa = mctx->dfa;
1205
if (IS_EPSILON_NODE (dfa->nodes[node].type))
1207
re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1208
re_node_set *edests = &dfa->edests[node];
1210
if (! re_node_set_contains (eps_via_nodes, node))
1212
bool ok = re_node_set_insert (eps_via_nodes, node);
1213
if (__glibc_unlikely (! ok))
1217
/* Pick a valid destination, or return -1 if none is found. */
1219
for (Idx i = 0; i < edests->nelem; i++)
1221
Idx candidate = edests->elems[i];
1222
if (!re_node_set_contains (cur_nodes, candidate))
1224
if (dest_node == -1)
1225
dest_node = candidate;
1229
/* In order to avoid infinite loop like "(a*)*", return the second
1230
epsilon-transition if the first was already considered. */
1231
if (re_node_set_contains (eps_via_nodes, dest_node))
1234
/* Otherwise, push the second epsilon-transition on the fail stack. */
1236
&& push_fail_stack (fs, *pidx, candidate, nregs, regs,
1237
prevregs, eps_via_nodes))
1240
/* We know we are going to exit. */
1249
re_token_type_t type = dfa->nodes[node].type;
1251
if (dfa->nodes[node].accept_mb)
1252
naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1253
else if (type == OP_BACK_REF)
1255
Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
1256
if (subexp_idx < nregs)
1257
naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1260
if (subexp_idx >= nregs
1261
|| regs[subexp_idx].rm_so == -1
1262
|| regs[subexp_idx].rm_eo == -1)
1266
char *buf = (char *) re_string_get_buffer (&mctx->input);
1267
if (mctx->input.valid_len - *pidx < naccepted
1268
|| (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1278
bool ok = re_node_set_insert (eps_via_nodes, node);
1279
if (__glibc_unlikely (! ok))
1281
dest_node = dfa->edests[node].elems[0];
1282
if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1289
|| check_node_accept (mctx, dfa->nodes + node, *pidx))
1291
Idx dest_node = dfa->nexts[node];
1292
*pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1293
if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1294
|| !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1297
re_node_set_empty (eps_via_nodes);
1304
static reg_errcode_t
1305
__attribute_warn_unused_result__
1306
push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1307
Idx nregs, regmatch_t *regs, regmatch_t *prevregs,
1308
re_node_set *eps_via_nodes)
1312
if (num == fs->alloc)
1314
struct re_fail_stack_ent_t *new_array;
1315
new_array = re_realloc (fs->stack, struct re_fail_stack_ent_t,
1317
if (new_array == NULL)
1320
fs->stack = new_array;
1322
fs->stack[num].idx = str_idx;
1323
fs->stack[num].node = dest_node;
1324
fs->stack[num].regs = re_malloc (regmatch_t, 2 * nregs);
1325
if (fs->stack[num].regs == NULL)
1328
memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1329
memcpy (fs->stack[num].regs + nregs, prevregs, sizeof (regmatch_t) * nregs);
1330
err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1335
pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
1336
regmatch_t *regs, regmatch_t *prevregs,
1337
re_node_set *eps_via_nodes)
1339
if (fs == NULL || fs->num == 0)
1341
Idx num = --fs->num;
1342
*pidx = fs->stack[num].idx;
1343
memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1344
memcpy (prevregs, fs->stack[num].regs + nregs, sizeof (regmatch_t) * nregs);
1345
re_node_set_free (eps_via_nodes);
1346
re_free (fs->stack[num].regs);
1347
*eps_via_nodes = fs->stack[num].eps_via_nodes;
1348
DEBUG_ASSERT (0 <= fs->stack[num].node);
1349
return fs->stack[num].node;
1353
/* Type-safe dynamic array of regmatch_t (used by set_regs to snapshot
   register state), instantiated from the generic dynarray skeleton.  */
#define DYNARRAY_STRUCT  regmatch_list
#define DYNARRAY_ELEMENT regmatch_t
#define DYNARRAY_PREFIX  regmatch_list_
#include <malloc/dynarray-skeleton.c>
1358
/* Set the positions where the subexpressions are starts/ends to registers
1360
Note: We assume that pmatch[0] is already set, and
1361
pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. */
1363
static reg_errcode_t
1364
__attribute_warn_unused_result__
1365
set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1366
regmatch_t *pmatch, bool fl_backtrack)
1368
const re_dfa_t *dfa = preg->buffer;
1370
re_node_set eps_via_nodes;
1371
struct re_fail_stack_t *fs;
1372
struct re_fail_stack_t fs_body = { 0, 2, NULL };
1373
struct regmatch_list prev_match;
1374
regmatch_list_init (&prev_match);
1376
DEBUG_ASSERT (nmatch > 1);
1377
DEBUG_ASSERT (mctx->state_log != NULL);
1381
fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1382
if (fs->stack == NULL)
1388
cur_node = dfa->init_node;
1389
re_node_set_init_empty (&eps_via_nodes);
1391
if (!regmatch_list_resize (&prev_match, nmatch))
1393
regmatch_list_free (&prev_match);
1394
free_fail_stack_return (fs);
1397
regmatch_t *prev_idx_match = regmatch_list_begin (&prev_match);
1398
memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1400
for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1402
update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1404
if ((idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1405
|| (fs && re_node_set_contains (&eps_via_nodes, cur_node)))
1411
for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1412
if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1414
cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1415
prev_idx_match, &eps_via_nodes);
1421
re_node_set_free (&eps_via_nodes);
1422
regmatch_list_free (&prev_match);
1423
return free_fail_stack_return (fs);
1427
/* Proceed to next node. */
1428
cur_node = proceed_next_node (mctx, nmatch, pmatch, prev_idx_match,
1430
&eps_via_nodes, fs);
1432
if (__glibc_unlikely (cur_node < 0))
1434
if (__glibc_unlikely (cur_node == -2))
1436
re_node_set_free (&eps_via_nodes);
1437
regmatch_list_free (&prev_match);
1438
free_fail_stack_return (fs);
1441
cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1442
prev_idx_match, &eps_via_nodes);
1445
re_node_set_free (&eps_via_nodes);
1446
regmatch_list_free (&prev_match);
1447
free_fail_stack_return (fs);
1452
re_node_set_free (&eps_via_nodes);
1453
regmatch_list_free (&prev_match);
1454
return free_fail_stack_return (fs);
1457
static reg_errcode_t
1458
free_fail_stack_return (struct re_fail_stack_t *fs)
1463
for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1465
re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1466
re_free (fs->stack[fs_idx].regs);
1468
re_free (fs->stack);
1474
update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1475
regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
1477
int type = dfa->nodes[cur_node].type;
1478
if (type == OP_OPEN_SUBEXP)
1480
Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1482
/* We are at the first node of this sub expression. */
1483
if (reg_num < nmatch)
1485
pmatch[reg_num].rm_so = cur_idx;
1486
pmatch[reg_num].rm_eo = -1;
1489
else if (type == OP_CLOSE_SUBEXP)
1491
/* We are at the last node of this sub expression. */
1492
Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1493
if (reg_num < nmatch)
1495
if (pmatch[reg_num].rm_so < cur_idx)
1497
pmatch[reg_num].rm_eo = cur_idx;
1498
/* This is a non-empty match or we are not inside an optional
1499
subexpression. Accept this right away. */
1500
memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1504
if (dfa->nodes[cur_node].opt_subexp
1505
&& prev_idx_match[reg_num].rm_so != -1)
1506
/* We transited through an empty match for an optional
1507
subexpression, like (a?)*, and this is not the subexp's
1508
first match. Copy back the old content of the registers
1509
so that matches of an inner subexpression are undone as
1510
well, like in ((a?))*. */
1511
memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1513
/* We completed a subexpression, but it may be part of
1514
an optional one, so do not update PREV_IDX_MATCH. */
1515
pmatch[reg_num].rm_eo = cur_idx;
1521
/* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
1522
and sift the nodes in each states according to the following rules.
1523
Updated state_log will be wrote to STATE_LOG.
1525
Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if...
1526
1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1527
If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to
1528
the LAST_NODE, we throw away the node 'a'.
1529
2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts
1530
string 's' and transit to 'b':
1531
i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1533
ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1534
thrown away, we throw away the node 'a'.
1535
3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1536
i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1538
ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1539
we throw away the node 'a'. */
1541
#define STATE_NODE_CONTAINS(state,node) \
1542
((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1544
static reg_errcode_t
1545
sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1549
Idx str_idx = sctx->last_str_idx;
1550
re_node_set cur_dest;
1552
DEBUG_ASSERT (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1554
/* Build sifted state_log[str_idx]. It has the nodes which can epsilon
1555
transit to the last_node and the last_node itself. */
1556
err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1557
if (__glibc_unlikely (err != REG_NOERROR))
1559
err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1560
if (__glibc_unlikely (err != REG_NOERROR))
1563
/* Then check each states in the state_log. */
1566
/* Update counters. */
1567
null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1568
if (null_cnt > mctx->max_mb_elem_len)
1570
memset (sctx->sifted_states, '\0',
1571
sizeof (re_dfastate_t *) * str_idx);
1572
re_node_set_free (&cur_dest);
1575
re_node_set_empty (&cur_dest);
1578
if (mctx->state_log[str_idx])
1580
err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1581
if (__glibc_unlikely (err != REG_NOERROR))
1585
/* Add all the nodes which satisfy the following conditions:
1586
- It can epsilon transit to a node in CUR_DEST.
1588
And update state_log. */
1589
err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1590
if (__glibc_unlikely (err != REG_NOERROR))
1595
re_node_set_free (&cur_dest);
1599
static reg_errcode_t
1600
__attribute_warn_unused_result__
1601
build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1602
Idx str_idx, re_node_set *cur_dest)
1604
const re_dfa_t *const dfa = mctx->dfa;
1605
const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1608
/* Then build the next sifted state.
1609
We build the next sifted state on 'cur_dest', and update
1610
'sifted_states[str_idx]' with 'cur_dest'.
1612
'cur_dest' is the sifted state from 'state_log[str_idx + 1]'.
1613
'cur_src' points the node_set of the old 'state_log[str_idx]'
1614
(with the epsilon nodes pre-filtered out). */
1615
for (i = 0; i < cur_src->nelem; i++)
1617
Idx prev_node = cur_src->elems[i];
1620
DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[prev_node].type));
1622
/* If the node may accept "multi byte". */
1623
if (dfa->nodes[prev_node].accept_mb)
1624
naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1625
str_idx, sctx->last_str_idx);
1627
/* We don't check backreferences here.
1628
See update_cur_sifted_state(). */
1630
&& check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1631
&& STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1632
dfa->nexts[prev_node]))
1638
if (sctx->limits.nelem)
1640
Idx to_idx = str_idx + naccepted;
1641
if (check_dst_limits (mctx, &sctx->limits,
1642
dfa->nexts[prev_node], to_idx,
1643
prev_node, str_idx))
1646
ok = re_node_set_insert (cur_dest, prev_node);
1647
if (__glibc_unlikely (! ok))
1654
/* Helper functions. */
1656
static reg_errcode_t
1657
clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1659
Idx top = mctx->state_log_top;
1661
if ((next_state_log_idx >= mctx->input.bufs_len
1662
&& mctx->input.bufs_len < mctx->input.len)
1663
|| (next_state_log_idx >= mctx->input.valid_len
1664
&& mctx->input.valid_len < mctx->input.len))
1667
err = extend_buffers (mctx, next_state_log_idx + 1);
1668
if (__glibc_unlikely (err != REG_NOERROR))
1672
if (top < next_state_log_idx)
1674
DEBUG_ASSERT (mctx->state_log != NULL);
1675
memset (mctx->state_log + top + 1, '\0',
1676
sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1677
mctx->state_log_top = next_state_log_idx;
1682
static reg_errcode_t
1683
merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1684
re_dfastate_t **src, Idx num)
1688
for (st_idx = 0; st_idx < num; ++st_idx)
1690
if (dst[st_idx] == NULL)
1691
dst[st_idx] = src[st_idx];
1692
else if (src[st_idx] != NULL)
1694
re_node_set merged_set;
1695
err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1696
&src[st_idx]->nodes);
1697
if (__glibc_unlikely (err != REG_NOERROR))
1699
dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1700
re_node_set_free (&merged_set);
1701
if (__glibc_unlikely (err != REG_NOERROR))
1708
static reg_errcode_t
1709
update_cur_sifted_state (const re_match_context_t *mctx,
1710
re_sift_context_t *sctx, Idx str_idx,
1711
re_node_set *dest_nodes)
1713
const re_dfa_t *const dfa = mctx->dfa;
1714
reg_errcode_t err = REG_NOERROR;
1715
const re_node_set *candidates;
1716
candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1717
: &mctx->state_log[str_idx]->nodes);
1719
if (dest_nodes->nelem == 0)
1720
sctx->sifted_states[str_idx] = NULL;
1725
/* At first, add the nodes which can epsilon transit to a node in
1727
err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1728
if (__glibc_unlikely (err != REG_NOERROR))
1731
/* Then, check the limitations in the current sift_context. */
1732
if (sctx->limits.nelem)
1734
err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1735
mctx->bkref_ents, str_idx);
1736
if (__glibc_unlikely (err != REG_NOERROR))
1741
sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1742
if (__glibc_unlikely (err != REG_NOERROR))
1746
if (candidates && mctx->state_log[str_idx]->has_backref)
1748
err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1749
if (__glibc_unlikely (err != REG_NOERROR))
1755
static reg_errcode_t
1756
__attribute_warn_unused_result__
1757
add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1758
const re_node_set *candidates)
1760
reg_errcode_t err = REG_NOERROR;
1763
re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1764
if (__glibc_unlikely (err != REG_NOERROR))
1767
if (!state->inveclosure.alloc)
1769
err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1770
if (__glibc_unlikely (err != REG_NOERROR))
1772
for (i = 0; i < dest_nodes->nelem; i++)
1774
err = re_node_set_merge (&state->inveclosure,
1775
dfa->inveclosures + dest_nodes->elems[i]);
1776
if (__glibc_unlikely (err != REG_NOERROR))
1780
return re_node_set_add_intersect (dest_nodes, candidates,
1781
&state->inveclosure);
1784
static reg_errcode_t
1785
sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
1786
const re_node_set *candidates)
1790
re_node_set *inv_eclosure = dfa->inveclosures + node;
1791
re_node_set except_nodes;
1792
re_node_set_init_empty (&except_nodes);
1793
for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1795
Idx cur_node = inv_eclosure->elems[ecl_idx];
1796
if (cur_node == node)
1798
if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1800
Idx edst1 = dfa->edests[cur_node].elems[0];
1801
Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
1802
? dfa->edests[cur_node].elems[1] : -1);
1803
if ((!re_node_set_contains (inv_eclosure, edst1)
1804
&& re_node_set_contains (dest_nodes, edst1))
1806
&& !re_node_set_contains (inv_eclosure, edst2)
1807
&& re_node_set_contains (dest_nodes, edst2)))
1809
err = re_node_set_add_intersect (&except_nodes, candidates,
1810
dfa->inveclosures + cur_node);
1811
if (__glibc_unlikely (err != REG_NOERROR))
1813
re_node_set_free (&except_nodes);
1819
for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1821
Idx cur_node = inv_eclosure->elems[ecl_idx];
1822
if (!re_node_set_contains (&except_nodes, cur_node))
1824
Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1825
re_node_set_remove_at (dest_nodes, idx);
1828
re_node_set_free (&except_nodes);
1833
check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
1834
Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
1836
const re_dfa_t *const dfa = mctx->dfa;
1837
Idx lim_idx, src_pos, dst_pos;
1839
Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1840
Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1841
for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1844
struct re_backref_cache_entry *ent;
1845
ent = mctx->bkref_ents + limits->elems[lim_idx];
1846
subexp_idx = dfa->nodes[ent->node].opr.idx;
1848
dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1849
subexp_idx, dst_node, dst_idx,
1851
src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1852
subexp_idx, src_node, src_idx,
1856
<src> <dst> ( <subexp> )
1857
( <subexp> ) <src> <dst>
1858
( <subexp1> <src> <subexp2> <dst> <subexp3> ) */
1859
if (src_pos == dst_pos)
1860
continue; /* This is unrelated limitation. */
1868
check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1869
Idx subexp_idx, Idx from_node, Idx bkref_idx)
1871
const re_dfa_t *const dfa = mctx->dfa;
1872
const re_node_set *eclosures = dfa->eclosures + from_node;
1875
/* Else, we are on the boundary: examine the nodes on the epsilon
1877
for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1879
Idx node = eclosures->elems[node_idx];
1880
switch (dfa->nodes[node].type)
1883
if (bkref_idx != -1)
1885
struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1891
if (ent->node != node)
1894
if (subexp_idx < BITSET_WORD_BITS
1895
&& !(ent->eps_reachable_subexps_map
1896
& ((bitset_word_t) 1 << subexp_idx)))
1899
/* Recurse trying to reach the OP_OPEN_SUBEXP and
1900
OP_CLOSE_SUBEXP cases below. But, if the
1901
destination node is the same node as the source
1902
node, don't recurse because it would cause an
1903
infinite loop: a regex that exhibits this behavior
1905
dst = dfa->edests[node].elems[0];
1906
if (dst == from_node)
1910
else /* if (boundaries & 2) */
1915
check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1917
if (cpos == -1 /* && (boundaries & 1) */)
1919
if (cpos == 0 && (boundaries & 2))
1922
if (subexp_idx < BITSET_WORD_BITS)
1923
ent->eps_reachable_subexps_map
1924
&= ~((bitset_word_t) 1 << subexp_idx);
1926
while (ent++->more);
1930
case OP_OPEN_SUBEXP:
1931
if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
1935
case OP_CLOSE_SUBEXP:
1936
if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
1945
return (boundaries & 2) ? 1 : 0;
1949
check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
1950
Idx subexp_idx, Idx from_node, Idx str_idx,
1953
struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1956
/* If we are outside the range of the subexpression, return -1 or 1. */
1957
if (str_idx < lim->subexp_from)
1960
if (lim->subexp_to < str_idx)
1963
/* If we are within the subexpression, return 0. */
1964
boundaries = (str_idx == lim->subexp_from);
1965
boundaries |= (str_idx == lim->subexp_to) << 1;
1966
if (boundaries == 0)
1969
/* Else, examine epsilon closure. */
1970
return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1971
from_node, bkref_idx);
1974
/* Check the limitations of sub expressions LIMITS, and remove the nodes
1975
which are against limitations from DEST_NODES. */
1977
static reg_errcode_t
1978
check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
1979
const re_node_set *candidates, re_node_set *limits,
1980
struct re_backref_cache_entry *bkref_ents, Idx str_idx)
1983
Idx node_idx, lim_idx;
1985
for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1988
struct re_backref_cache_entry *ent;
1989
ent = bkref_ents + limits->elems[lim_idx];
1991
if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
1992
continue; /* This is unrelated limitation. */
1994
subexp_idx = dfa->nodes[ent->node].opr.idx;
1995
if (ent->subexp_to == str_idx)
1999
for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2001
Idx node = dest_nodes->elems[node_idx];
2002
re_token_type_t type = dfa->nodes[node].type;
2003
if (type == OP_OPEN_SUBEXP
2004
&& subexp_idx == dfa->nodes[node].opr.idx)
2006
else if (type == OP_CLOSE_SUBEXP
2007
&& subexp_idx == dfa->nodes[node].opr.idx)
2011
/* Check the limitation of the open subexpression. */
2012
/* Note that (ent->subexp_to = str_idx != ent->subexp_from). */
2015
err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2017
if (__glibc_unlikely (err != REG_NOERROR))
2021
/* Check the limitation of the close subexpression. */
2023
for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2025
Idx node = dest_nodes->elems[node_idx];
2026
if (!re_node_set_contains (dfa->inveclosures + node,
2028
&& !re_node_set_contains (dfa->eclosures + node,
2031
/* It is against this limitation.
2032
Remove it form the current sifted state. */
2033
err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2035
if (__glibc_unlikely (err != REG_NOERROR))
2041
else /* (ent->subexp_to != str_idx) */
2043
for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2045
Idx node = dest_nodes->elems[node_idx];
2046
re_token_type_t type = dfa->nodes[node].type;
2047
if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2049
if (subexp_idx != dfa->nodes[node].opr.idx)
2051
/* It is against this limitation.
2052
Remove it form the current sifted state. */
2053
err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2055
if (__glibc_unlikely (err != REG_NOERROR))
2064
static reg_errcode_t
2065
__attribute_warn_unused_result__
2066
sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2067
Idx str_idx, const re_node_set *candidates)
2069
const re_dfa_t *const dfa = mctx->dfa;
2072
re_sift_context_t local_sctx;
2073
Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
2075
if (first_idx == -1)
2078
local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
2080
for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2083
re_token_type_t type;
2084
struct re_backref_cache_entry *entry;
2085
node = candidates->elems[node_idx];
2086
type = dfa->nodes[node].type;
2087
/* Avoid infinite loop for the REs like "()\1+". */
2088
if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2090
if (type != OP_BACK_REF)
2093
entry = mctx->bkref_ents + first_idx;
2094
enabled_idx = first_idx;
2101
re_dfastate_t *cur_state;
2103
if (entry->node != node)
2105
subexp_len = entry->subexp_to - entry->subexp_from;
2106
to_idx = str_idx + subexp_len;
2107
dst_node = (subexp_len ? dfa->nexts[node]
2108
: dfa->edests[node].elems[0]);
2110
if (to_idx > sctx->last_str_idx
2111
|| sctx->sifted_states[to_idx] == NULL
2112
|| !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2113
|| check_dst_limits (mctx, &sctx->limits, node,
2114
str_idx, dst_node, to_idx))
2117
if (local_sctx.sifted_states == NULL)
2120
err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2121
if (__glibc_unlikely (err != REG_NOERROR))
2124
local_sctx.last_node = node;
2125
local_sctx.last_str_idx = str_idx;
2126
ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2127
if (__glibc_unlikely (! ok))
2132
cur_state = local_sctx.sifted_states[str_idx];
2133
err = sift_states_backward (mctx, &local_sctx);
2134
if (__glibc_unlikely (err != REG_NOERROR))
2136
if (sctx->limited_states != NULL)
2138
err = merge_state_array (dfa, sctx->limited_states,
2139
local_sctx.sifted_states,
2141
if (__glibc_unlikely (err != REG_NOERROR))
2144
local_sctx.sifted_states[str_idx] = cur_state;
2145
re_node_set_remove (&local_sctx.limits, enabled_idx);
2147
/* mctx->bkref_ents may have changed, reload the pointer. */
2148
entry = mctx->bkref_ents + enabled_idx;
2150
while (enabled_idx++, entry++->more);
2154
if (local_sctx.sifted_states != NULL)
2156
re_node_set_free (&local_sctx.limits);
2164
sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2165
Idx node_idx, Idx str_idx, Idx max_str_idx)
2167
const re_dfa_t *const dfa = mctx->dfa;
2169
/* Check the node can accept "multi byte". */
2170
naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2171
if (naccepted > 0 && str_idx + naccepted <= max_str_idx
2172
&& !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2173
dfa->nexts[node_idx]))
2174
/* The node can't accept the "multi byte", or the
2175
destination was already thrown away, then the node
2176
couldn't accept the current input "multi byte". */
2178
/* Otherwise, it is sure that the node could accept
2179
'naccepted' bytes input. */
2183
/* Functions for state transition. */
2185
/* Return the next state to which the current state STATE will transit by
2186
accepting the current input byte, and update STATE_LOG if necessary.
2187
Return NULL on failure.
2188
If STATE can accept a multibyte char/collating element/back reference
2189
update the destination of STATE_LOG. */
2191
static re_dfastate_t *
2192
__attribute_warn_unused_result__
2193
transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2194
re_dfastate_t *state)
2196
re_dfastate_t **trtable;
2199
/* If the current state can accept multibyte. */
2200
if (__glibc_unlikely (state->accept_mb))
2202
*err = transit_state_mb (mctx, state);
2203
if (__glibc_unlikely (*err != REG_NOERROR))
2207
/* Then decide the next state with the single byte. */
2210
/* don't use transition table */
2211
return transit_state_sb (err, mctx, state);
2214
/* Use transition table */
2215
ch = re_string_fetch_byte (&mctx->input);
2218
trtable = state->trtable;
2219
if (__glibc_likely (trtable != NULL))
2222
trtable = state->word_trtable;
2223
if (__glibc_likely (trtable != NULL))
2225
unsigned int context;
2227
= re_string_context_at (&mctx->input,
2228
re_string_cur_idx (&mctx->input) - 1,
2230
if (IS_WORD_CONTEXT (context))
2231
return trtable[ch + SBC_MAX];
2236
if (!build_trtable (mctx->dfa, state))
2242
/* Retry, we now have a transition table. */
2246
/* Update the state_log if we need */
2247
static re_dfastate_t *
2248
merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2249
re_dfastate_t *next_state)
2251
const re_dfa_t *const dfa = mctx->dfa;
2252
Idx cur_idx = re_string_cur_idx (&mctx->input);
2254
if (cur_idx > mctx->state_log_top)
2256
mctx->state_log[cur_idx] = next_state;
2257
mctx->state_log_top = cur_idx;
2259
else if (mctx->state_log[cur_idx] == 0)
2261
mctx->state_log[cur_idx] = next_state;
2265
re_dfastate_t *pstate;
2266
unsigned int context;
2267
re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2268
/* If (state_log[cur_idx] != 0), it implies that cur_idx is
2269
the destination of a multibyte char/collating element/
2270
back reference. Then the next state is the union set of
2271
these destinations and the results of the transition table. */
2272
pstate = mctx->state_log[cur_idx];
2273
log_nodes = pstate->entrance_nodes;
2274
if (next_state != NULL)
2276
table_nodes = next_state->entrance_nodes;
2277
*err = re_node_set_init_union (&next_nodes, table_nodes,
2279
if (__glibc_unlikely (*err != REG_NOERROR))
2283
next_nodes = *log_nodes;
2284
/* Note: We already add the nodes of the initial state,
2285
then we don't need to add them here. */
2287
context = re_string_context_at (&mctx->input,
2288
re_string_cur_idx (&mctx->input) - 1,
2290
next_state = mctx->state_log[cur_idx]
2291
= re_acquire_state_context (err, dfa, &next_nodes, context);
2292
/* We don't need to check errors here, since the return value of
2293
this function is next_state and ERR is already set. */
2295
if (table_nodes != NULL)
2296
re_node_set_free (&next_nodes);
2299
if (__glibc_unlikely (dfa->nbackref) && next_state != NULL)
2301
/* Check OP_OPEN_SUBEXP in the current state in case that we use them
2302
later. We must check them here, since the back references in the
2303
next state might use them. */
2304
*err = check_subexp_matching_top (mctx, &next_state->nodes,
2306
if (__glibc_unlikely (*err != REG_NOERROR))
2309
/* If the next state has back references. */
2310
if (next_state->has_backref)
2312
*err = transit_state_bkref (mctx, &next_state->nodes);
2313
if (__glibc_unlikely (*err != REG_NOERROR))
2315
next_state = mctx->state_log[cur_idx];
2322
/* Skip bytes in the input that correspond to part of a
2323
multi-byte match, then look in the log for a state
2324
from which to restart matching. */
2325
static re_dfastate_t *
2326
find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2328
re_dfastate_t *cur_state;
2331
Idx max = mctx->state_log_top;
2332
Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2336
if (++cur_str_idx > max)
2338
re_string_skip_bytes (&mctx->input, 1);
2340
while (mctx->state_log[cur_str_idx] == NULL);
2342
cur_state = merge_state_with_log (err, mctx, NULL);
2344
while (*err == REG_NOERROR && cur_state == NULL);
2348
/* Helper functions for transit_state. */
2350
/* From the node set CUR_NODES, pick up the nodes whose types are
2351
OP_OPEN_SUBEXP and which have corresponding back references in the regular
2352
expression. And register them to use them later for evaluating the
2353
corresponding back references. */
2355
static reg_errcode_t
2356
check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2359
const re_dfa_t *const dfa = mctx->dfa;
2363
/* TODO: This isn't efficient.
2364
Because there might be more than one nodes whose types are
2365
OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2368
for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2370
Idx node = cur_nodes->elems[node_idx];
2371
if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2372
&& dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2373
&& (dfa->used_bkref_map
2374
& ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2376
err = match_ctx_add_subtop (mctx, node, str_idx);
2377
if (__glibc_unlikely (err != REG_NOERROR))
2385
/* Return the next state to which the current state STATE will transit by
2386
accepting the current input byte. Return NULL on failure. */
2388
static re_dfastate_t *
2389
transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2390
re_dfastate_t *state)
2392
const re_dfa_t *const dfa = mctx->dfa;
2393
re_node_set next_nodes;
2394
re_dfastate_t *next_state;
2395
Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2396
unsigned int context;
2398
*err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2399
if (__glibc_unlikely (*err != REG_NOERROR))
2401
for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2403
Idx cur_node = state->nodes.elems[node_cnt];
2404
if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2406
*err = re_node_set_merge (&next_nodes,
2407
dfa->eclosures + dfa->nexts[cur_node]);
2408
if (__glibc_unlikely (*err != REG_NOERROR))
2410
re_node_set_free (&next_nodes);
2415
context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2416
next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2417
/* We don't need to check errors here, since the return value of
2418
this function is next_state and ERR is already set. */
2420
re_node_set_free (&next_nodes);
2421
re_string_skip_bytes (&mctx->input, 1);
2426
static reg_errcode_t
2427
transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2429
const re_dfa_t *const dfa = mctx->dfa;
2433
for (i = 0; i < pstate->nodes.nelem; ++i)
2435
re_node_set dest_nodes, *new_nodes;
2436
Idx cur_node_idx = pstate->nodes.elems[i];
2439
unsigned int context;
2440
re_dfastate_t *dest_state;
2442
if (!dfa->nodes[cur_node_idx].accept_mb)
2445
if (dfa->nodes[cur_node_idx].constraint)
2447
context = re_string_context_at (&mctx->input,
2448
re_string_cur_idx (&mctx->input),
2450
if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2455
/* How many bytes the node can accept? */
2456
naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2457
re_string_cur_idx (&mctx->input));
2461
/* The node can accepts 'naccepted' bytes. */
2462
dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2463
mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2464
: mctx->max_mb_elem_len);
2465
err = clean_state_log_if_needed (mctx, dest_idx);
2466
if (__glibc_unlikely (err != REG_NOERROR))
2468
DEBUG_ASSERT (dfa->nexts[cur_node_idx] != -1);
2469
new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2471
dest_state = mctx->state_log[dest_idx];
2472
if (dest_state == NULL)
2473
dest_nodes = *new_nodes;
2476
err = re_node_set_init_union (&dest_nodes,
2477
dest_state->entrance_nodes, new_nodes);
2478
if (__glibc_unlikely (err != REG_NOERROR))
2481
context = re_string_context_at (&mctx->input, dest_idx - 1,
2483
mctx->state_log[dest_idx]
2484
= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2485
if (dest_state != NULL)
2486
re_node_set_free (&dest_nodes);
2487
if (__glibc_unlikely (mctx->state_log[dest_idx] == NULL
2488
&& err != REG_NOERROR))
2494
static reg_errcode_t
2495
transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2497
const re_dfa_t *const dfa = mctx->dfa;
2500
Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2502
for (i = 0; i < nodes->nelem; ++i)
2504
Idx dest_str_idx, prev_nelem, bkc_idx;
2505
Idx node_idx = nodes->elems[i];
2506
unsigned int context;
2507
const re_token_t *node = dfa->nodes + node_idx;
2508
re_node_set *new_dest_nodes;
2510
/* Check whether 'node' is a backreference or not. */
2511
if (node->type != OP_BACK_REF)
2514
if (node->constraint)
2516
context = re_string_context_at (&mctx->input, cur_str_idx,
2518
if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2522
/* 'node' is a backreference.
2523
Check the substring which the substring matched. */
2524
bkc_idx = mctx->nbkref_ents;
2525
err = get_subexp (mctx, node_idx, cur_str_idx);
2526
if (__glibc_unlikely (err != REG_NOERROR))
2529
/* And add the epsilon closures (which is 'new_dest_nodes') of
2530
the backreference to appropriate state_log. */
2531
DEBUG_ASSERT (dfa->nexts[node_idx] != -1);
2532
for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2535
re_dfastate_t *dest_state;
2536
struct re_backref_cache_entry *bkref_ent;
2537
bkref_ent = mctx->bkref_ents + bkc_idx;
2538
if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2540
subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2541
new_dest_nodes = (subexp_len == 0
2542
? dfa->eclosures + dfa->edests[node_idx].elems[0]
2543
: dfa->eclosures + dfa->nexts[node_idx]);
2544
dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2545
- bkref_ent->subexp_from);
2546
context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2548
dest_state = mctx->state_log[dest_str_idx];
2549
prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2550
: mctx->state_log[cur_str_idx]->nodes.nelem);
2551
/* Add 'new_dest_node' to state_log. */
2552
if (dest_state == NULL)
2554
mctx->state_log[dest_str_idx]
2555
= re_acquire_state_context (&err, dfa, new_dest_nodes,
2557
if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
2558
&& err != REG_NOERROR))
2563
re_node_set dest_nodes;
2564
err = re_node_set_init_union (&dest_nodes,
2565
dest_state->entrance_nodes,
2567
if (__glibc_unlikely (err != REG_NOERROR))
2569
re_node_set_free (&dest_nodes);
2572
mctx->state_log[dest_str_idx]
2573
= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2574
re_node_set_free (&dest_nodes);
2575
if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
2576
&& err != REG_NOERROR))
2579
/* We need to check recursively if the backreference can epsilon
2582
&& mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2584
err = check_subexp_matching_top (mctx, new_dest_nodes,
2586
if (__glibc_unlikely (err != REG_NOERROR))
2588
err = transit_state_bkref (mctx, new_dest_nodes);
2589
if (__glibc_unlikely (err != REG_NOERROR))
2599
/* Enumerate all the candidates which the backreference BKREF_NODE can match
2600
at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2601
Note that we might collect inappropriate candidates here.
2602
However, the cost of checking them strictly here is too high, then we
2603
delay these checking for prune_impossible_nodes(). */
2605
static reg_errcode_t
2606
__attribute_warn_unused_result__
2607
get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2609
const re_dfa_t *const dfa = mctx->dfa;
2610
Idx subexp_num, sub_top_idx;
2611
const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2612
/* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */
2613
Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2614
if (cache_idx != -1)
2616
const struct re_backref_cache_entry *entry
2617
= mctx->bkref_ents + cache_idx;
2619
if (entry->node == bkref_node)
2620
return REG_NOERROR; /* We already checked it. */
2621
while (entry++->more);
2624
subexp_num = dfa->nodes[bkref_node].opr.idx;
2626
/* For each sub expression */
2627
for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2630
re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2631
re_sub_match_last_t *sub_last;
2632
Idx sub_last_idx, sl_str, bkref_str_off;
2634
if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2635
continue; /* It isn't related. */
2637
sl_str = sub_top->str_idx;
2638
bkref_str_off = bkref_str_idx;
2639
/* At first, check the last node of sub expressions we already
2641
for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2643
regoff_t sl_str_diff;
2644
sub_last = sub_top->lasts[sub_last_idx];
2645
sl_str_diff = sub_last->str_idx - sl_str;
2646
/* The matched string by the sub expression match with the substring
2647
at the back reference? */
2648
if (sl_str_diff > 0)
2650
if (__glibc_unlikely (bkref_str_off + sl_str_diff
2651
> mctx->input.valid_len))
2653
/* Not enough chars for a successful match. */
2654
if (bkref_str_off + sl_str_diff > mctx->input.len)
2657
err = clean_state_log_if_needed (mctx,
2660
if (__glibc_unlikely (err != REG_NOERROR))
2662
buf = (const char *) re_string_get_buffer (&mctx->input);
2664
if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2665
/* We don't need to search this sub expression any more. */
2668
bkref_str_off += sl_str_diff;
2669
sl_str += sl_str_diff;
2670
err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2673
/* Reload buf, since the preceding call might have reallocated
2675
buf = (const char *) re_string_get_buffer (&mctx->input);
2677
if (err == REG_NOMATCH)
2679
if (__glibc_unlikely (err != REG_NOERROR))
2683
if (sub_last_idx < sub_top->nlasts)
2685
if (sub_last_idx > 0)
2687
/* Then, search for the other last nodes of the sub expression. */
2688
for (; sl_str <= bkref_str_idx; ++sl_str)
2691
regoff_t sl_str_off;
2692
const re_node_set *nodes;
2693
sl_str_off = sl_str - sub_top->str_idx;
2694
/* The matched string by the sub expression match with the substring
2695
at the back reference? */
2698
if (__glibc_unlikely (bkref_str_off >= mctx->input.valid_len))
2700
/* If we are at the end of the input, we cannot match. */
2701
if (bkref_str_off >= mctx->input.len)
2704
err = extend_buffers (mctx, bkref_str_off + 1);
2705
if (__glibc_unlikely (err != REG_NOERROR))
2708
buf = (const char *) re_string_get_buffer (&mctx->input);
2710
if (buf [bkref_str_off++] != buf[sl_str - 1])
2711
break; /* We don't need to search this sub expression
2714
if (mctx->state_log[sl_str] == NULL)
2716
/* Does this state have a ')' of the sub expression? */
2717
nodes = &mctx->state_log[sl_str]->nodes;
2718
cls_node = find_subexp_node (dfa, nodes, subexp_num,
2722
if (sub_top->path == NULL)
2724
sub_top->path = calloc (sizeof (state_array_t),
2725
sl_str - sub_top->str_idx + 1);
2726
if (sub_top->path == NULL)
2729
/* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
2730
in the current context? */
2731
err = check_arrival (mctx, sub_top->path, sub_top->node,
2732
sub_top->str_idx, cls_node, sl_str,
2734
if (err == REG_NOMATCH)
2736
if (__glibc_unlikely (err != REG_NOERROR))
2738
sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2739
if (__glibc_unlikely (sub_last == NULL))
2741
err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2743
buf = (const char *) re_string_get_buffer (&mctx->input);
2744
if (err == REG_NOMATCH)
2746
if (__glibc_unlikely (err != REG_NOERROR))
2753
/* Helper functions for get_subexp(). */
2755
/* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
2756
If it can arrive, register the sub expression expressed with SUB_TOP
2759
static reg_errcode_t
2760
get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2761
re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
2765
/* Can the subexpression arrive the back reference? */
2766
err = check_arrival (mctx, &sub_last->path, sub_last->node,
2767
sub_last->str_idx, bkref_node, bkref_str,
2769
if (err != REG_NOERROR)
2771
err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2773
if (__glibc_unlikely (err != REG_NOERROR))
2775
to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2776
return clean_state_log_if_needed (mctx, to_idx);
2779
/* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
2780
Search '(' if FL_OPEN, or search ')' otherwise.
2781
TODO: This function isn't efficient...
2782
Because there might be more than one nodes whose types are
2783
OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2788
find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2789
Idx subexp_idx, int type)
2792
for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2794
Idx cls_node = nodes->elems[cls_idx];
2795
const re_token_t *node = dfa->nodes + cls_node;
2796
if (node->type == type
2797
&& node->opr.idx == subexp_idx)
2803
/* Check whether the node TOP_NODE at TOP_STR can arrive to the node
2804
LAST_NODE at LAST_STR. We record the path onto PATH since it will be
2806
Return REG_NOERROR if it can arrive, REG_NOMATCH if it cannot,
2807
REG_ESPACE if memory is exhausted. */
2809
static reg_errcode_t
2810
__attribute_warn_unused_result__
2811
check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
2812
Idx top_str, Idx last_node, Idx last_str, int type)
2814
const re_dfa_t *const dfa = mctx->dfa;
2815
reg_errcode_t err = REG_NOERROR;
2816
Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
2817
re_dfastate_t *cur_state = NULL;
2818
re_node_set *cur_nodes, next_nodes;
2819
re_dfastate_t **backup_state_log;
2820
unsigned int context;
2822
subexp_num = dfa->nodes[top_node].opr.idx;
2823
/* Extend the buffer if we need. */
2824
if (__glibc_unlikely (path->alloc < last_str + mctx->max_mb_elem_len + 1))
2826
re_dfastate_t **new_array;
2827
Idx old_alloc = path->alloc;
2828
Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1;
2830
if (__glibc_unlikely (IDX_MAX - old_alloc < incr_alloc))
2832
new_alloc = old_alloc + incr_alloc;
2833
if (__glibc_unlikely (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc))
2835
new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
2836
if (__glibc_unlikely (new_array == NULL))
2838
path->array = new_array;
2839
path->alloc = new_alloc;
2840
memset (new_array + old_alloc, '\0',
2841
sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2844
str_idx = path->next_idx ? path->next_idx : top_str;
2846
/* Temporary modify MCTX. */
2847
backup_state_log = mctx->state_log;
2848
backup_cur_idx = mctx->input.cur_idx;
2849
mctx->state_log = path->array;
2850
mctx->input.cur_idx = str_idx;
2852
/* Setup initial node set. */
2853
context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2854
if (str_idx == top_str)
2856
err = re_node_set_init_1 (&next_nodes, top_node);
2857
if (__glibc_unlikely (err != REG_NOERROR))
2859
err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2860
if (__glibc_unlikely (err != REG_NOERROR))
2862
re_node_set_free (&next_nodes);
2868
cur_state = mctx->state_log[str_idx];
2869
if (cur_state && cur_state->has_backref)
2871
err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2872
if (__glibc_unlikely (err != REG_NOERROR))
2876
re_node_set_init_empty (&next_nodes);
2878
if (str_idx == top_str || (cur_state && cur_state->has_backref))
2880
if (next_nodes.nelem)
2882
err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2884
if (__glibc_unlikely (err != REG_NOERROR))
2886
re_node_set_free (&next_nodes);
2890
cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2891
if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
2893
re_node_set_free (&next_nodes);
2896
mctx->state_log[str_idx] = cur_state;
2899
for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2901
re_node_set_empty (&next_nodes);
2902
if (mctx->state_log[str_idx + 1])
2904
err = re_node_set_merge (&next_nodes,
2905
&mctx->state_log[str_idx + 1]->nodes);
2906
if (__glibc_unlikely (err != REG_NOERROR))
2908
re_node_set_free (&next_nodes);
2914
err = check_arrival_add_next_nodes (mctx, str_idx,
2915
&cur_state->non_eps_nodes,
2917
if (__glibc_unlikely (err != REG_NOERROR))
2919
re_node_set_free (&next_nodes);
2924
if (next_nodes.nelem)
2926
err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2927
if (__glibc_unlikely (err != REG_NOERROR))
2929
re_node_set_free (&next_nodes);
2932
err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2934
if (__glibc_unlikely (err != REG_NOERROR))
2936
re_node_set_free (&next_nodes);
2940
context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2941
cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2942
if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
2944
re_node_set_free (&next_nodes);
2947
mctx->state_log[str_idx] = cur_state;
2948
null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
2950
re_node_set_free (&next_nodes);
2951
cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
2952
: &mctx->state_log[last_str]->nodes);
2953
path->next_idx = str_idx;
2956
mctx->state_log = backup_state_log;
2957
mctx->input.cur_idx = backup_cur_idx;
2959
/* Then check the current node set has the node LAST_NODE. */
2960
if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
2966
/* Helper functions for check_arrival. */
2968
/* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
2970
TODO: This function is similar to the functions transit_state*(),
2971
however this function has many additional works.
2972
Can't we unify them? */
2974
static reg_errcode_t
2975
__attribute_warn_unused_result__
2976
check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
2977
re_node_set *cur_nodes, re_node_set *next_nodes)
2979
const re_dfa_t *const dfa = mctx->dfa;
2982
reg_errcode_t err = REG_NOERROR;
2983
re_node_set union_set;
2984
re_node_set_init_empty (&union_set);
2985
for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
2988
Idx cur_node = cur_nodes->elems[cur_idx];
2989
DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[cur_node].type));
2991
/* If the node may accept "multi byte". */
2992
if (dfa->nodes[cur_node].accept_mb)
2994
naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
2998
re_dfastate_t *dest_state;
2999
Idx next_node = dfa->nexts[cur_node];
3000
Idx next_idx = str_idx + naccepted;
3001
dest_state = mctx->state_log[next_idx];
3002
re_node_set_empty (&union_set);
3005
err = re_node_set_merge (&union_set, &dest_state->nodes);
3006
if (__glibc_unlikely (err != REG_NOERROR))
3008
re_node_set_free (&union_set);
3012
ok = re_node_set_insert (&union_set, next_node);
3013
if (__glibc_unlikely (! ok))
3015
re_node_set_free (&union_set);
3018
mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3020
if (__glibc_unlikely (mctx->state_log[next_idx] == NULL
3021
&& err != REG_NOERROR))
3023
re_node_set_free (&union_set);
3030
|| check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3032
ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3033
if (__glibc_unlikely (! ok))
3035
re_node_set_free (&union_set);
3040
re_node_set_free (&union_set);
3044
/* For all the nodes in CUR_NODES, add the epsilon closures of them to
3045
CUR_NODES, however exclude the nodes which are:
3046
- inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3047
- out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3050
static reg_errcode_t
3051
check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3052
Idx ex_subexp, int type)
3055
Idx idx, outside_node;
3056
re_node_set new_nodes;
3057
DEBUG_ASSERT (cur_nodes->nelem);
3058
err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3059
if (__glibc_unlikely (err != REG_NOERROR))
3061
/* Create a new node set NEW_NODES with the nodes which are epsilon
3062
closures of the node in CUR_NODES. */
3064
for (idx = 0; idx < cur_nodes->nelem; ++idx)
3066
Idx cur_node = cur_nodes->elems[idx];
3067
const re_node_set *eclosure = dfa->eclosures + cur_node;
3068
outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3069
if (outside_node == -1)
3071
/* There are no problematic nodes, just merge them. */
3072
err = re_node_set_merge (&new_nodes, eclosure);
3073
if (__glibc_unlikely (err != REG_NOERROR))
3075
re_node_set_free (&new_nodes);
3081
/* There are problematic nodes, re-calculate incrementally. */
3082
err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3084
if (__glibc_unlikely (err != REG_NOERROR))
3086
re_node_set_free (&new_nodes);
3091
re_node_set_free (cur_nodes);
3092
*cur_nodes = new_nodes;
3096
/* Helper function for check_arrival_expand_ecl.
3097
Check incrementally the epsilon closure of TARGET, and if it isn't
3098
problematic append it to DST_NODES. */
3100
static reg_errcode_t
3101
__attribute_warn_unused_result__
3102
check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3103
Idx target, Idx ex_subexp, int type)
3106
for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3110
if (dfa->nodes[cur_node].type == type
3111
&& dfa->nodes[cur_node].opr.idx == ex_subexp)
3113
if (type == OP_CLOSE_SUBEXP)
3115
ok = re_node_set_insert (dst_nodes, cur_node);
3116
if (__glibc_unlikely (! ok))
3121
ok = re_node_set_insert (dst_nodes, cur_node);
3122
if (__glibc_unlikely (! ok))
3124
if (dfa->edests[cur_node].nelem == 0)
3126
if (dfa->edests[cur_node].nelem == 2)
3129
err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3130
dfa->edests[cur_node].elems[1],
3132
if (__glibc_unlikely (err != REG_NOERROR))
3135
cur_node = dfa->edests[cur_node].elems[0];
3141
/* For all the back references in the current state, calculate the
3142
destination of the back references by the appropriate entry
3143
in MCTX->BKREF_ENTS. */
3145
static reg_errcode_t
3146
__attribute_warn_unused_result__
3147
expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3148
Idx cur_str, Idx subexp_num, int type)
3150
const re_dfa_t *const dfa = mctx->dfa;
3152
Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3153
struct re_backref_cache_entry *ent;
3155
if (cache_idx_start == -1)
3159
ent = mctx->bkref_ents + cache_idx_start;
3162
Idx to_idx, next_node;
3164
/* Is this entry ENT is appropriate? */
3165
if (!re_node_set_contains (cur_nodes, ent->node))
3168
to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3169
/* Calculate the destination of the back reference, and append it
3170
to MCTX->STATE_LOG. */
3171
if (to_idx == cur_str)
3173
/* The backreference did epsilon transit, we must re-check all the
3174
node in the current state. */
3175
re_node_set new_dests;
3176
reg_errcode_t err2, err3;
3177
next_node = dfa->edests[ent->node].elems[0];
3178
if (re_node_set_contains (cur_nodes, next_node))
3180
err = re_node_set_init_1 (&new_dests, next_node);
3181
err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3182
err3 = re_node_set_merge (cur_nodes, &new_dests);
3183
re_node_set_free (&new_dests);
3184
if (__glibc_unlikely (err != REG_NOERROR || err2 != REG_NOERROR
3185
|| err3 != REG_NOERROR))
3187
err = (err != REG_NOERROR ? err
3188
: (err2 != REG_NOERROR ? err2 : err3));
3191
/* TODO: It is still inefficient... */
3196
re_node_set union_set;
3197
next_node = dfa->nexts[ent->node];
3198
if (mctx->state_log[to_idx])
3201
if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3204
err = re_node_set_init_copy (&union_set,
3205
&mctx->state_log[to_idx]->nodes);
3206
ok = re_node_set_insert (&union_set, next_node);
3207
if (__glibc_unlikely (err != REG_NOERROR || ! ok))
3209
re_node_set_free (&union_set);
3210
err = err != REG_NOERROR ? err : REG_ESPACE;
3216
err = re_node_set_init_1 (&union_set, next_node);
3217
if (__glibc_unlikely (err != REG_NOERROR))
3220
mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3221
re_node_set_free (&union_set);
3222
if (__glibc_unlikely (mctx->state_log[to_idx] == NULL
3223
&& err != REG_NOERROR))
3227
while (ent++->more);
3231
/* Build transition table for the state.
3232
Return true if successful. */
3234
static bool __attribute_noinline__
3235
build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3240
bool need_word_trtable = false;
3241
bitset_word_t elem, mask;
3242
Idx ndests; /* Number of the destination states from 'state'. */
3243
re_dfastate_t **trtable;
3244
re_dfastate_t *dest_states[SBC_MAX];
3245
re_dfastate_t *dest_states_word[SBC_MAX];
3246
re_dfastate_t *dest_states_nl[SBC_MAX];
3247
re_node_set follows;
3248
bitset_t acceptable;
3250
/* We build DFA states which corresponds to the destination nodes
3251
from 'state'. 'dests_node[i]' represents the nodes which i-th
3252
destination state contains, and 'dests_ch[i]' represents the
3253
characters which i-th destination state accepts. */
3254
re_node_set dests_node[SBC_MAX];
3255
bitset_t dests_ch[SBC_MAX];
3257
/* Initialize transition table. */
3258
state->word_trtable = state->trtable = NULL;
3260
/* At first, group all nodes belonging to 'state' into several
3262
ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3263
if (__glibc_unlikely (ndests <= 0))
3265
/* Return false in case of an error, true otherwise. */
3268
state->trtable = (re_dfastate_t **)
3269
calloc (sizeof (re_dfastate_t *), SBC_MAX);
3270
if (__glibc_unlikely (state->trtable == NULL))
3277
err = re_node_set_alloc (&follows, ndests + 1);
3278
if (__glibc_unlikely (err != REG_NOERROR))
3281
re_node_set_free (&follows);
3282
for (i = 0; i < ndests; ++i)
3283
re_node_set_free (dests_node + i);
3287
bitset_empty (acceptable);
3289
/* Then build the states for all destinations. */
3290
for (i = 0; i < ndests; ++i)
3293
re_node_set_empty (&follows);
3294
/* Merge the follows of this destination states. */
3295
for (j = 0; j < dests_node[i].nelem; ++j)
3297
next_node = dfa->nexts[dests_node[i].elems[j]];
3298
if (next_node != -1)
3300
err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3301
if (__glibc_unlikely (err != REG_NOERROR))
3305
dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3306
if (__glibc_unlikely (dest_states[i] == NULL && err != REG_NOERROR))
3308
/* If the new state has context constraint,
3309
build appropriate states for these contexts. */
3310
if (dest_states[i]->has_constraint)
3312
dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3314
if (__glibc_unlikely (dest_states_word[i] == NULL
3315
&& err != REG_NOERROR))
3318
if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3319
need_word_trtable = true;
3321
dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3323
if (__glibc_unlikely (dest_states_nl[i] == NULL && err != REG_NOERROR))
3328
dest_states_word[i] = dest_states[i];
3329
dest_states_nl[i] = dest_states[i];
3331
bitset_merge (acceptable, dests_ch[i]);
3334
if (!__glibc_unlikely (need_word_trtable))
3336
/* We don't care about whether the following character is a word
3337
character, or we are in a single-byte character set so we can
3338
discern by looking at the character code: allocate a
3339
256-entry transition table. */
3340
trtable = state->trtable =
3341
(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3342
if (__glibc_unlikely (trtable == NULL))
3345
/* For all characters ch...: */
3346
for (i = 0; i < BITSET_WORDS; ++i)
3347
for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3349
mask <<= 1, elem >>= 1, ++ch)
3350
if (__glibc_unlikely (elem & 1))
3352
/* There must be exactly one destination which accepts
3353
character ch. See group_nodes_into_DFAstates. */
3354
for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3357
/* j-th destination accepts the word character ch. */
3358
if (dfa->word_char[i] & mask)
3359
trtable[ch] = dest_states_word[j];
3361
trtable[ch] = dest_states[j];
3366
/* We care about whether the following character is a word
3367
character, and we are in a multi-byte character set: discern
3368
by looking at the character code: build two 256-entry
3369
transition tables, one starting at trtable[0] and one
3370
starting at trtable[SBC_MAX]. */
3371
trtable = state->word_trtable =
3372
(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3373
if (__glibc_unlikely (trtable == NULL))
3376
/* For all characters ch...: */
3377
for (i = 0; i < BITSET_WORDS; ++i)
3378
for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3380
mask <<= 1, elem >>= 1, ++ch)
3381
if (__glibc_unlikely (elem & 1))
3383
/* There must be exactly one destination which accepts
3384
character ch. See group_nodes_into_DFAstates. */
3385
for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3388
/* j-th destination accepts the word character ch. */
3389
trtable[ch] = dest_states[j];
3390
trtable[ch + SBC_MAX] = dest_states_word[j];
3395
if (bitset_contain (acceptable, NEWLINE_CHAR))
3397
/* The current state accepts newline character. */
3398
for (j = 0; j < ndests; ++j)
3399
if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3401
/* k-th destination accepts newline character. */
3402
trtable[NEWLINE_CHAR] = dest_states_nl[j];
3403
if (need_word_trtable)
3404
trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3405
/* There must be only one destination which accepts
3406
newline. See group_nodes_into_DFAstates. */
3411
re_node_set_free (&follows);
3412
for (i = 0; i < ndests; ++i)
3413
re_node_set_free (dests_node + i);
3417
/* Group all nodes belonging to STATE into several destinations.
3418
Then for all destinations, set the nodes belonging to the destination
3419
to DESTS_NODE[i] and set the characters accepted by the destination
3420
to DEST_CH[i]. Return the number of destinations if successful,
3421
-1 on internal error. */
3424
group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3425
re_node_set *dests_node, bitset_t *dests_ch)
3430
Idx ndests; /* Number of the destinations from 'state'. */
3431
bitset_t accepts; /* Characters a node can accept. */
3432
const re_node_set *cur_nodes = &state->nodes;
3433
bitset_empty (accepts);
3436
/* For all the nodes belonging to 'state', */
3437
for (i = 0; i < cur_nodes->nelem; ++i)
3439
re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3440
re_token_type_t type = node->type;
3441
unsigned int constraint = node->constraint;
3443
/* Enumerate all single byte character this node can accept. */
3444
if (type == CHARACTER)
3445
bitset_set (accepts, node->opr.c);
3446
else if (type == SIMPLE_BRACKET)
3448
bitset_merge (accepts, node->opr.sbcset);
3450
else if (type == OP_PERIOD)
3452
if (dfa->mb_cur_max > 1)
3453
bitset_merge (accepts, dfa->sb_char);
3455
bitset_set_all (accepts);
3456
if (!(dfa->syntax & RE_DOT_NEWLINE))
3457
bitset_clear (accepts, '\n');
3458
if (dfa->syntax & RE_DOT_NOT_NULL)
3459
bitset_clear (accepts, '\0');
3461
else if (type == OP_UTF8_PERIOD)
3463
if (ASCII_CHARS % BITSET_WORD_BITS == 0)
3464
memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
3466
bitset_merge (accepts, utf8_sb_map);
3467
if (!(dfa->syntax & RE_DOT_NEWLINE))
3468
bitset_clear (accepts, '\n');
3469
if (dfa->syntax & RE_DOT_NOT_NULL)
3470
bitset_clear (accepts, '\0');
3475
/* Check the 'accepts' and sift the characters which are not
3476
match it the context. */
3479
if (constraint & NEXT_NEWLINE_CONSTRAINT)
3481
bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3482
bitset_empty (accepts);
3483
if (accepts_newline)
3484
bitset_set (accepts, NEWLINE_CHAR);
3488
if (constraint & NEXT_ENDBUF_CONSTRAINT)
3490
bitset_empty (accepts);
3494
if (constraint & NEXT_WORD_CONSTRAINT)
3496
bitset_word_t any_set = 0;
3497
if (type == CHARACTER && !node->word_char)
3499
bitset_empty (accepts);
3502
if (dfa->mb_cur_max > 1)
3503
for (j = 0; j < BITSET_WORDS; ++j)
3504
any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3506
for (j = 0; j < BITSET_WORDS; ++j)
3507
any_set |= (accepts[j] &= dfa->word_char[j]);
3511
if (constraint & NEXT_NOTWORD_CONSTRAINT)
3513
bitset_word_t any_set = 0;
3514
if (type == CHARACTER && node->word_char)
3516
bitset_empty (accepts);
3519
if (dfa->mb_cur_max > 1)
3520
for (j = 0; j < BITSET_WORDS; ++j)
3521
any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3523
for (j = 0; j < BITSET_WORDS; ++j)
3524
any_set |= (accepts[j] &= ~dfa->word_char[j]);
3530
/* Then divide 'accepts' into DFA states, or create a new
3531
state. Above, we make sure that accepts is not empty. */
3532
for (j = 0; j < ndests; ++j)
3534
bitset_t intersec; /* Intersection sets, see below. */
3536
/* Flags, see below. */
3537
bitset_word_t has_intersec, not_subset, not_consumed;
3539
/* Optimization, skip if this state doesn't accept the character. */
3540
if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3543
/* Enumerate the intersection set of this state and 'accepts'. */
3545
for (k = 0; k < BITSET_WORDS; ++k)
3546
has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3547
/* And skip if the intersection set is empty. */
3551
/* Then check if this state is a subset of 'accepts'. */
3552
not_subset = not_consumed = 0;
3553
for (k = 0; k < BITSET_WORDS; ++k)
3555
not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3556
not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3559
/* If this state isn't a subset of 'accepts', create a
3560
new group state, which has the 'remains'. */
3563
bitset_copy (dests_ch[ndests], remains);
3564
bitset_copy (dests_ch[j], intersec);
3565
err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3566
if (__glibc_unlikely (err != REG_NOERROR))
3571
/* Put the position in the current group. */
3572
ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3573
if (__glibc_unlikely (! ok))
3576
/* If all characters are consumed, go to next node. */
3580
/* Some characters remain, create a new group. */
3583
bitset_copy (dests_ch[ndests], accepts);
3584
err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3585
if (__glibc_unlikely (err != REG_NOERROR))
3588
bitset_empty (accepts);
3591
assume (ndests <= SBC_MAX);
3594
for (j = 0; j < ndests; ++j)
3595
re_node_set_free (dests_node + j);
3599
/* Check how many bytes the node 'dfa->nodes[node_idx]' accepts.
3600
Return the number of the bytes the node accepts.
3601
STR_IDX is the current index of the input string.
3603
This function handles the nodes which can accept one character, or
3604
one collating element like '.', '[a-z]', opposite to the other nodes
3605
can only accept one byte. */
3608
# include <locale/weight.h>
3612
check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3613
const re_string_t *input, Idx str_idx)
3615
const re_token_t *node = dfa->nodes + node_idx;
3616
int char_len, elem_len;
3619
if (__glibc_unlikely (node->type == OP_UTF8_PERIOD))
3621
unsigned char c = re_string_byte_at (input, str_idx), d;
3622
if (__glibc_likely (c < 0xc2))
3625
if (str_idx + 2 > input->len)
3628
d = re_string_byte_at (input, str_idx + 1);
3630
return (d < 0x80 || d > 0xbf) ? 0 : 2;
3634
if (c == 0xe0 && d < 0xa0)
3640
if (c == 0xf0 && d < 0x90)
3646
if (c == 0xf8 && d < 0x88)
3652
if (c == 0xfc && d < 0x84)
3658
if (str_idx + char_len > input->len)
3661
for (i = 1; i < char_len; ++i)
3663
d = re_string_byte_at (input, str_idx + i);
3664
if (d < 0x80 || d > 0xbf)
3670
char_len = re_string_char_size_at (input, str_idx);
3671
if (node->type == OP_PERIOD)
3675
/* FIXME: I don't think this if is needed, as both '\n'
3676
and '\0' are char_len == 1. */
3677
/* '.' accepts any one character except the following two cases. */
3678
if ((!(dfa->syntax & RE_DOT_NEWLINE)
3679
&& re_string_byte_at (input, str_idx) == '\n')
3680
|| ((dfa->syntax & RE_DOT_NOT_NULL)
3681
&& re_string_byte_at (input, str_idx) == '\0'))
3686
elem_len = re_string_elem_size_at (input, str_idx);
3687
if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3690
if (node->type == COMPLEX_BRACKET)
3692
const re_charset_t *cset = node->opr.mbcset;
3694
const unsigned char *pin
3695
= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3700
wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3701
? re_string_wchar_at (input, str_idx) : 0);
3703
/* match with multibyte character? */
3704
for (i = 0; i < cset->nmbchars; ++i)
3705
if (wc == cset->mbchars[i])
3707
match_len = char_len;
3708
goto check_node_accept_bytes_match;
3710
/* match with character_class? */
3711
for (i = 0; i < cset->nchar_classes; ++i)
3713
wctype_t wt = cset->char_classes[i];
3714
if (__iswctype (wc, wt))
3716
match_len = char_len;
3717
goto check_node_accept_bytes_match;
3722
nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3725
unsigned int in_collseq = 0;
3726
const int32_t *table, *indirect;
3727
const unsigned char *weights, *extra;
3728
const char *collseqwc;
3730
/* match with collating_symbol? */
3731
if (cset->ncoll_syms)
3732
extra = (const unsigned char *)
3733
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3734
for (i = 0; i < cset->ncoll_syms; ++i)
3736
const unsigned char *coll_sym = extra + cset->coll_syms[i];
3737
/* Compare the length of input collating element and
3738
the length of current collating element. */
3739
if (*coll_sym != elem_len)
3741
/* Compare each bytes. */
3742
for (j = 0; j < *coll_sym; j++)
3743
if (pin[j] != coll_sym[1 + j])
3747
/* Match if every bytes is equal. */
3749
goto check_node_accept_bytes_match;
3755
if (elem_len <= char_len)
3757
collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3758
in_collseq = __collseq_table_lookup (collseqwc, wc);
3761
in_collseq = find_collation_sequence_value (pin, elem_len);
3763
/* match with range expression? */
3764
/* FIXME: Implement rational ranges here, too. */
3765
for (i = 0; i < cset->nranges; ++i)
3766
if (cset->range_starts[i] <= in_collseq
3767
&& in_collseq <= cset->range_ends[i])
3769
match_len = elem_len;
3770
goto check_node_accept_bytes_match;
3773
/* match with equivalence_class? */
3774
if (cset->nequiv_classes)
3776
const unsigned char *cp = pin;
3777
table = (const int32_t *)
3778
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3779
weights = (const unsigned char *)
3780
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3781
extra = (const unsigned char *)
3782
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3783
indirect = (const int32_t *)
3784
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3785
int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
3786
int32_t rule = idx >> 24;
3790
size_t weight_len = weights[idx];
3791
for (i = 0; i < cset->nequiv_classes; ++i)
3793
int32_t equiv_class_idx = cset->equiv_classes[i];
3794
int32_t equiv_class_rule = equiv_class_idx >> 24;
3795
equiv_class_idx &= 0xffffff;
3796
if (weights[equiv_class_idx] == weight_len
3797
&& equiv_class_rule == rule
3798
&& memcmp (weights + idx + 1,
3799
weights + equiv_class_idx + 1,
3802
match_len = elem_len;
3803
goto check_node_accept_bytes_match;
3812
/* match with range expression? */
3813
for (i = 0; i < cset->nranges; ++i)
3815
if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i])
3817
match_len = char_len;
3818
goto check_node_accept_bytes_match;
3822
check_node_accept_bytes_match:
3823
if (!cset->non_match)
3830
return (elem_len > char_len) ? elem_len : char_len;
3838
find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
3840
uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3845
/* No valid character. Match it as a single byte character. */
3846
const unsigned char *collseq = (const unsigned char *)
3847
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3848
return collseq[mbs[0]];
3855
const unsigned char *extra = (const unsigned char *)
3856
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3857
int32_t extrasize = (const unsigned char *)
3858
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
3860
for (idx = 0; idx < extrasize;)
3864
int32_t elem_mbs_len;
3865
/* Skip the name of collating element name. */
3866
idx = idx + extra[idx] + 1;
3867
elem_mbs_len = extra[idx++];
3868
if (mbs_len == elem_mbs_len)
3870
for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3871
if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3873
if (mbs_cnt == elem_mbs_len)
3874
/* Found the entry. */
3877
/* Skip the byte sequence of the collating element. */
3878
idx += elem_mbs_len;
3879
/* Adjust for the alignment. */
3880
idx = (idx + 3) & ~3;
3881
/* Skip the collation sequence value. */
3882
idx += sizeof (uint32_t);
3883
/* Skip the wide char sequence of the collating element. */
3884
idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
3885
/* If we found the entry, return the sequence value. */
3887
return *(uint32_t *) (extra + idx);
3888
/* Skip the collation sequence value. */
3889
idx += sizeof (uint32_t);
3896
/* Check whether the node accepts the byte which is IDX-th
3897
byte of the INPUT. */
3900
check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
3904
ch = re_string_byte_at (&mctx->input, idx);
3908
if (node->opr.c != ch)
3912
case SIMPLE_BRACKET:
3913
if (!bitset_contain (node->opr.sbcset, ch))
3917
case OP_UTF8_PERIOD:
3918
if (ch >= ASCII_CHARS)
3922
if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
3923
|| (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
3931
if (node->constraint)
3933
/* The node has constraints. Check whether the current context
3934
satisfies the constraints. */
3935
unsigned int context = re_string_context_at (&mctx->input, idx,
3937
if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
3944
/* Extend the buffers, if the buffers have run out. */
3946
static reg_errcode_t
3947
__attribute_warn_unused_result__
3948
extend_buffers (re_match_context_t *mctx, int min_len)
3951
re_string_t *pstr = &mctx->input;
3953
/* Avoid overflow. */
3954
if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2
3958
/* Double the lengths of the buffers, but allocate at least MIN_LEN. */
3959
ret = re_string_realloc_buffers (pstr,
3961
MIN (pstr->len, pstr->bufs_len * 2)));
3962
if (__glibc_unlikely (ret != REG_NOERROR))
3965
if (mctx->state_log != NULL)
3967
/* And double the length of state_log. */
3968
/* XXX We have no indication of the size of this buffer. If this
3969
allocation fail we have no indication that the state_log array
3970
does not have the right size. */
3971
re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
3972
pstr->bufs_len + 1);
3973
if (__glibc_unlikely (new_array == NULL))
3975
mctx->state_log = new_array;
3978
/* Then reconstruct the buffers. */
3981
if (pstr->mb_cur_max > 1)
3983
ret = build_wcs_upper_buffer (pstr);
3984
if (__glibc_unlikely (ret != REG_NOERROR))
3988
build_upper_buffer (pstr);
3992
if (pstr->mb_cur_max > 1)
3993
build_wcs_buffer (pstr);
3996
if (pstr->trans != NULL)
3997
re_string_translate_buffer (pstr);
4004
/* Functions for matching context. */
4006
/* Initialize MCTX. */
4008
static reg_errcode_t
4009
__attribute_warn_unused_result__
4010
match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
4012
mctx->eflags = eflags;
4013
mctx->match_last = -1;
4016
/* Avoid overflow. */
4017
size_t max_object_size =
4018
MAX (sizeof (struct re_backref_cache_entry),
4019
sizeof (re_sub_match_top_t *));
4020
if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n))
4023
mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4024
mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4025
if (__glibc_unlikely (mctx->bkref_ents == NULL || mctx->sub_tops == NULL))
4028
/* Already zero-ed by the caller.
4030
mctx->bkref_ents = NULL;
4031
mctx->nbkref_ents = 0;
4032
mctx->nsub_tops = 0; */
4033
mctx->abkref_ents = n;
4034
mctx->max_mb_elem_len = 1;
4035
mctx->asub_tops = n;
4039
/* Clean the entries which depend on the current input in MCTX.
4040
This function must be invoked when the matcher changes the start index
4041
of the input, or changes the input string. */
4044
match_ctx_clean (re_match_context_t *mctx)
4047
for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4050
re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4051
for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4053
re_sub_match_last_t *last = top->lasts[sl_idx];
4054
re_free (last->path.array);
4057
re_free (top->lasts);
4060
re_free (top->path->array);
4061
re_free (top->path);
4066
mctx->nsub_tops = 0;
4067
mctx->nbkref_ents = 0;
4070
/* Free all the memory associated with MCTX. */
4073
match_ctx_free (re_match_context_t *mctx)
4075
/* First, free all the memory associated with MCTX->SUB_TOPS. */
4076
match_ctx_clean (mctx);
4077
re_free (mctx->sub_tops);
4078
re_free (mctx->bkref_ents);
4081
/* Add a new backreference entry to MCTX.
4082
Note that we assume that caller never call this function with duplicate
4083
entry, and call with STR_IDX which isn't smaller than any existing entry.
4086
static reg_errcode_t
4087
__attribute_warn_unused_result__
4088
match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
4091
if (mctx->nbkref_ents >= mctx->abkref_ents)
4093
struct re_backref_cache_entry* new_entry;
4094
new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4095
mctx->abkref_ents * 2);
4096
if (__glibc_unlikely (new_entry == NULL))
4098
re_free (mctx->bkref_ents);
4101
mctx->bkref_ents = new_entry;
4102
memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4103
sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4104
mctx->abkref_ents *= 2;
4106
if (mctx->nbkref_ents > 0
4107
&& mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4108
mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4110
mctx->bkref_ents[mctx->nbkref_ents].node = node;
4111
mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4112
mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4113
mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4115
/* This is a cache that saves negative results of check_dst_limits_calc_pos.
4116
If bit N is clear, means that this entry won't epsilon-transition to
4117
an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If
4118
it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4121
A backreference does not epsilon-transition unless it is empty, so set
4122
to all zeros if FROM != TO. */
4123
mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4124
= (from == to ? -1 : 0);
4126
mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4127
if (mctx->max_mb_elem_len < to - from)
4128
mctx->max_mb_elem_len = to - from;
4132
/* Return the first entry with the same str_idx, or -1 if none is
4133
found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */
4136
search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
4138
Idx left, right, mid, last;
4139
last = right = mctx->nbkref_ents;
4140
for (left = 0; left < right;)
4142
mid = (left + right) / 2;
4143
if (mctx->bkref_ents[mid].str_idx < str_idx)
4148
if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4154
/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
4157
static reg_errcode_t
4158
__attribute_warn_unused_result__
4159
match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
4161
DEBUG_ASSERT (mctx->sub_tops != NULL);
4162
DEBUG_ASSERT (mctx->asub_tops > 0);
4163
if (__glibc_unlikely (mctx->nsub_tops == mctx->asub_tops))
4165
Idx new_asub_tops = mctx->asub_tops * 2;
4166
re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4167
re_sub_match_top_t *,
4169
if (__glibc_unlikely (new_array == NULL))
4171
mctx->sub_tops = new_array;
4172
mctx->asub_tops = new_asub_tops;
4174
mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4175
if (__glibc_unlikely (mctx->sub_tops[mctx->nsub_tops] == NULL))
4177
mctx->sub_tops[mctx->nsub_tops]->node = node;
4178
mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4182
/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
4183
at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.
4184
Return the new entry if successful, NULL if memory is exhausted. */
4186
static re_sub_match_last_t *
4187
match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
4189
re_sub_match_last_t *new_entry;
4190
if (__glibc_unlikely (subtop->nlasts == subtop->alasts))
4192
Idx new_alasts = 2 * subtop->alasts + 1;
4193
re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4194
re_sub_match_last_t *,
4196
if (__glibc_unlikely (new_array == NULL))
4198
subtop->lasts = new_array;
4199
subtop->alasts = new_alasts;
4201
new_entry = calloc (1, sizeof (re_sub_match_last_t));
4202
if (__glibc_likely (new_entry != NULL))
4204
subtop->lasts[subtop->nlasts] = new_entry;
4205
new_entry->node = node;
4206
new_entry->str_idx = str_idx;
4213
sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4214
re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
4216
sctx->sifted_states = sifted_sts;
4217
sctx->limited_states = limited_sts;
4218
sctx->last_node = last_node;
4219
sctx->last_str_idx = last_str_idx;
4220
re_node_set_init_empty (&sctx->limits);