~ubuntu-branches/ubuntu/feisty/apache2/feisty

Committer: Bazaar Package Importer
Author(s): Andreas Barth
Date: 2006-12-09 21:05:45 UTC
mfrom: (0.6.1 upstream)
Revision ID: james.westby@ubuntu.com-20061209210545-h70s0xaqc2v8vqr2

Tags: 2.2.3-3.2

http://bugs.debian.org/396265

* Non-maintainer upload.
* 043_ajp_connection_reuse: Patch from upstream Bugzilla, fixing a critical
issue with regard to connection reuse in mod_proxy_ajp.
Closes: #396265

files added:
.gdbinit

ABOUT_APACHE

Apache.dsw

BuildBin.dsp

CHANGES

INSTALL

InstallBin.dsp

LAYOUT

LICENSE

Makefile.in

Makefile.win

NOTICE

NWGNUmakefile

README

README.platforms

ROADMAP

VERSIONING

acinclude.m4

apache2-2.2.3

apache2-2.2.3/debian

apache2-2.2.3/debian/patches

apache2-2.2.3/debian/patches/043_ajp_connection_reuse

apachenw.mcp.zip

build

build/NWGNUenvironment.inc

build/NWGNUhead.inc

build/NWGNUmakefile

build/NWGNUtail.inc

build/PrintPath

build/apr_common.m4

build/binbuild.sh

build/bsd_makefile

build/build-modules-c.awk

build/buildinfo.sh

build/config-stubs

build/config.guess

build/config.sub

build/config_vars.sh.in

build/default.pl

build/fastgen.sh

build/find_apr.m4

build/find_apu.m4

build/get-version.sh

build/install-bindist.sh.in

build/install.sh

build/instdso.sh

build/library.mk

build/ltlib.mk

build/ltmain.sh

build/make_exports.awk

build/make_nw_export.awk

build/make_var_export.awk

build/mkconfNW.awk

build/mkdep.perl

build/mkdir.sh

build/nw_export.inc

build/nw_ver.awk

build/pkg

build/pkg/README

build/pkg/buildpkg.sh

build/pkg/pkginfo.in

build/prebuildNW.bat

build/program.mk

build/rpm

build/rpm/httpd.init

build/rpm/httpd.logrotate

build/rpm/httpd.spec.in

build/rules.mk.in

build/special.mk

build/sysv_makefile

build/win32

build/win32/apache.ico

build/win32/httpd.rc

build/win32/win32ver.awk

buildconf

confdefs.h

config.layout

config.log

config.nice

configure

configure.in

debian/apache2-doc.install

debian/apache2-mpm-event.dirs

debian/apache2-mpm-prefork.prerm

debian/apache2-prefork-dev.install

debian/apache2-src.dirs

debian/apache2-threaded-dev.install

debian/apache2-utils.install

debian/apache2.2-common.dirs

debian/apache2.2-common.init.d

debian/apache2.2-common.install

debian/apache2.2-common.manpages

debian/apache2.2-common.postinst

debian/apache2.2-common.preinst

debian/apache2.conf

debian/bash_completion

debian/compat

debian/config-dir

debian/config-dir/apache2.conf

debian/config-dir/mods-available

debian/config-dir/mods-available/actions.load

debian/config-dir/mods-available/alias.load

debian/config-dir/mods-available/asis.load

debian/config-dir/mods-available/auth_basic.load

debian/config-dir/mods-available/auth_digest.load

debian/config-dir/mods-available/authn_alias.load

debian/config-dir/mods-available/authn_anon.load

debian/config-dir/mods-available/authn_dbd.load

debian/config-dir/mods-available/authn_dbm.load

debian/config-dir/mods-available/authn_default.load

debian/config-dir/mods-available/authn_file.load

debian/config-dir/mods-available/authnz_ldap.load

debian/config-dir/mods-available/authz_dbm.load

debian/config-dir/mods-available/authz_default.load

debian/config-dir/mods-available/authz_groupfile.load

debian/config-dir/mods-available/authz_host.load

debian/config-dir/mods-available/authz_owner.load

debian/config-dir/mods-available/authz_user.load

debian/config-dir/mods-available/autoindex.load

debian/config-dir/mods-available/cache.load

debian/config-dir/mods-available/cern_meta.load

debian/config-dir/mods-available/cgi.load

debian/config-dir/mods-available/cgid.conf

debian/config-dir/mods-available/cgid.load

debian/config-dir/mods-available/charset_lite.load

debian/config-dir/mods-available/dav.load

debian/config-dir/mods-available/dav_fs.conf

debian/config-dir/mods-available/dav_fs.load

debian/config-dir/mods-available/dav_lock.load

debian/config-dir/mods-available/dbd.load

debian/config-dir/mods-available/deflate.conf

debian/config-dir/mods-available/deflate.load

debian/config-dir/mods-available/dir.conf

debian/config-dir/mods-available/dir.load

debian/config-dir/mods-available/disk_cache.conf

debian/config-dir/mods-available/disk_cache.load

debian/config-dir/mods-available/dump_io.load

debian/config-dir/mods-available/env.load

debian/config-dir/mods-available/expires.load

debian/config-dir/mods-available/ext_filter.load

debian/config-dir/mods-available/file_cache.load

debian/config-dir/mods-available/filter.load

debian/config-dir/mods-available/headers.load

debian/config-dir/mods-available/ident.load

debian/config-dir/mods-available/imagemap.load

debian/config-dir/mods-available/include.load

debian/config-dir/mods-available/info.load

debian/config-dir/mods-available/ldap.load

debian/config-dir/mods-available/log_forensic.load

debian/config-dir/mods-available/mem_cache.conf

debian/config-dir/mods-available/mem_cache.load

debian/config-dir/mods-available/mime.load

debian/config-dir/mods-available/mime_magic.conf

debian/config-dir/mods-available/mime_magic.load

debian/config-dir/mods-available/negotiation.load

debian/config-dir/mods-available/proxy.conf

debian/config-dir/mods-available/proxy.load

debian/config-dir/mods-available/proxy_ajp.load

debian/config-dir/mods-available/proxy_balancer.load

debian/config-dir/mods-available/proxy_connect.load

debian/config-dir/mods-available/proxy_ftp.load

debian/config-dir/mods-available/proxy_http.load

debian/config-dir/mods-available/rewrite.load

debian/config-dir/mods-available/setenvif.load

debian/config-dir/mods-available/sick-hack-to-update-modules

debian/config-dir/mods-available/speling.load

debian/config-dir/mods-available/ssl.conf

debian/config-dir/mods-available/ssl.load

debian/config-dir/mods-available/status.load

debian/config-dir/mods-available/suexec.load

debian/config-dir/mods-available/unique_id.load

debian/config-dir/mods-available/userdir.conf

debian/config-dir/mods-available/userdir.load

debian/config-dir/mods-available/usertrack.load

debian/config-dir/mods-available/version.load

debian/config-dir/mods-available/vhost_alias.load

debian/config-dir/sites-available

debian/config-dir/sites-available/default

debian/patches/001_branding

debian/patches/002_apachectl

debian/patches/00list

debian/patches/010_fhs_compliance

debian/patches/034_apxs2_libtool_fixtastic

debian/patches/043_ajp_connection_reuse

debian/patches/099_config_guess_sub_update.dpatch

docs

docs/cgi-examples

docs/cgi-examples/printenv

docs/cgi-examples/test-cgi

docs/conf

docs/conf/charset.conv

docs/conf/extra

docs/conf/extra/httpd-autoindex.conf.in

docs/conf/extra/httpd-dav.conf.in

docs/conf/extra/httpd-default.conf.in

docs/conf/extra/httpd-info.conf.in

docs/conf/extra/httpd-languages.conf.in

docs/conf/extra/httpd-manual.conf.in

docs/conf/extra/httpd-mpm.conf.in

docs/conf/extra/httpd-multilang-errordoc.conf.in

docs/conf/extra/httpd-ssl.conf.in

docs/conf/extra/httpd-userdir.conf.in

docs/conf/extra/httpd-vhosts.conf.in

docs/conf/httpd-win.conf

docs/conf/httpd.conf.in

docs/conf/magic

docs/conf/mime.types

docs/docroot

docs/docroot/apache_pb.gif

docs/docroot/apache_pb.png

docs/docroot/apache_pb22.gif

docs/docroot/apache_pb22.png

docs/docroot/apache_pb22_ani.gif

docs/docroot/index.html

docs/doxygen.conf

docs/error

docs/error/HTTP_BAD_GATEWAY.html.var

docs/error/HTTP_BAD_REQUEST.html.var

docs/error/HTTP_FORBIDDEN.html.var

docs/error/HTTP_GONE.html.var

docs/error/HTTP_INTERNAL_SERVER_ERROR.html.var

docs/error/HTTP_LENGTH_REQUIRED.html.var

docs/error/HTTP_METHOD_NOT_ALLOWED.html.var

docs/error/HTTP_NOT_FOUND.html.var

docs/error/HTTP_NOT_IMPLEMENTED.html.var

docs/error/HTTP_PRECONDITION_FAILED.html.var

docs/error/HTTP_REQUEST_ENTITY_TOO_LARGE.html.var

docs/error/HTTP_REQUEST_TIME_OUT.html.var

docs/error/HTTP_REQUEST_URI_TOO_LARGE.html.var

docs/error/HTTP_SERVICE_UNAVAILABLE.html.var

docs/error/HTTP_UNAUTHORIZED.html.var

docs/error/HTTP_UNSUPPORTED_MEDIA_TYPE.html.var

docs/error/HTTP_VARIANT_ALSO_VARIES.html.var

docs/error/README

docs/error/contact.html.var

docs/error/include

docs/error/include/bottom.html

docs/error/include/spacer.html

docs/error/include/top.html

docs/icons

docs/icons/README

docs/icons/a.gif

docs/icons/a.png

docs/icons/alert.black.gif

docs/icons/alert.black.png

docs/icons/alert.red.gif

docs/icons/alert.red.png

docs/icons/apache_pb.gif

docs/icons/apache_pb.png

docs/icons/apache_pb2.gif

docs/icons/apache_pb2.png

docs/icons/apache_pb2_ani.gif

docs/icons/back.gif

docs/icons/back.png

docs/icons/ball.gray.gif

docs/icons/ball.gray.png

docs/icons/ball.red.gif

docs/icons/ball.red.png

docs/icons/binary.gif

docs/icons/binary.png

docs/icons/binhex.gif

docs/icons/binhex.png

docs/icons/blank.gif

docs/icons/blank.png

docs/icons/bomb.gif

docs/icons/bomb.png

docs/icons/box1.gif

docs/icons/box1.png

docs/icons/box2.gif

docs/icons/box2.png

docs/icons/broken.gif

docs/icons/broken.png

docs/icons/burst.gif

docs/icons/burst.png

docs/icons/c.gif

docs/icons/c.png

docs/icons/comp.blue.gif

docs/icons/comp.blue.png

docs/icons/comp.gray.gif

docs/icons/comp.gray.png

docs/icons/compressed.gif

docs/icons/compressed.png

docs/icons/continued.gif

docs/icons/continued.png

docs/icons/dir.gif

docs/icons/dir.png

docs/icons/diskimg.gif

docs/icons/diskimg.png

docs/icons/down.gif

docs/icons/down.png

docs/icons/dvi.gif

docs/icons/dvi.png

docs/icons/f.gif

docs/icons/f.png

docs/icons/folder.gif

docs/icons/folder.open.gif

docs/icons/folder.open.png

docs/icons/folder.png

docs/icons/folder.sec.gif

docs/icons/folder.sec.png

docs/icons/forward.gif

docs/icons/forward.png

docs/icons/generic.gif

docs/icons/generic.png

docs/icons/generic.red.gif

docs/icons/generic.red.png

docs/icons/generic.sec.gif

docs/icons/generic.sec.png

docs/icons/hand.right.gif

docs/icons/hand.right.png

docs/icons/hand.up.gif

docs/icons/hand.up.png

docs/icons/icon.sheet.gif

docs/icons/icon.sheet.png

docs/icons/image1.gif

docs/icons/image1.png

docs/icons/image2.gif

docs/icons/image2.png

docs/icons/image3.gif

docs/icons/image3.png

docs/icons/index.gif

docs/icons/index.png

docs/icons/layout.gif

docs/icons/layout.png

docs/icons/left.gif

docs/icons/left.png

docs/icons/link.gif

docs/icons/link.png

docs/icons/movie.gif

docs/icons/movie.png

docs/icons/p.gif

docs/icons/p.png

docs/icons/patch.gif

docs/icons/patch.png

docs/icons/pdf.gif

docs/icons/pdf.png

docs/icons/pie0.gif

docs/icons/pie0.png

docs/icons/pie1.gif

docs/icons/pie1.png

docs/icons/pie2.gif

docs/icons/pie2.png

docs/icons/pie3.gif

docs/icons/pie3.png

docs/icons/pie4.gif

docs/icons/pie4.png

docs/icons/pie5.gif

docs/icons/pie5.png

docs/icons/pie6.gif

docs/icons/pie6.png

docs/icons/pie7.gif

docs/icons/pie7.png

docs/icons/pie8.gif

docs/icons/pie8.png

docs/icons/portal.gif

docs/icons/portal.png

docs/icons/ps.gif

docs/icons/ps.png

docs/icons/quill.gif

docs/icons/quill.png

docs/icons/right.gif

docs/icons/right.png

docs/icons/screw1.gif

docs/icons/screw1.png

docs/icons/screw2.gif

docs/icons/screw2.png

docs/icons/script.gif

docs/icons/script.png

docs/icons/small

docs/icons/small/README.txt

docs/icons/small/back.gif

docs/icons/small/back.png

docs/icons/small/binary.gif

docs/icons/small/binary.png

docs/icons/small/binhex.gif

docs/icons/small/binhex.png

docs/icons/small/blank.gif

docs/icons/small/blank.png

docs/icons/small/broken.gif

docs/icons/small/broken.png

docs/icons/small/burst.gif

docs/icons/small/burst.png

docs/icons/small/comp1.gif

docs/icons/small/comp1.png

docs/icons/small/comp2.gif

docs/icons/small/comp2.png

docs/icons/small/compressed.gif

docs/icons/small/compressed.png

docs/icons/small/continued.gif

docs/icons/small/continued.png

docs/icons/small/dir.gif

docs/icons/small/dir.png

docs/icons/small/dir2.gif

docs/icons/small/dir2.png

docs/icons/small/doc.gif

docs/icons/small/doc.png

docs/icons/small/forward.gif

docs/icons/small/forward.png

docs/icons/small/generic.gif

docs/icons/small/generic.png

docs/icons/small/generic2.gif

docs/icons/small/generic2.png

docs/icons/small/generic3.gif

docs/icons/small/generic3.png

docs/icons/small/image.gif

docs/icons/small/image.png

docs/icons/small/image2.gif

docs/icons/small/image2.png

docs/icons/small/index.gif

docs/icons/small/index.png

docs/icons/small/key.gif

docs/icons/small/key.png

docs/icons/small/movie.gif

docs/icons/small/movie.png

docs/icons/small/patch.gif

docs/icons/small/patch.png

docs/icons/small/ps.gif

docs/icons/small/ps.png

docs/icons/small/rainbow.gif

docs/icons/small/rainbow.png

docs/icons/small/sound.gif

docs/icons/small/sound.png

docs/icons/small/sound2.gif

docs/icons/small/sound2.png

docs/icons/small/tar.gif

docs/icons/small/tar.png

docs/icons/small/text.gif

docs/icons/small/text.png

docs/icons/small/transfer.gif

docs/icons/small/transfer.png

docs/icons/small/unknown.gif

docs/icons/small/unknown.png

docs/icons/small/uu.gif

docs/icons/small/uu.png

docs/icons/sound1.gif

docs/icons/sound1.png

docs/icons/sound2.gif

docs/icons/sound2.png

docs/icons/sphere1.gif

docs/icons/sphere1.png

docs/icons/sphere2.gif

docs/icons/sphere2.png

docs/icons/tar.gif

docs/icons/tar.png

docs/icons/tex.gif

docs/icons/tex.png

docs/icons/text.gif

docs/icons/text.png

docs/icons/transfer.gif

docs/icons/transfer.png

docs/icons/unknown.gif

docs/icons/unknown.png

docs/icons/up.gif

docs/icons/up.png

docs/icons/uu.gif

docs/icons/uu.png

docs/icons/uuencoded.gif

docs/icons/uuencoded.png

docs/icons/world1.gif

docs/icons/world1.png

docs/icons/world2.gif

docs/icons/world2.png

docs/man

docs/man/ab.8

docs/man/apachectl.8

docs/man/apxs.8

docs/man/dbmmanage.1

docs/man/htcacheclean.8

docs/man/htdbm.1

docs/man/htdigest.1

docs/man/htpasswd.1

docs/man/httpd.8

docs/man/logresolve.8

docs/man/rotatelogs.8

docs/man/suexec.8

docs/manual

docs/manual/LICENSE

docs/manual/bind.html

docs/manual/bind.html.de

docs/manual/bind.html.en

docs/manual/bind.html.fr

docs/manual/bind.html.ja.euc-jp

docs/manual/bind.html.ko.euc-kr

docs/manual/caching.html

docs/manual/caching.html.en

docs/manual/configuring.html

docs/manual/configuring.html.de

docs/manual/configuring.html.en

docs/manual/configuring.html.ja.euc-jp

docs/manual/configuring.html.ko.euc-kr

docs/manual/content-negotiation.html

docs/manual/content-negotiation.html.en

docs/manual/content-negotiation.html.ja.euc-jp

docs/manual/content-negotiation.html.ko.euc-kr

docs/manual/convenience.map

docs/manual/custom-error.html

docs/manual/custom-error.html.en

docs/manual/custom-error.html.es

docs/manual/custom-error.html.ja.euc-jp

docs/manual/custom-error.html.ko.euc-kr

docs/manual/developer

docs/manual/developer/API.html

docs/manual/developer/API.html.en

docs/manual/developer/debugging.html

docs/manual/developer/debugging.html.en

docs/manual/developer/documenting.html

docs/manual/developer/documenting.html.en

docs/manual/developer/filters.html

docs/manual/developer/filters.html.en

docs/manual/developer/hooks.html

docs/manual/developer/hooks.html.en

docs/manual/developer/index.html

docs/manual/developer/index.html.en

docs/manual/developer/modules.html

docs/manual/developer/modules.html.en

docs/manual/developer/modules.html.ja.euc-jp

docs/manual/developer/request.html

docs/manual/developer/request.html.en

docs/manual/developer/thread_safety.html

docs/manual/developer/thread_safety.html.en

docs/manual/dns-caveats.html

docs/manual/dns-caveats.html.en

docs/manual/dns-caveats.html.ja.euc-jp

docs/manual/dns-caveats.html.ko.euc-kr

docs/manual/dso.html

docs/manual/dso.html.en

docs/manual/dso.html.ja.euc-jp

docs/manual/dso.html.ko.euc-kr

docs/manual/env.html

docs/manual/env.html.en

docs/manual/env.html.ja.euc-jp

docs/manual/env.html.ko.euc-kr

docs/manual/faq

docs/manual/faq/all_in_one.html

docs/manual/faq/all_in_one.html.en

docs/manual/faq/all_in_one.html.ja.euc-jp

docs/manual/faq/all_in_one.html.ko.euc-kr

docs/manual/faq/background.html

docs/manual/faq/background.html.en

docs/manual/faq/error.html

docs/manual/faq/error.html.en

docs/manual/faq/error.html.ja.euc-jp

docs/manual/faq/error.html.ko.euc-kr

docs/manual/faq/index.html

docs/manual/faq/index.html.en

docs/manual/faq/index.html.ja.euc-jp

docs/manual/faq/index.html.ko.euc-kr

docs/manual/faq/support.html

docs/manual/faq/support.html.en

docs/manual/faq/support.html.ja.euc-jp

docs/manual/faq/support.html.ko.euc-kr

docs/manual/filter.html

docs/manual/filter.html.en

docs/manual/filter.html.es

docs/manual/filter.html.fr

docs/manual/filter.html.ja.euc-jp

docs/manual/filter.html.ko.euc-kr

docs/manual/glossary.html

docs/manual/glossary.html.de

docs/manual/glossary.html.en

docs/manual/glossary.html.es

docs/manual/glossary.html.ko.euc-kr

docs/manual/handler.html

docs/manual/handler.html.en

docs/manual/handler.html.es

docs/manual/handler.html.ja.euc-jp

docs/manual/handler.html.ko.euc-kr

docs/manual/handler.html.ru.koi8-r

docs/manual/howto

docs/manual/howto/access.html

docs/manual/howto/access.html.en

docs/manual/howto/auth.html

docs/manual/howto/auth.html.en

docs/manual/howto/auth.html.ja.euc-jp

docs/manual/howto/auth.html.ko.euc-kr

docs/manual/howto/cgi.html

docs/manual/howto/cgi.html.en

docs/manual/howto/cgi.html.ja.euc-jp

docs/manual/howto/cgi.html.ko.euc-kr

docs/manual/howto/htaccess.html

docs/manual/howto/htaccess.html.en

docs/manual/howto/htaccess.html.ja.euc-jp

docs/manual/howto/htaccess.html.ko.euc-kr

docs/manual/howto/htaccess.html.pt-br

docs/manual/howto/index.html

docs/manual/howto/index.html.en

docs/manual/howto/index.html.ja.euc-jp

docs/manual/howto/index.html.ko.euc-kr

docs/manual/howto/public_html.html

docs/manual/howto/public_html.html.en

docs/manual/howto/public_html.html.ja.euc-jp

docs/manual/howto/public_html.html.ko.euc-kr

docs/manual/howto/ssi.html

docs/manual/howto/ssi.html.en

docs/manual/howto/ssi.html.ja.euc-jp

docs/manual/howto/ssi.html.ko.euc-kr

docs/manual/images

docs/manual/images/apache_header.gif

docs/manual/images/caching_fig1.gif

docs/manual/images/caching_fig1.png

docs/manual/images/custom_errordocs.png

docs/manual/images/down.gif

docs/manual/images/favicon.ico

docs/manual/images/feather.gif

docs/manual/images/feather.png

docs/manual/images/filter_arch.png

docs/manual/images/home.gif

docs/manual/images/index.gif

docs/manual/images/left.gif

docs/manual/images/mod_filter_new.gif

docs/manual/images/mod_filter_new.png

docs/manual/images/mod_filter_old.gif

docs/manual/images/mod_rewrite_fig1.gif

docs/manual/images/mod_rewrite_fig1.png

docs/manual/images/mod_rewrite_fig2.gif

docs/manual/images/mod_rewrite_fig2.png

docs/manual/images/pixel.gif

docs/manual/images/right.gif

docs/manual/images/ssl_intro_fig1.gif

docs/manual/images/ssl_intro_fig1.png

docs/manual/images/ssl_intro_fig2.gif

docs/manual/images/ssl_intro_fig2.png

docs/manual/images/ssl_intro_fig3.gif

docs/manual/images/ssl_intro_fig3.png

docs/manual/images/sub.gif

docs/manual/images/up.gif

docs/manual/index.html

docs/manual/index.html.de

docs/manual/index.html.en

docs/manual/index.html.es

docs/manual/index.html.fr

docs/manual/index.html.ja.euc-jp

docs/manual/index.html.ko.euc-kr

docs/manual/index.html.pt-br

docs/manual/install.html

docs/manual/install.html.de

docs/manual/install.html.en

docs/manual/install.html.es

docs/manual/install.html.fr

docs/manual/install.html.ja.euc-jp

docs/manual/install.html.ko.euc-kr

docs/manual/invoking.html

docs/manual/invoking.html.de

docs/manual/invoking.html.en

docs/manual/invoking.html.es

docs/manual/invoking.html.ja.euc-jp

docs/manual/invoking.html.ko.euc-kr

docs/manual/invoking.html.ru.koi8-r

docs/manual/license.html

docs/manual/license.html.en

docs/manual/logs.html

docs/manual/logs.html.en

docs/manual/logs.html.ja.euc-jp

docs/manual/logs.html.ko.euc-kr

docs/manual/misc

docs/manual/misc/index.html

docs/manual/misc/index.html.en

docs/manual/misc/index.html.ko.euc-kr

docs/manual/misc/perf-tuning.html

docs/manual/misc/perf-tuning.html.en

docs/manual/misc/perf-tuning.html.ko.euc-kr

docs/manual/misc/relevant_standards.html

docs/manual/misc/relevant_standards.html.en

docs/manual/misc/relevant_standards.html.ko.euc-kr

docs/manual/misc/rewriteguide.html

docs/manual/misc/rewriteguide.html.en

docs/manual/misc/rewriteguide.html.ko.euc-kr

docs/manual/misc/security_tips.html

docs/manual/misc/security_tips.html.en

docs/manual/misc/security_tips.html.ko.euc-kr

docs/manual/mod

docs/manual/mod/beos.html

docs/manual/mod/beos.html.de

docs/manual/mod/beos.html.en

docs/manual/mod/beos.html.es

docs/manual/mod/beos.html.ko.euc-kr

docs/manual/mod/core.html

docs/manual/mod/core.html.de

docs/manual/mod/core.html.en

docs/manual/mod/core.html.ja.euc-jp

docs/manual/mod/directive-dict.html

docs/manual/mod/directive-dict.html.en

docs/manual/mod/directive-dict.html.ja.euc-jp

docs/manual/mod/directive-dict.html.ko.euc-kr

docs/manual/mod/directives.html

docs/manual/mod/directives.html.de

docs/manual/mod/directives.html.en

docs/manual/mod/directives.html.es

docs/manual/mod/directives.html.ja.euc-jp

docs/manual/mod/directives.html.ko.euc-kr

docs/manual/mod/directives.html.ru.koi8-r

docs/manual/mod/event.html

docs/manual/mod/event.html.en

docs/manual/mod/index.html

docs/manual/mod/index.html.de

docs/manual/mod/index.html.en

docs/manual/mod/index.html.es

docs/manual/mod/index.html.ja.euc-jp

docs/manual/mod/index.html.ko.euc-kr

docs/manual/mod/mod_actions.html

docs/manual/mod/mod_actions.html.de

docs/manual/mod/mod_actions.html.en

docs/manual/mod/mod_actions.html.ja.euc-jp

docs/manual/mod/mod_actions.html.ko.euc-kr

docs/manual/mod/mod_alias.html

docs/manual/mod/mod_alias.html.en

docs/manual/mod/mod_alias.html.ja.euc-jp

docs/manual/mod/mod_alias.html.ko.euc-kr

docs/manual/mod/mod_asis.html

docs/manual/mod/mod_asis.html.en

docs/manual/mod/mod_asis.html.ja.euc-jp

docs/manual/mod/mod_asis.html.ko.euc-kr

docs/manual/mod/mod_auth_basic.html

docs/manual/mod/mod_auth_basic.html.en

docs/manual/mod/mod_auth_basic.html.ja.euc-jp

docs/manual/mod/mod_auth_basic.html.ko.euc-kr

docs/manual/mod/mod_auth_digest.html

docs/manual/mod/mod_auth_digest.html.en

docs/manual/mod/mod_auth_digest.html.ko.euc-kr

docs/manual/mod/mod_authn_alias.html

docs/manual/mod/mod_authn_alias.html.en

docs/manual/mod/mod_authn_anon.html

docs/manual/mod/mod_authn_anon.html.en

docs/manual/mod/mod_authn_anon.html.ja.euc-jp

docs/manual/mod/mod_authn_anon.html.ko.euc-kr

docs/manual/mod/mod_authn_dbd.html

docs/manual/mod/mod_authn_dbd.html.en

docs/manual/mod/mod_authn_dbm.html

docs/manual/mod/mod_authn_dbm.html.en

docs/manual/mod/mod_authn_dbm.html.ja.euc-jp

docs/manual/mod/mod_authn_dbm.html.ko.euc-kr

docs/manual/mod/mod_authn_default.html

docs/manual/mod/mod_authn_default.html.en

docs/manual/mod/mod_authn_default.html.ja.euc-jp

docs/manual/mod/mod_authn_default.html.ko.euc-kr

docs/manual/mod/mod_authn_file.html

docs/manual/mod/mod_authn_file.html.en

docs/manual/mod/mod_authn_file.html.ja.euc-jp

docs/manual/mod/mod_authn_file.html.ko.euc-kr

docs/manual/mod/mod_authnz_ldap.html

docs/manual/mod/mod_authnz_ldap.html.en

docs/manual/mod/mod_authz_dbm.html

docs/manual/mod/mod_authz_dbm.html.en

docs/manual/mod/mod_authz_dbm.html.ko.euc-kr

docs/manual/mod/mod_authz_default.html

docs/manual/mod/mod_authz_default.html.en

docs/manual/mod/mod_authz_default.html.ja.euc-jp

docs/manual/mod/mod_authz_default.html.ko.euc-kr

docs/manual/mod/mod_authz_groupfile.html

docs/manual/mod/mod_authz_groupfile.html.en

docs/manual/mod/mod_authz_groupfile.html.ja.euc-jp

docs/manual/mod/mod_authz_groupfile.html.ko.euc-kr

docs/manual/mod/mod_authz_host.html

docs/manual/mod/mod_authz_host.html.en

docs/manual/mod/mod_authz_host.html.ja.euc-jp

docs/manual/mod/mod_authz_host.html.ko.euc-kr

docs/manual/mod/mod_authz_owner.html

docs/manual/mod/mod_authz_owner.html.en

docs/manual/mod/mod_authz_owner.html.ja.euc-jp

docs/manual/mod/mod_authz_owner.html.ko.euc-kr

docs/manual/mod/mod_authz_user.html

docs/manual/mod/mod_authz_user.html.en

docs/manual/mod/mod_authz_user.html.ja.euc-jp

docs/manual/mod/mod_authz_user.html.ko.euc-kr

docs/manual/mod/mod_autoindex.html

docs/manual/mod/mod_autoindex.html.en

docs/manual/mod/mod_autoindex.html.ja.euc-jp

docs/manual/mod/mod_autoindex.html.ko.euc-kr

docs/manual/mod/mod_cache.html

docs/manual/mod/mod_cache.html.en

docs/manual/mod/mod_cache.html.ja.euc-jp

docs/manual/mod/mod_cache.html.ko.euc-kr

docs/manual/mod/mod_cern_meta.html

docs/manual/mod/mod_cern_meta.html.en

docs/manual/mod/mod_cern_meta.html.ko.euc-kr

docs/manual/mod/mod_cgi.html

docs/manual/mod/mod_cgi.html.en

docs/manual/mod/mod_cgi.html.ja.euc-jp

docs/manual/mod/mod_cgi.html.ko.euc-kr

docs/manual/mod/mod_cgid.html

docs/manual/mod/mod_cgid.html.en

docs/manual/mod/mod_cgid.html.ja.euc-jp

docs/manual/mod/mod_cgid.html.ko.euc-kr

docs/manual/mod/mod_charset_lite.html

docs/manual/mod/mod_charset_lite.html.en

docs/manual/mod/mod_charset_lite.html.ko.euc-kr

docs/manual/mod/mod_dav.html

docs/manual/mod/mod_dav.html.en

docs/manual/mod/mod_dav.html.ja.euc-jp

docs/manual/mod/mod_dav.html.ko.euc-kr

docs/manual/mod/mod_dav_fs.html

docs/manual/mod/mod_dav_fs.html.en

docs/manual/mod/mod_dav_fs.html.ja.euc-jp

docs/manual/mod/mod_dav_fs.html.ko.euc-kr

docs/manual/mod/mod_dav_lock.html

docs/manual/mod/mod_dav_lock.html.en

docs/manual/mod/mod_dav_lock.html.ja.euc-jp

docs/manual/mod/mod_dbd.html

docs/manual/mod/mod_dbd.html.en

docs/manual/mod/mod_deflate.html

docs/manual/mod/mod_deflate.html.en

docs/manual/mod/mod_deflate.html.ja.euc-jp

docs/manual/mod/mod_deflate.html.ko.euc-kr

docs/manual/mod/mod_dir.html

docs/manual/mod/mod_dir.html.en

docs/manual/mod/mod_dir.html.ja.euc-jp

docs/manual/mod/mod_dir.html.ko.euc-kr

docs/manual/mod/mod_disk_cache.html

docs/manual/mod/mod_disk_cache.html.en

docs/manual/mod/mod_disk_cache.html.ja.euc-jp

docs/manual/mod/mod_disk_cache.html.ko.euc-kr

docs/manual/mod/mod_dumpio.html

docs/manual/mod/mod_dumpio.html.en

docs/manual/mod/mod_dumpio.html.ja.euc-jp

docs/manual/mod/mod_echo.html

docs/manual/mod/mod_echo.html.en

docs/manual/mod/mod_echo.html.ja.euc-jp

docs/manual/mod/mod_echo.html.ko.euc-kr

docs/manual/mod/mod_env.html

docs/manual/mod/mod_env.html.en

docs/manual/mod/mod_env.html.ja.euc-jp

docs/manual/mod/mod_env.html.ko.euc-kr

docs/manual/mod/mod_example.html

docs/manual/mod/mod_example.html.en

docs/manual/mod/mod_example.html.ko.euc-kr

docs/manual/mod/mod_expires.html

docs/manual/mod/mod_expires.html.en

docs/manual/mod/mod_expires.html.ja.euc-jp

docs/manual/mod/mod_expires.html.ko.euc-kr

docs/manual/mod/mod_ext_filter.html

docs/manual/mod/mod_ext_filter.html.en

docs/manual/mod/mod_ext_filter.html.ja.euc-jp

docs/manual/mod/mod_ext_filter.html.ko.euc-kr

docs/manual/mod/mod_file_cache.html

docs/manual/mod/mod_file_cache.html.en

docs/manual/mod/mod_file_cache.html.ko.euc-kr

docs/manual/mod/mod_filter.html

docs/manual/mod/mod_filter.html.en

docs/manual/mod/mod_headers.html

docs/manual/mod/mod_headers.html.en

docs/manual/mod/mod_headers.html.ja.euc-jp

docs/manual/mod/mod_headers.html.ko.euc-kr

docs/manual/mod/mod_ident.html

docs/manual/mod/mod_ident.html.en

docs/manual/mod/mod_ident.html.ko.euc-kr

docs/manual/mod/mod_imagemap.html

docs/manual/mod/mod_imagemap.html.en

docs/manual/mod/mod_imagemap.html.ko.euc-kr

docs/manual/mod/mod_include.html

docs/manual/mod/mod_include.html.en

docs/manual/mod/mod_include.html.ja.euc-jp

docs/manual/mod/mod_info.html

docs/manual/mod/mod_info.html.en

docs/manual/mod/mod_info.html.ja.euc-jp

docs/manual/mod/mod_info.html.ko.euc-kr

docs/manual/mod/mod_isapi.html

docs/manual/mod/mod_isapi.html.en

docs/manual/mod/mod_isapi.html.ko.euc-kr

docs/manual/mod/mod_ldap.html

docs/manual/mod/mod_ldap.html.en

docs/manual/mod/mod_log_config.html

docs/manual/mod/mod_log_config.html.en

docs/manual/mod/mod_log_config.html.ja.euc-jp

docs/manual/mod/mod_log_config.html.ko.euc-kr

docs/manual/mod/mod_log_forensic.html

docs/manual/mod/mod_log_forensic.html.en

docs/manual/mod/mod_log_forensic.html.ja.euc-jp

docs/manual/mod/mod_logio.html

docs/manual/mod/mod_logio.html.en

docs/manual/mod/mod_logio.html.ja.euc-jp

docs/manual/mod/mod_logio.html.ko.euc-kr

docs/manual/mod/mod_mem_cache.html

docs/manual/mod/mod_mem_cache.html.en

docs/manual/mod/mod_mem_cache.html.ja.euc-jp

docs/manual/mod/mod_mem_cache.html.ko.euc-kr

docs/manual/mod/mod_mime.html

docs/manual/mod/mod_mime.html.en

docs/manual/mod/mod_mime.html.ja.euc-jp

docs/manual/mod/mod_mime_magic.html

docs/manual/mod/mod_mime_magic.html.en

docs/manual/mod/mod_negotiation.html

docs/manual/mod/mod_negotiation.html.en

docs/manual/mod/mod_negotiation.html.ja.euc-jp

docs/manual/mod/mod_nw_ssl.html

docs/manual/mod/mod_nw_ssl.html.en

docs/manual/mod/mod_proxy.html

docs/manual/mod/mod_proxy.html.en

docs/manual/mod/mod_proxy.html.ja.euc-jp

docs/manual/mod/mod_proxy_ajp.html

docs/manual/mod/mod_proxy_ajp.html.en

docs/manual/mod/mod_proxy_ajp.html.ja.euc-jp

docs/manual/mod/mod_proxy_balancer.html

docs/manual/mod/mod_proxy_balancer.html.en

docs/manual/mod/mod_proxy_balancer.html.ja.euc-jp

docs/manual/mod/mod_proxy_connect.html

docs/manual/mod/mod_proxy_connect.html.en

docs/manual/mod/mod_proxy_ftp.html

docs/manual/mod/mod_proxy_ftp.html.en

docs/manual/mod/mod_proxy_http.html

docs/manual/mod/mod_proxy_http.html.en

docs/manual/mod/mod_rewrite.html

docs/manual/mod/mod_rewrite.html.en

docs/manual/mod/mod_setenvif.html

docs/manual/mod/mod_setenvif.html.en

docs/manual/mod/mod_setenvif.html.ja.euc-jp

docs/manual/mod/mod_setenvif.html.ko.euc-kr

docs/manual/mod/mod_so.html

docs/manual/mod/mod_so.html.en

docs/manual/mod/mod_so.html.ja.euc-jp

docs/manual/mod/mod_so.html.ko.euc-kr

docs/manual/mod/mod_speling.html

docs/manual/mod/mod_speling.html.en

docs/manual/mod/mod_speling.html.ja.euc-jp

docs/manual/mod/mod_speling.html.ko.euc-kr

docs/manual/mod/mod_ssl.html

docs/manual/mod/mod_ssl.html.en

docs/manual/mod/mod_status.html

docs/manual/mod/mod_status.html.en

docs/manual/mod/mod_status.html.ja.euc-jp

docs/manual/mod/mod_status.html.ko.euc-kr

docs/manual/mod/mod_suexec.html

docs/manual/mod/mod_suexec.html.en

docs/manual/mod/mod_suexec.html.ja.euc-jp

docs/manual/mod/mod_suexec.html.ko.euc-kr

docs/manual/mod/mod_unique_id.html

docs/manual/mod/mod_unique_id.html.en

docs/manual/mod/mod_unique_id.html.ja.euc-jp

docs/manual/mod/mod_unique_id.html.ko.euc-kr

docs/manual/mod/mod_userdir.html

docs/manual/mod/mod_userdir.html.en

docs/manual/mod/mod_userdir.html.ja.euc-jp

docs/manual/mod/mod_userdir.html.ko.euc-kr

docs/manual/mod/mod_usertrack.html

docs/manual/mod/mod_usertrack.html.en

docs/manual/mod/mod_version.html

docs/manual/mod/mod_version.html.en

docs/manual/mod/mod_version.html.ja.euc-jp

docs/manual/mod/mod_version.html.ko.euc-kr

docs/manual/mod/mod_vhost_alias.html

docs/manual/mod/mod_vhost_alias.html.en

docs/manual/mod/module-dict.html

docs/manual/mod/module-dict.html.en

docs/manual/mod/module-dict.html.ja.euc-jp

docs/manual/mod/module-dict.html.ko.euc-kr

docs/manual/mod/mpm_common.html

docs/manual/mod/mpm_common.html.de

docs/manual/mod/mpm_common.html.en

docs/manual/mod/mpm_common.html.ja.euc-jp

docs/manual/mod/mpm_netware.html

docs/manual/mod/mpm_netware.html.en

docs/manual/mod/mpm_winnt.html

docs/manual/mod/mpm_winnt.html.de

docs/manual/mod/mpm_winnt.html.en

docs/manual/mod/mpm_winnt.html.ja.euc-jp

docs/manual/mod/mpmt_os2.html

docs/manual/mod/mpmt_os2.html.en

docs/manual/mod/prefork.html

docs/manual/mod/prefork.html.de

docs/manual/mod/prefork.html.en

docs/manual/mod/prefork.html.ja.euc-jp

docs/manual/mod/quickreference.html

docs/manual/mod/quickreference.html.de

docs/manual/mod/quickreference.html.en

docs/manual/mod/quickreference.html.es

docs/manual/mod/quickreference.html.ja.euc-jp

docs/manual/mod/quickreference.html.ko.euc-kr

docs/manual/mod/quickreference.html.ru.koi8-r

docs/manual/mod/worker.html

docs/manual/mod/worker.html.de

docs/manual/mod/worker.html.en

docs/manual/mod/worker.html.ja.euc-jp

docs/manual/mpm.html

docs/manual/mpm.html.de

docs/manual/mpm.html.en

docs/manual/mpm.html.es

docs/manual/mpm.html.ja.euc-jp

docs/manual/mpm.html.ko.euc-kr

docs/manual/new_features_2_0.html

docs/manual/new_features_2_0.html.de

docs/manual/new_features_2_0.html.en

docs/manual/new_features_2_0.html.fr

docs/manual/new_features_2_0.html.ja.euc-jp

docs/manual/new_features_2_0.html.ko.euc-kr

docs/manual/new_features_2_0.html.pt-br

docs/manual/new_features_2_0.html.ru.koi8-r

docs/manual/new_features_2_2.html

docs/manual/new_features_2_2.html.en

docs/manual/new_features_2_2.html.ko.euc-kr

docs/manual/new_features_2_2.html.pt-br

docs/manual/platform

docs/manual/platform/ebcdic.html

docs/manual/platform/ebcdic.html.en

docs/manual/platform/ebcdic.html.ko.euc-kr

docs/manual/platform/index.html

docs/manual/platform/index.html.en

docs/manual/platform/index.html.ko.euc-kr

docs/manual/platform/netware.html

docs/manual/platform/netware.html.en

docs/manual/platform/netware.html.ko.euc-kr

docs/manual/platform/perf-hp.html

docs/manual/platform/perf-hp.html.en

docs/manual/platform/perf-hp.html.ko.euc-kr

docs/manual/platform/win_compiling.html

docs/manual/platform/win_compiling.html.en

docs/manual/platform/win_compiling.html.ko.euc-kr

docs/manual/platform/windows.html

docs/manual/platform/windows.html.en

docs/manual/platform/windows.html.ko.euc-kr

docs/manual/programs

docs/manual/programs/ab.html

docs/manual/programs/ab.html.en

docs/manual/programs/ab.html.ko.euc-kr

docs/manual/programs/apachectl.html

docs/manual/programs/apachectl.html.en

docs/manual/programs/apachectl.html.ko.euc-kr

docs/manual/programs/apxs.html

docs/manual/programs/apxs.html.en

docs/manual/programs/apxs.html.ko.euc-kr

docs/manual/programs/configure.html

docs/manual/programs/configure.html.en

docs/manual/programs/configure.html.ko.euc-kr

docs/manual/programs/dbmmanage.html

docs/manual/programs/dbmmanage.html.en

docs/manual/programs/dbmmanage.html.ko.euc-kr

docs/manual/programs/htcacheclean.html

docs/manual/programs/htcacheclean.html.en

docs/manual/programs/htcacheclean.html.ko.euc-kr

docs/manual/programs/htdbm.html

docs/manual/programs/htdbm.html.en

docs/manual/programs/htdigest.html

docs/manual/programs/htdigest.html.en

docs/manual/programs/htdigest.html.ko.euc-kr

docs/manual/programs/htpasswd.html

docs/manual/programs/htpasswd.html.en

docs/manual/programs/htpasswd.html.ko.euc-kr

docs/manual/programs/httpd.html

docs/manual/programs/httpd.html.en

docs/manual/programs/httpd.html.ko.euc-kr

docs/manual/programs/httxt2dbm.html

docs/manual/programs/httxt2dbm.html.en

docs/manual/programs/index.html

docs/manual/programs/index.html.en

docs/manual/programs/index.html.es

docs/manual/programs/index.html.ko.euc-kr

docs/manual/programs/index.html.ru.koi8-r

docs/manual/programs/logresolve.html

docs/manual/programs/logresolve.html.en

docs/manual/programs/logresolve.html.ko.euc-kr

docs/manual/programs/other.html

docs/manual/programs/other.html.en

docs/manual/programs/other.html.ko.euc-kr

docs/manual/programs/rotatelogs.html

docs/manual/programs/rotatelogs.html.en

docs/manual/programs/rotatelogs.html.ko.euc-kr

docs/manual/programs/suexec.html

docs/manual/programs/suexec.html.en

docs/manual/programs/suexec.html.ko.euc-kr

docs/manual/rewrite

docs/manual/rewrite/index.html

docs/manual/rewrite/index.html.en

docs/manual/rewrite/rewrite_guide.html

docs/manual/rewrite/rewrite_guide.html.en

docs/manual/rewrite/rewrite_guide_advanced.html

docs/manual/rewrite/rewrite_guide_advanced.html.en

docs/manual/rewrite/rewrite_intro.html

docs/manual/rewrite/rewrite_intro.html.en

docs/manual/rewrite/rewrite_tech.html

docs/manual/rewrite/rewrite_tech.html.en

docs/manual/sections.html

docs/manual/sections.html.en

docs/manual/sections.html.ja.euc-jp

docs/manual/sections.html.ko.euc-kr

docs/manual/server-wide.html

docs/manual/server-wide.html.en

docs/manual/server-wide.html.ja.euc-jp

docs/manual/server-wide.html.ko.euc-kr

docs/manual/sitemap.html

docs/manual/sitemap.html.de

docs/manual/sitemap.html.en

docs/manual/sitemap.html.es

docs/manual/sitemap.html.ja.euc-jp

docs/manual/sitemap.html.ko.euc-kr

docs/manual/ssl

docs/manual/ssl/index.html

docs/manual/ssl/index.html.en

docs/manual/ssl/index.html.ja.euc-jp

docs/manual/ssl/ssl_compat.html

docs/manual/ssl/ssl_compat.html.en

docs/manual/ssl/ssl_faq.html

docs/manual/ssl/ssl_faq.html.en

docs/manual/ssl/ssl_howto.html

docs/manual/ssl/ssl_howto.html.en

docs/manual/ssl/ssl_intro.html

docs/manual/ssl/ssl_intro.html.en

docs/manual/ssl/ssl_intro.html.ja.euc-jp

docs/manual/stopping.html

docs/manual/stopping.html.de

docs/manual/stopping.html.en

docs/manual/stopping.html.es

docs/manual/stopping.html.ja.euc-jp

docs/manual/stopping.html.ko.euc-kr

docs/manual/style

docs/manual/style/build.properties

docs/manual/style/common.dtd

docs/manual/style/css

docs/manual/style/css/manual-chm.css

docs/manual/style/css/manual-loose-100pc.css

docs/manual/style/css/manual-print.css

docs/manual/style/css/manual-zip-100pc.css

docs/manual/style/css/manual-zip.css

docs/manual/style/css/manual.css

docs/manual/style/faq.dtd

docs/manual/style/lang

docs/manual/style/lang.dtd

docs/manual/style/latex

docs/manual/style/latex/atbeginend.sty

docs/manual/style/latex/common.xsl

docs/manual/style/latex/directiveindex.xsl

docs/manual/style/latex/faq.xsl

docs/manual/style/latex/html.xsl

docs/manual/style/latex/latex.xsl

docs/manual/style/latex/manualpage.xsl

docs/manual/style/latex/moduleindex.xsl

docs/manual/style/latex/quickreference.xsl

docs/manual/style/latex/synopsis.xsl

docs/manual/style/manual.de.xsl

docs/manual/style/manual.en.xsl

docs/manual/style/manual.es.xsl

docs/manual/style/manual.fr.xsl

docs/manual/style/manual.ja.xsl

docs/manual/style/manual.ko.xsl

docs/manual/style/manual.pt-br.xsl

docs/manual/style/manual.ru.xsl

docs/manual/style/manualpage.dtd

docs/manual/style/modulesynopsis.dtd

docs/manual/style/sitemap.dtd

docs/manual/style/version.ent

docs/manual/style/xsl

docs/manual/style/xsl/common.xsl

docs/manual/style/xsl/convmap.xsl

docs/manual/style/xsl/directiveindex.xsl

docs/manual/style/xsl/faq.xsl

docs/manual/style/xsl/hhc.xsl

docs/manual/style/xsl/hhp.xsl

docs/manual/style/xsl/indexpage.xsl

docs/manual/style/xsl/language.xsl

docs/manual/style/xsl/maf.xsl

docs/manual/style/xsl/manualpage.xsl

docs/manual/style/xsl/moduleindex.xsl

docs/manual/style/xsl/nroff.xsl

docs/manual/style/xsl/quickreference.xsl

docs/manual/style/xsl/sitemap.xsl

docs/manual/style/xsl/synopsis.xsl

docs/manual/style/xsl/typemap.xsl

docs/manual/style/xsl/util

docs/manual/style/xsl/util/modtrans.xsl

docs/manual/suexec.html

docs/manual/suexec.html.en

docs/manual/suexec.html.ja.euc-jp

docs/manual/suexec.html.ko.euc-kr

docs/manual/upgrading.html

docs/manual/upgrading.html.de

docs/manual/upgrading.html.en

docs/manual/upgrading.html.ja.euc-jp

docs/manual/upgrading.html.ko.euc-kr

docs/manual/upgrading.html.pt-br

docs/manual/upgrading.html.ru.koi8-r

docs/manual/urlmapping.html

docs/manual/urlmapping.html.en

docs/manual/urlmapping.html.ja.euc-jp

docs/manual/urlmapping.html.ko.euc-kr

docs/manual/vhosts

docs/manual/vhosts/details.html

docs/manual/vhosts/details.html.en

docs/manual/vhosts/details.html.fr

docs/manual/vhosts/details.html.ko.euc-kr

docs/manual/vhosts/examples.html

docs/manual/vhosts/examples.html.en

docs/manual/vhosts/examples.html.fr

docs/manual/vhosts/examples.html.ja.euc-jp

docs/manual/vhosts/examples.html.ko.euc-kr

docs/manual/vhosts/fd-limits.html

docs/manual/vhosts/fd-limits.html.en

docs/manual/vhosts/fd-limits.html.fr

docs/manual/vhosts/fd-limits.html.ja.euc-jp

docs/manual/vhosts/fd-limits.html.ko.euc-kr

docs/manual/vhosts/index.html

docs/manual/vhosts/index.html.de

docs/manual/vhosts/index.html.en

docs/manual/vhosts/index.html.fr

docs/manual/vhosts/index.html.ja.euc-jp

docs/manual/vhosts/index.html.ko.euc-kr

docs/manual/vhosts/index.html.ru.koi8-r

docs/manual/vhosts/ip-based.html

docs/manual/vhosts/ip-based.html.en

docs/manual/vhosts/ip-based.html.fr

docs/manual/vhosts/ip-based.html.ja.euc-jp

docs/manual/vhosts/ip-based.html.ko.euc-kr

docs/manual/vhosts/mass.html

docs/manual/vhosts/mass.html.en

docs/manual/vhosts/mass.html.ko.euc-kr

docs/manual/vhosts/name-based.html

docs/manual/vhosts/name-based.html.de

docs/manual/vhosts/name-based.html.en

docs/manual/vhosts/name-based.html.fr

docs/manual/vhosts/name-based.html.ja.euc-jp

docs/manual/vhosts/name-based.html.ko.euc-kr

emacs-style

httpd.dsp

httpd.spec

include

include/.indent.pro

include/ap_compat.h

include/ap_config.h

include/ap_config_auto.h.in

include/ap_config_layout.h.in

include/ap_listen.h

include/ap_mmn.h

include/ap_mpm.h

include/ap_provider.h

include/ap_regex.h

include/ap_regkey.h

include/ap_release.h

include/http_config.h

include/http_connection.h

include/http_core.h

include/http_log.h

include/http_main.h

include/http_protocol.h

include/http_request.h

include/http_vhost.h

include/httpd.h

include/mpm_common.h

include/scoreboard.h

include/util_cfgtree.h

include/util_charset.h

include/util_ebcdic.h

include/util_filter.h

include/util_ldap.h

include/util_md5.h

include/util_script.h

include/util_time.h

include/util_xml.h

libhttpd.dsp

modules

modules/Makefile.in

modules/NWGNUmakefile

modules/README

modules/aaa

modules/aaa/.indent.pro

modules/aaa/Makefile.in

modules/aaa/NWGNUauthbasc

modules/aaa/NWGNUauthdigt

modules/aaa/NWGNUauthnalias

modules/aaa/NWGNUauthnano

modules/aaa/NWGNUauthndbd

modules/aaa/NWGNUauthndbm

modules/aaa/NWGNUauthndef

modules/aaa/NWGNUauthnfil

modules/aaa/NWGNUauthnzldap

modules/aaa/NWGNUauthzdbm

modules/aaa/NWGNUauthzdef

modules/aaa/NWGNUauthzgrp

modules/aaa/NWGNUauthzusr

modules/aaa/NWGNUmakefile

modules/aaa/config.m4

modules/aaa/mod_auth.h

modules/aaa/mod_auth_basic.c

modules/aaa/mod_auth_basic.dsp

modules/aaa/mod_auth_digest.c

modules/aaa/mod_auth_digest.dsp

modules/aaa/mod_authn_alias.c

modules/aaa/mod_authn_anon.c

modules/aaa/mod_authn_anon.dsp

modules/aaa/mod_authn_dbd.c

modules/aaa/mod_authn_dbd.dsp

modules/aaa/mod_authn_dbm.c

modules/aaa/mod_authn_dbm.dsp

modules/aaa/mod_authn_default.c

modules/aaa/mod_authn_default.dsp

modules/aaa/mod_authn_file.c

modules/aaa/mod_authn_file.dsp

modules/aaa/mod_authnz_ldap.c

modules/aaa/mod_authnz_ldap.dsp

modules/aaa/mod_authz_dbm.c

modules/aaa/mod_authz_dbm.dsp

modules/aaa/mod_authz_default.c

modules/aaa/mod_authz_default.dsp

modules/aaa/mod_authz_groupfile.c

modules/aaa/mod_authz_groupfile.dsp

modules/aaa/mod_authz_host.c

modules/aaa/mod_authz_host.dsp

modules/aaa/mod_authz_owner.c

modules/aaa/mod_authz_user.c

modules/aaa/mod_authz_user.dsp

modules/arch

modules/arch/netware

modules/arch/netware/libprews.c

modules/arch/netware/mod_auth_basic.def

modules/arch/netware/mod_auth_digest.def

modules/arch/netware/mod_authn_anon.def

modules/arch/netware/mod_authn_dbm.def

modules/arch/netware/mod_authn_default.def

modules/arch/netware/mod_authn_file.def

modules/arch/netware/mod_authz_dbm.def

modules/arch/netware/mod_authz_default.def

modules/arch/netware/mod_authz_groupfile.def

modules/arch/netware/mod_authz_user.def

modules/arch/netware/mod_cache.def

modules/arch/netware/mod_cern_meta.def

modules/arch/netware/mod_dav.def

modules/arch/netware/mod_disk_cache.def

modules/arch/netware/mod_echo.def

modules/arch/netware/mod_expires.def

modules/arch/netware/mod_file_cache.def

modules/arch/netware/mod_headers.def

modules/arch/netware/mod_info.def

modules/arch/netware/mod_logio.def

modules/arch/netware/mod_mem_cache.def

modules/arch/netware/mod_mime_magic.def

modules/arch/netware/mod_netware.c

modules/arch/netware/mod_nw_ssl.c

modules/arch/netware/mod_proxy.def

modules/arch/netware/mod_proxy_connect.def

modules/arch/netware/mod_proxy_ftp.def

modules/arch/netware/mod_proxy_http.def

modules/arch/netware/mod_rewrite.def

modules/arch/netware/mod_speling.def

modules/arch/netware/mod_status.def

modules/arch/netware/mod_unique_id.def

modules/arch/netware/mod_usertrack.def

modules/arch/netware/mod_vhost_alias.def

modules/arch/netware/moddavfs.def

modules/arch/win32

modules/arch/win32/Makefile.in

modules/arch/win32/config.m4

modules/arch/win32/mod_isapi.c

modules/arch/win32/mod_isapi.dsp

modules/arch/win32/mod_isapi.h

modules/arch/win32/mod_win32.c

modules/cache

modules/cache/.indent.pro

modules/cache/Makefile.in

modules/cache/NWGNUdsk_cach

modules/cache/NWGNUmakefile

modules/cache/NWGNUmem_cach

modules/cache/NWGNUmod_cach

modules/cache/cache_cache.c

modules/cache/cache_cache.h

modules/cache/cache_hash.c

modules/cache/cache_hash.h

modules/cache/cache_pqueue.c

modules/cache/cache_pqueue.h

modules/cache/cache_storage.c

modules/cache/cache_util.c

modules/cache/config.m4

modules/cache/mod_cache.c

modules/cache/mod_cache.dsp

modules/cache/mod_cache.h

modules/cache/mod_cache.imp

modules/cache/mod_disk_cache.c

modules/cache/mod_disk_cache.dsp

modules/cache/mod_disk_cache.h

modules/cache/mod_file_cache.c

modules/cache/mod_file_cache.dsp

modules/cache/mod_file_cache.exp

modules/cache/mod_mem_cache.c

modules/cache/mod_mem_cache.dsp

modules/config5.m4

modules/database

modules/database/Makefile.in

modules/database/NWGNUmakefile

modules/database/config.m4

modules/database/mod_dbd.c

modules/database/mod_dbd.dsp

modules/database/mod_dbd.h

modules/dav

modules/dav/fs

modules/dav/fs/Makefile.in

modules/dav/fs/NWGNUmakefile

modules/dav/fs/config6.m4

modules/dav/fs/dbm.c

modules/dav/fs/lock.c

modules/dav/fs/mod_dav_fs.c

modules/dav/fs/mod_dav_fs.dsp

modules/dav/fs/repos.c

modules/dav/fs/repos.h

modules/dav/lock

modules/dav/lock/Makefile.in

modules/dav/lock/NWGNUmakefile

modules/dav/lock/config6.m4

modules/dav/lock/locks.c

modules/dav/lock/locks.h

modules/dav/lock/mod_dav_lock.c

modules/dav/main

modules/dav/main/Makefile.in

modules/dav/main/NWGNUmakefile

modules/dav/main/config5.m4

modules/dav/main/dav.imp

modules/dav/main/liveprop.c

modules/dav/main/mod_dav.c

modules/dav/main/mod_dav.dsp

modules/dav/main/mod_dav.h

modules/dav/main/props.c

modules/dav/main/providers.c

modules/dav/main/std_liveprop.c

modules/dav/main/util.c

modules/dav/main/util_lock.c

modules/debug

modules/debug/Makefile.in

modules/debug/NWGNUmakefile

modules/debug/NWGNUmodbucketeer

modules/debug/NWGNUmoddumpio

modules/debug/README

modules/debug/config.m4

modules/debug/mod_bucketeer.c

modules/debug/mod_bucketeer.dsp

modules/debug/mod_dumpio.c

modules/debug/mod_dumpio.dsp

modules/echo

modules/echo/.indent.pro

modules/echo/Makefile.in

modules/echo/NWGNUmakefile

modules/echo/config.m4

modules/echo/mod_echo.c

modules/echo/mod_echo.dsp

modules/experimental

modules/experimental/.indent.pro

modules/experimental/Makefile.in

modules/experimental/NWGNUexample

modules/experimental/NWGNUmakefile

modules/experimental/README

modules/experimental/config.m4

modules/experimental/mod_case_filter.c

modules/experimental/mod_case_filter_in.c

modules/experimental/mod_example.c

modules/filters

modules/filters/.indent.pro

modules/filters/Makefile.in

modules/filters/NWGNUcharsetl

modules/filters/NWGNUdeflate

modules/filters/NWGNUextfiltr

modules/filters/NWGNUmakefile

modules/filters/NWGNUmod_filter

modules/filters/config.m4

modules/filters/mod_charset_lite.c

modules/filters/mod_charset_lite.dsp

modules/filters/mod_charset_lite.exp

modules/filters/mod_deflate.c

modules/filters/mod_deflate.dsp

modules/filters/mod_deflate.exp

modules/filters/mod_ext_filter.c

modules/filters/mod_ext_filter.dsp

modules/filters/mod_ext_filter.exp

modules/filters/mod_filter.c

modules/filters/mod_include.c

modules/filters/mod_include.dsp

modules/filters/mod_include.exp

modules/filters/mod_include.h

modules/generators

modules/generators/.indent.pro

modules/generators/Makefile.in

modules/generators/NWGNUautoindex

modules/generators/NWGNUinfo

modules/generators/NWGNUmakefile

modules/generators/NWGNUmod_asis

modules/generators/NWGNUmod_cgi

modules/generators/NWGNUstatus

modules/generators/config5.m4

modules/generators/mod_asis.c

modules/generators/mod_asis.dsp

modules/generators/mod_asis.exp

modules/generators/mod_autoindex.c

modules/generators/mod_autoindex.dsp

modules/generators/mod_autoindex.exp

modules/generators/mod_cgi.c

modules/generators/mod_cgi.dsp

modules/generators/mod_cgi.exp

modules/generators/mod_cgi.h

modules/generators/mod_cgid.c

modules/generators/mod_cgid.exp

modules/generators/mod_info.c

modules/generators/mod_info.dsp

modules/generators/mod_info.exp

modules/generators/mod_status.c

modules/generators/mod_status.dsp

modules/generators/mod_status.exp

modules/generators/mod_status.h

modules/generators/mod_suexec.c

modules/generators/mod_suexec.h

modules/http

modules/http/.indent.pro

modules/http/Makefile.in

modules/http/byterange_filter.c

modules/http/chunk_filter.c

modules/http/config2.m4

modules/http/http_core.c

modules/http/http_etag.c

modules/http/http_filters.c

modules/http/http_protocol.c

modules/http/http_request.c

modules/http/mod_core.h

modules/http/mod_mime.c

modules/http/mod_mime.dsp

modules/http/mod_mime.exp

modules/ldap

modules/ldap/Makefile.in

modules/ldap/NWGNUmakefile

modules/ldap/README.ldap

modules/ldap/config.m4

modules/ldap/mod_ldap.dsp

modules/ldap/util_ldap.c

modules/ldap/util_ldap_cache.c

modules/ldap/util_ldap_cache.h

modules/ldap/util_ldap_cache_mgr.c

modules/loggers

modules/loggers/.indent.pro

modules/loggers/Makefile.in

modules/loggers/NWGNUforensic

modules/loggers/NWGNUmakefile

modules/loggers/NWGNUmodlogio

modules/loggers/config.m4

modules/loggers/mod_log_config.c

modules/loggers/mod_log_config.dsp

modules/loggers/mod_log_config.exp

modules/loggers/mod_log_config.h

modules/loggers/mod_log_forensic.c

modules/loggers/mod_log_forensic.dsp

modules/loggers/mod_log_forensic.exp

modules/loggers/mod_logio.c

modules/loggers/mod_logio.dsp

modules/mappers

modules/mappers/.indent.pro

modules/mappers/Makefile.in

modules/mappers/NWGNUactions

modules/mappers/NWGNUimagemap

modules/mappers/NWGNUmakefile

modules/mappers/NWGNUrewrite

modules/mappers/NWGNUspeling

modules/mappers/NWGNUuserdir

modules/mappers/NWGNUvhost

modules/mappers/config9.m4

modules/mappers/mod_actions.c

modules/mappers/mod_actions.dsp

modules/mappers/mod_actions.exp

modules/mappers/mod_alias.c

modules/mappers/mod_alias.dsp

modules/mappers/mod_alias.exp

modules/mappers/mod_dir.c

modules/mappers/mod_dir.dsp

modules/mappers/mod_dir.exp

modules/mappers/mod_imagemap.c

modules/mappers/mod_imagemap.dsp

modules/mappers/mod_imagemap.exp

modules/mappers/mod_negotiation.c

modules/mappers/mod_negotiation.dsp

modules/mappers/mod_negotiation.exp

modules/mappers/mod_rewrite.c

modules/mappers/mod_rewrite.dsp

modules/mappers/mod_rewrite.exp

modules/mappers/mod_rewrite.h

modules/mappers/mod_so.c

modules/mappers/mod_so.h

modules/mappers/mod_speling.c

modules/mappers/mod_speling.dsp

modules/mappers/mod_speling.exp

modules/mappers/mod_userdir.c

modules/mappers/mod_userdir.dsp

modules/mappers/mod_userdir.exp

modules/mappers/mod_vhost_alias.c

modules/mappers/mod_vhost_alias.dsp

modules/mappers/mod_vhost_alias.exp

modules/metadata

modules/metadata/.indent.pro

modules/metadata/Makefile.in

modules/metadata/NWGNUcernmeta

modules/metadata/NWGNUexpires

modules/metadata/NWGNUheaders

modules/metadata/NWGNUmakefile

modules/metadata/NWGNUmimemagi

modules/metadata/NWGNUmodident

modules/metadata/NWGNUmodversion

modules/metadata/NWGNUuniqueid

modules/metadata/NWGNUusertrk

modules/metadata/config.m4

modules/metadata/mod_cern_meta.c

modules/metadata/mod_cern_meta.dsp

modules/metadata/mod_cern_meta.exp

modules/metadata/mod_env.c

modules/metadata/mod_env.dsp

modules/metadata/mod_env.exp

modules/metadata/mod_expires.c

modules/metadata/mod_expires.dsp

modules/metadata/mod_expires.exp

modules/metadata/mod_headers.c

modules/metadata/mod_headers.dsp

modules/metadata/mod_headers.exp

modules/metadata/mod_ident.c

modules/metadata/mod_ident.dsp

modules/metadata/mod_ident.exp

modules/metadata/mod_mime_magic.c

modules/metadata/mod_mime_magic.dsp

modules/metadata/mod_mime_magic.exp

modules/metadata/mod_setenvif.c

modules/metadata/mod_setenvif.dsp

modules/metadata/mod_setenvif.exp

modules/metadata/mod_unique_id.c

modules/metadata/mod_unique_id.dsp

modules/metadata/mod_unique_id.exp

modules/metadata/mod_usertrack.c

modules/metadata/mod_usertrack.dsp

modules/metadata/mod_usertrack.exp

modules/metadata/mod_version.c

modules/metadata/mod_version.dsp

modules/metadata/mod_version.exp

modules/proxy

modules/proxy/.indent.pro

modules/proxy/CHANGES

modules/proxy/Makefile.in

modules/proxy/NWGNUmakefile

modules/proxy/NWGNUproxy

modules/proxy/NWGNUproxyajp

modules/proxy/NWGNUproxybalancer

modules/proxy/NWGNUproxycon

modules/proxy/NWGNUproxyftp

modules/proxy/NWGNUproxyhtp

modules/proxy/ajp.h

modules/proxy/ajp_header.c

modules/proxy/ajp_header.h

modules/proxy/ajp_link.c

modules/proxy/ajp_msg.c

modules/proxy/config.m4

modules/proxy/libproxy.exp

modules/proxy/mod_proxy.c

modules/proxy/mod_proxy.dsp

modules/proxy/mod_proxy.h

modules/proxy/mod_proxy_ajp.c

modules/proxy/mod_proxy_ajp.dsp

modules/proxy/mod_proxy_balancer.c

modules/proxy/mod_proxy_balancer.dsp

modules/proxy/mod_proxy_connect.c

modules/proxy/mod_proxy_connect.dsp

modules/proxy/mod_proxy_ftp.c

modules/proxy/mod_proxy_ftp.dsp

modules/proxy/mod_proxy_http.c

modules/proxy/mod_proxy_http.dsp

modules/proxy/proxy_util.c

modules/ssl

modules/ssl/Makefile.in

modules/ssl/NWGNUmakefile

modules/ssl/README

modules/ssl/README.dsov.fig

modules/ssl/README.dsov.ps

modules/ssl/config.m4

modules/ssl/mod_ssl.c

modules/ssl/mod_ssl.dsp

modules/ssl/mod_ssl.h

modules/ssl/ssl_engine_config.c

modules/ssl/ssl_engine_dh.c

modules/ssl/ssl_engine_init.c

modules/ssl/ssl_engine_io.c

modules/ssl/ssl_engine_kernel.c

modules/ssl/ssl_engine_log.c

modules/ssl/ssl_engine_mutex.c

modules/ssl/ssl_engine_pphrase.c

modules/ssl/ssl_engine_rand.c

modules/ssl/ssl_engine_vars.c

modules/ssl/ssl_expr.c

modules/ssl/ssl_expr.h

modules/ssl/ssl_expr_eval.c

modules/ssl/ssl_expr_parse.c

modules/ssl/ssl_expr_parse.h

modules/ssl/ssl_expr_parse.y

modules/ssl/ssl_expr_scan.c

modules/ssl/ssl_expr_scan.l

modules/ssl/ssl_private.h

modules/ssl/ssl_scache.c

modules/ssl/ssl_scache_dbm.c

modules/ssl/ssl_scache_dc.c

modules/ssl/ssl_scache_shmcb.c

modules/ssl/ssl_toolkit_compat.h

modules/ssl/ssl_util.c

modules/ssl/ssl_util_ssl.c

modules/ssl/ssl_util_ssl.h

modules/test

modules/test/.indent.pro

modules/test/Makefile.in

modules/test/README

modules/test/config.m4

modules/test/mod_optional_fn_export.c

modules/test/mod_optional_fn_export.h

modules/test/mod_optional_fn_import.c

modules/test/mod_optional_hook_export.c

modules/test/mod_optional_hook_export.h

modules/test/mod_optional_hook_import.c

os/.indent.pro

os/Makefile.in

os/beos

os/beos/Makefile.in

os/beos/beosd.c

os/beos/beosd.h

os/beos/config.m4

os/beos/os.c

os/beos/os.h

os/bs2000

os/bs2000/ebcdic.c

os/bs2000/ebcdic.h

os/bs2000/os.c

os/bs2000/os.h

os/config.m4

os/netware

os/netware/Apache.def

os/netware/apache.xdc

os/netware/modules.c

os/netware/os.h

os/netware/pre_nw.h

os/netware/util_nw.c

os/os2

os/os2/Makefile.in

os/os2/config.m4

os/os2/core.mk

os/os2/core_header.def

os/os2/os.h

os/os2/util_os2.c

os/tpf

os/tpf/TPFExport

os/tpf/ebcdic.c

os/tpf/ebcdic.h

os/tpf/os.c

os/tpf/os.h

os/tpf/samples

os/tpf/samples/linkdll.jcl

os/tpf/samples/loadset.jcl

os/unix

os/unix/Makefile.in

os/unix/config.m4

os/unix/os.h

os/unix/unixd.c

os/unix/unixd.h

os/win32

os/win32/BaseAddr.ref

os/win32/ap_regkey.c

os/win32/modules.c

os/win32/os.h

os/win32/util_win32.c

server

server/.indent.pro

server/Makefile.in

server/NWGNUmakefile

server/buildmark.c

server/config.c

server/config.m4

server/connection.c

server/core.c

server/core_filters.c

server/eoc_bucket.c

server/error_bucket.c

server/gen_test_char.c

server/gen_test_char.dsp

server/listen.c

server/log.c

server/main.c

server/mpm

server/mpm/MPM.NAMING

server/mpm/Makefile.in

server/mpm/beos

server/mpm/beos/Makefile.in

server/mpm/beos/beos.c

server/mpm/beos/beos.h

server/mpm/beos/config5.m4

server/mpm/beos/mpm.h

server/mpm/beos/mpm_default.h

server/mpm/config.m4

server/mpm/experimental

server/mpm/experimental/event

server/mpm/experimental/event/Makefile.in

server/mpm/experimental/event/config5.m4

server/mpm/experimental/event/event.c

server/mpm/experimental/event/fdqueue.c

server/mpm/experimental/event/fdqueue.h

server/mpm/experimental/event/mpm.h

server/mpm/experimental/event/mpm_default.h

server/mpm/experimental/event/pod.c

server/mpm/experimental/event/pod.h

server/mpm/mpmt_os2

server/mpm/mpmt_os2/Makefile.in

server/mpm/mpmt_os2/config5.m4

server/mpm/mpmt_os2/mpm.h

server/mpm/mpmt_os2/mpm_default.h

server/mpm/mpmt_os2/mpmt_os2.c

server/mpm/mpmt_os2/mpmt_os2_child.c

server/mpm/netware

server/mpm/netware/mpm.h

server/mpm/netware/mpm_default.h

server/mpm/netware/mpm_netware.c

server/mpm/prefork

server/mpm/prefork/Makefile.in

server/mpm/prefork/config.m4

server/mpm/prefork/mpm.h

server/mpm/prefork/mpm_default.h

server/mpm/prefork/prefork.c

server/mpm/winnt

server/mpm/winnt/Win9xConHook.c

server/mpm/winnt/Win9xConHook.def

server/mpm/winnt/Win9xConHook.dsp

server/mpm/winnt/Win9xConHook.h

server/mpm/winnt/child.c

server/mpm/winnt/mpm.h

server/mpm/winnt/mpm_default.h

server/mpm/winnt/mpm_winnt.c

server/mpm/winnt/mpm_winnt.h

server/mpm/winnt/nt_eventlog.c

server/mpm/winnt/service.c

server/mpm/worker

server/mpm/worker/Makefile.in

server/mpm/worker/config5.m4

server/mpm/worker/fdqueue.c

server/mpm/worker/fdqueue.h

server/mpm/worker/mpm.h

server/mpm/worker/mpm_default.h

server/mpm/worker/pod.c

server/mpm/worker/pod.h

server/mpm/worker/worker.c

server/mpm_common.c

server/protocol.c

server/provider.c

server/request.c

server/scoreboard.c

server/util.c

server/util_cfgtree.c

server/util_charset.c

server/util_debug.c

server/util_ebcdic.c

server/util_filter.c

server/util_md5.c

server/util_pcre.c

server/util_script.c

server/util_time.c

server/util_xml.c

server/vhost.c

srclib

srclib/Makefile.in

srclib/apr

srclib/apr-util

srclib/apr-util/CHANGES

srclib/apr-util/INSTALL.MySQL

srclib/apr-util/LICENSE

srclib/apr-util/Makefile.in

srclib/apr-util/NOTICE

srclib/apr-util/NWGNUmakefile

srclib/apr-util/apr-util.pc.in

srclib/apr-util/aprutil.dsp

srclib/apr-util/aprutil.dsw

srclib/apr-util/apu-config.in

srclib/apr-util/buckets

srclib/apr-util/buckets/apr_brigade.c

srclib/apr-util/buckets/apr_buckets.c

srclib/apr-util/buckets/apr_buckets_alloc.c

srclib/apr-util/buckets/apr_buckets_eos.c

srclib/apr-util/buckets/apr_buckets_file.c

srclib/apr-util/buckets/apr_buckets_flush.c

srclib/apr-util/buckets/apr_buckets_heap.c

srclib/apr-util/buckets/apr_buckets_mmap.c

srclib/apr-util/buckets/apr_buckets_pipe.c

srclib/apr-util/buckets/apr_buckets_pool.c

srclib/apr-util/buckets/apr_buckets_refcount.c

srclib/apr-util/buckets/apr_buckets_simple.c

srclib/apr-util/buckets/apr_buckets_socket.c

srclib/apr-util/build

srclib/apr-util/build-outputs.mk

srclib/apr-util/build.conf

srclib/apr-util/build/apr_common.m4

srclib/apr-util/build/apu-conf.m4

srclib/apr-util/build/apu-hints.m4

srclib/apr-util/build/apu-iconv.m4

srclib/apr-util/build/config.guess

srclib/apr-util/build/config.sub

srclib/apr-util/build/dbd.m4

srclib/apr-util/build/dbm.m4

srclib/apr-util/build/find_apr.m4

srclib/apr-util/build/find_apu.m4

srclib/apr-util/build/get-version.sh

srclib/apr-util/build/install.sh

srclib/apr-util/build/mkdir.sh

srclib/apr-util/build/pkg

srclib/apr-util/build/pkg/README

srclib/apr-util/build/pkg/buildpkg.sh

srclib/apr-util/build/pkg/pkginfo.in

srclib/apr-util/build/rpm

srclib/apr-util/build/rpm/apr-util.spec.in

srclib/apr-util/build/w32locatedb.pl

srclib/apr-util/buildconf

srclib/apr-util/config.layout

srclib/apr-util/configure

srclib/apr-util/configure.in

srclib/apr-util/crypto

srclib/apr-util/crypto/apr_md4.c

srclib/apr-util/crypto/apr_md5.c

srclib/apr-util/crypto/apr_sha1.c

srclib/apr-util/crypto/getuuid.c

srclib/apr-util/crypto/uuid.c

srclib/apr-util/dbd

srclib/apr-util/dbd/apr_dbd.c

srclib/apr-util/dbd/apr_dbd_pgsql.c

srclib/apr-util/dbd/apr_dbd_sqlite2.c

srclib/apr-util/dbd/apr_dbd_sqlite3.c

srclib/apr-util/dbm

srclib/apr-util/dbm/apr_dbm.c

srclib/apr-util/dbm/apr_dbm_berkeleydb.c

srclib/apr-util/dbm/apr_dbm_gdbm.c

srclib/apr-util/dbm/apr_dbm_ndbm.c

srclib/apr-util/dbm/apr_dbm_sdbm.c

srclib/apr-util/dbm/sdbm

srclib/apr-util/dbm/sdbm/sdbm.c

srclib/apr-util/dbm/sdbm/sdbm_hash.c

srclib/apr-util/dbm/sdbm/sdbm_lock.c

srclib/apr-util/dbm/sdbm/sdbm_pair.c

srclib/apr-util/dbm/sdbm/sdbm_pair.h

srclib/apr-util/dbm/sdbm/sdbm_private.h

srclib/apr-util/dbm/sdbm/sdbm_tune.h

srclib/apr-util/docs

srclib/apr-util/docs/doxygen.conf

srclib/apr-util/encoding

srclib/apr-util/encoding/apr_base64.c

srclib/apr-util/export_vars.sh.in

srclib/apr-util/hooks

srclib/apr-util/hooks/apr_hooks.c

srclib/apr-util/include

srclib/apr-util/include/apr_anylock.h

srclib/apr-util/include/apr_base64.h

srclib/apr-util/include/apr_buckets.h

srclib/apr-util/include/apr_date.h

srclib/apr-util/include/apr_dbd.h

srclib/apr-util/include/apr_dbm.h

srclib/apr-util/include/apr_hooks.h

srclib/apr-util/include/apr_ldap.h.in

srclib/apr-util/include/apr_ldap.hnw

srclib/apr-util/include/apr_ldap.hw

srclib/apr-util/include/apr_ldap_init.h

srclib/apr-util/include/apr_ldap_option.h

srclib/apr-util/include/apr_ldap_url.h

srclib/apr-util/include/apr_md4.h

srclib/apr-util/include/apr_md5.h

srclib/apr-util/include/apr_optional.h

srclib/apr-util/include/apr_optional_hooks.h

srclib/apr-util/include/apr_queue.h

srclib/apr-util/include/apr_reslist.h

srclib/apr-util/include/apr_rmm.h

srclib/apr-util/include/apr_sdbm.h

srclib/apr-util/include/apr_sha1.h

srclib/apr-util/include/apr_strmatch.h

srclib/apr-util/include/apr_uri.h

srclib/apr-util/include/apr_uuid.h

srclib/apr-util/include/apr_xlate.h

srclib/apr-util/include/apr_xml.h

srclib/apr-util/include/apu.h.in

srclib/apr-util/include/apu.hnw

srclib/apr-util/include/apu.hw

srclib/apr-util/include/apu_version.h

srclib/apr-util/include/apu_want.h.in

srclib/apr-util/include/apu_want.hnw

srclib/apr-util/include/apu_want.hw

srclib/apr-util/include/private

srclib/apr-util/include/private/apr_dbd_internal.h

srclib/apr-util/include/private/apr_dbm_private.h

srclib/apr-util/include/private/apu_config.h.in

srclib/apr-util/include/private/apu_config.hw

srclib/apr-util/include/private/apu_select_dbm.h.in

srclib/apr-util/include/private/apu_select_dbm.hw

srclib/apr-util/ldap

srclib/apr-util/ldap/NWGNUmakefile

srclib/apr-util/ldap/apr_ldap_init.c

srclib/apr-util/ldap/apr_ldap_option.c

srclib/apr-util/ldap/apr_ldap_url.c

srclib/apr-util/libaprutil.dsp

srclib/apr-util/libaprutil.rc

srclib/apr-util/misc

srclib/apr-util/misc/apr_date.c

srclib/apr-util/misc/apr_queue.c

srclib/apr-util/misc/apr_reslist.c

srclib/apr-util/misc/apr_rmm.c

srclib/apr-util/misc/apu_version.c

srclib/apr-util/renames_pending

srclib/apr-util/strmatch

srclib/apr-util/strmatch/apr_strmatch.c

srclib/apr-util/test

srclib/apr-util/test/Makefile.in

srclib/apr-util/test/Makefile.win

srclib/apr-util/test/NWGNUmakefile

srclib/apr-util/test/abts.c

srclib/apr-util/test/abts.h

srclib/apr-util/test/abts_tests.h

srclib/apr-util/test/data

srclib/apr-util/test/dbd.c

srclib/apr-util/test/nw_misc.c

srclib/apr-util/test/nwgnuaputest

srclib/apr-util/test/test_apu.h

srclib/apr-util/test/testbuckets.c

srclib/apr-util/test/testdate.c

srclib/apr-util/test/testdbd.c

srclib/apr-util/test/testdbm.c

srclib/apr-util/test/testldap.c

srclib/apr-util/test/testmd4.c

srclib/apr-util/test/testmd5.c

srclib/apr-util/test/testpass.c

srclib/apr-util/test/testqueue.c

srclib/apr-util/test/testreslist.c

srclib/apr-util/test/testrmm.c

srclib/apr-util/test/teststrmatch.c

srclib/apr-util/test/testuri.c

srclib/apr-util/test/testutil.c

srclib/apr-util/test/testutil.h

srclib/apr-util/test/testuuid.c

srclib/apr-util/test/testxlate.c

srclib/apr-util/test/testxml.c

srclib/apr-util/uri

srclib/apr-util/uri/NWGNUmakefile

srclib/apr-util/uri/apr_uri.c

srclib/apr-util/xlate

srclib/apr-util/xlate/xlate.c

srclib/apr-util/xml

srclib/apr-util/xml/NWGNUmakefile

srclib/apr-util/xml/apr_xml.c

srclib/apr-util/xml/expat

srclib/apr-util/xml/expat/COPYING

srclib/apr-util/xml/expat/Makefile.in

srclib/apr-util/xml/expat/README

srclib/apr-util/xml/expat/acconfig.h

srclib/apr-util/xml/expat/aclocal.m4

srclib/apr-util/xml/expat/buildconf.sh

srclib/apr-util/xml/expat/config.h.in

srclib/apr-util/xml/expat/configure

srclib/apr-util/xml/expat/configure.in

srclib/apr-util/xml/expat/conftools

srclib/apr-util/xml/expat/conftools/PrintPath

srclib/apr-util/xml/expat/conftools/config.guess

srclib/apr-util/xml/expat/conftools/config.sub

srclib/apr-util/xml/expat/conftools/install-sh

srclib/apr-util/xml/expat/conftools/ltmain.sh

srclib/apr-util/xml/expat/conftools/missing

srclib/apr-util/xml/expat/conftools/mkinstalldirs

srclib/apr-util/xml/expat/lib

srclib/apr-util/xml/expat/lib/Makefile.in

srclib/apr-util/xml/expat/lib/ascii.h

srclib/apr-util/xml/expat/lib/asciitab.h

srclib/apr-util/xml/expat/lib/config.hnw

srclib/apr-util/xml/expat/lib/expat.dsp

srclib/apr-util/xml/expat/lib/expat.h.in

srclib/apr-util/xml/expat/lib/iasciitab.h

srclib/apr-util/xml/expat/lib/latin1tab.h

srclib/apr-util/xml/expat/lib/map_osd_ebcdic_df04_1.h

srclib/apr-util/xml/expat/lib/nametab.h

srclib/apr-util/xml/expat/lib/osd_ebcdic_df04_1.h

srclib/apr-util/xml/expat/lib/utf8tab.h

srclib/apr-util/xml/expat/lib/winconfig.h

srclib/apr-util/xml/expat/lib/xml.dsp

srclib/apr-util/xml/expat/lib/xmlparse.c

srclib/apr-util/xml/expat/lib/xmlrole.c

srclib/apr-util/xml/expat/lib/xmlrole.h

srclib/apr-util/xml/expat/lib/xmltok.c

srclib/apr-util/xml/expat/lib/xmltok.h

srclib/apr-util/xml/expat/lib/xmltok_impl.c

srclib/apr-util/xml/expat/lib/xmltok_impl.h

srclib/apr-util/xml/expat/lib/xmltok_ns.c

srclib/apr/CHANGES

srclib/apr/LICENSE

srclib/apr/Makefile.in

srclib/apr/NOTICE

srclib/apr/NWGNUmakefile

srclib/apr/README.dev

srclib/apr/apr-config.in

srclib/apr/apr.dsp

srclib/apr/apr.dsw

srclib/apr/apr.pc.in

srclib/apr/atomic

srclib/apr/atomic/netware

srclib/apr/atomic/netware/apr_atomic.c

srclib/apr/atomic/os390

srclib/apr/atomic/os390/atomic.c

srclib/apr/atomic/unix

srclib/apr/atomic/unix/apr_atomic.c

srclib/apr/atomic/win32

srclib/apr/atomic/win32/apr_atomic.c

srclib/apr/build

srclib/apr/build-outputs.mk

srclib/apr/build.conf

srclib/apr/build/MakeEtags

srclib/apr/build/NWGNUenvironment.inc

srclib/apr/build/NWGNUhead.inc

srclib/apr/build/NWGNUmakefile

srclib/apr/build/NWGNUtail.inc

srclib/apr/build/PrintPath

srclib/apr/build/aplibtool.c

srclib/apr/build/apr_app.dsp

srclib/apr/build/apr_common.m4

srclib/apr/build/apr_hints.m4

srclib/apr/build/apr_network.m4

srclib/apr/build/apr_rules.mk.in

srclib/apr/build/apr_threads.m4

srclib/apr/build/buildcheck.sh

srclib/apr/build/config.guess

srclib/apr/build/config.sub

srclib/apr/build/cvtdsp.pl

srclib/apr/build/find_apr.m4

srclib/apr/build/fixwin32mak.pl

srclib/apr/build/gen-build.py

srclib/apr/build/get-version.sh

srclib/apr/build/install.sh

srclib/apr/build/jlibtool.c

srclib/apr/build/libapr_app.dsp

srclib/apr/build/libtool.m4

srclib/apr/build/lineends.pl

srclib/apr/build/ltmain.sh

srclib/apr/build/make_exports.awk

srclib/apr/build/make_nw_export.awk

srclib/apr/build/make_var_export.awk

srclib/apr/build/mkdir.sh

srclib/apr/build/nw_export.inc

srclib/apr/build/nw_ver.awk

srclib/apr/build/pkg

srclib/apr/build/pkg/README

srclib/apr/build/pkg/buildpkg.sh

srclib/apr/build/pkg/pkginfo.in

srclib/apr/build/prebuildNW.bat

srclib/apr/build/rpm

srclib/apr/build/rpm/apr.spec.in

srclib/apr/build/run-gcov.sh

srclib/apr/build/win32ver.awk

srclib/apr/buildconf

srclib/apr/config.layout

srclib/apr/config.log

srclib/apr/config.nice

srclib/apr/configure

srclib/apr/configure.in

srclib/apr/docs

srclib/apr/docs/APRDesign.html

srclib/apr/docs/canonical_filenames.html

srclib/apr/docs/doxygen.conf

srclib/apr/docs/incomplete_types

srclib/apr/docs/non_apr_programs

srclib/apr/docs/pool-design.html

srclib/apr/docs/win32_builds.html

srclib/apr/dso

srclib/apr/dso/aix

srclib/apr/dso/aix/dso.c

srclib/apr/dso/beos

srclib/apr/dso/beos/dso.c

srclib/apr/dso/netware

srclib/apr/dso/netware/dso.c

srclib/apr/dso/os2

srclib/apr/dso/os2/dso.c

srclib/apr/dso/os390

srclib/apr/dso/os390/dso.c

srclib/apr/dso/unix

srclib/apr/dso/unix/dso.c

srclib/apr/dso/win32

srclib/apr/dso/win32/dso.c

srclib/apr/emacs-mode

srclib/apr/file_io

srclib/apr/file_io/netware

srclib/apr/file_io/netware/filepath.c

srclib/apr/file_io/netware/filestat.c

srclib/apr/file_io/netware/filesys.c

srclib/apr/file_io/netware/flock.c

srclib/apr/file_io/netware/mktemp.c

srclib/apr/file_io/netware/pipe.c

srclib/apr/file_io/os2

srclib/apr/file_io/os2/copy.c

srclib/apr/file_io/os2/dir.c

srclib/apr/file_io/os2/dir_make_recurse.c

srclib/apr/file_io/os2/fileacc.c

srclib/apr/file_io/os2/filedup.c

srclib/apr/file_io/os2/filepath.c

srclib/apr/file_io/os2/filepath_util.c

srclib/apr/file_io/os2/filestat.c

srclib/apr/file_io/os2/filesys.c

srclib/apr/file_io/os2/flock.c

srclib/apr/file_io/os2/fullrw.c

srclib/apr/file_io/os2/maperrorcode.c

srclib/apr/file_io/os2/mktemp.c

srclib/apr/file_io/os2/open.c

srclib/apr/file_io/os2/pipe.c

srclib/apr/file_io/os2/readwrite.c

srclib/apr/file_io/os2/seek.c

srclib/apr/file_io/os2/tempdir.c

srclib/apr/file_io/unix

srclib/apr/file_io/unix/copy.c

srclib/apr/file_io/unix/dir.c

srclib/apr/file_io/unix/fileacc.c

srclib/apr/file_io/unix/filedup.c

srclib/apr/file_io/unix/filepath.c

srclib/apr/file_io/unix/filepath_util.c

srclib/apr/file_io/unix/filestat.c

srclib/apr/file_io/unix/flock.c

srclib/apr/file_io/unix/fullrw.c

srclib/apr/file_io/unix/mktemp.c

srclib/apr/file_io/unix/open.c

srclib/apr/file_io/unix/pipe.c

srclib/apr/file_io/unix/readwrite.c

srclib/apr/file_io/unix/seek.c

srclib/apr/file_io/unix/tempdir.c

srclib/apr/file_io/win32

srclib/apr/file_io/win32/dir.c

srclib/apr/file_io/win32/filedup.c

srclib/apr/file_io/win32/filepath.c

srclib/apr/file_io/win32/filestat.c

srclib/apr/file_io/win32/filesys.c

srclib/apr/file_io/win32/flock.c

srclib/apr/file_io/win32/open.c

srclib/apr/file_io/win32/pipe.c

srclib/apr/file_io/win32/readwrite.c

srclib/apr/file_io/win32/seek.c

srclib/apr/helpers

srclib/apr/helpers/apr_rename.pl

srclib/apr/images

srclib/apr/images/ScanDocBig.jpg

srclib/apr/images/ScanDocSmall.jpg

srclib/apr/images/ball1.gif

srclib/apr/images/ball1.png

srclib/apr/images/ball2.gif

srclib/apr/images/ball2.png

srclib/apr/images/bug.gif

srclib/apr/images/bug.png

srclib/apr/images/caution.gif

srclib/apr/images/caution.png

srclib/apr/images/master.gif

srclib/apr/images/master.png

srclib/apr/images/tip.gif

srclib/apr/images/tip.png

srclib/apr/images/warning.gif

srclib/apr/images/warning.png

srclib/apr/include

srclib/apr/include/apr.h.in

srclib/apr/include/apr.hnw

srclib/apr/include/apr.hw

srclib/apr/include/apr_allocator.h

srclib/apr/include/apr_atomic.h

srclib/apr/include/apr_dso.h

srclib/apr/include/apr_env.h

srclib/apr/include/apr_errno.h

srclib/apr/include/apr_file_info.h

srclib/apr/include/apr_file_io.h

srclib/apr/include/apr_fnmatch.h

srclib/apr/include/apr_general.h

srclib/apr/include/apr_getopt.h

srclib/apr/include/apr_global_mutex.h

srclib/apr/include/apr_hash.h

srclib/apr/include/apr_inherit.h

srclib/apr/include/apr_lib.h

srclib/apr/include/apr_mmap.h

srclib/apr/include/apr_network_io.h

srclib/apr/include/apr_poll.h

srclib/apr/include/apr_pools.h

srclib/apr/include/apr_portable.h

srclib/apr/include/apr_proc_mutex.h

srclib/apr/include/apr_random.h

srclib/apr/include/apr_ring.h

srclib/apr/include/apr_shm.h

srclib/apr/include/apr_signal.h

srclib/apr/include/apr_strings.h

srclib/apr/include/apr_support.h

srclib/apr/include/apr_tables.h

srclib/apr/include/apr_thread_cond.h

srclib/apr/include/apr_thread_mutex.h

srclib/apr/include/apr_thread_proc.h

srclib/apr/include/apr_thread_rwlock.h

srclib/apr/include/apr_time.h

srclib/apr/include/apr_user.h

srclib/apr/include/apr_version.h

srclib/apr/include/apr_want.h

srclib/apr/include/arch

srclib/apr/include/arch/aix

srclib/apr/include/arch/aix/apr_arch_dso.h

srclib/apr/include/arch/apr_private_common.h

srclib/apr/include/arch/beos

srclib/apr/include/arch/beos/apr_arch_dso.h

srclib/apr/include/arch/beos/apr_arch_proc_mutex.h

srclib/apr/include/arch/beos/apr_arch_thread_cond.h

srclib/apr/include/arch/beos/apr_arch_thread_mutex.h

srclib/apr/include/arch/beos/apr_arch_thread_rwlock.h

srclib/apr/include/arch/beos/apr_arch_threadproc.h

srclib/apr/include/arch/netware

srclib/apr/include/arch/netware/apr_arch_dso.h

srclib/apr/include/arch/netware/apr_arch_file_io.h

srclib/apr/include/arch/netware/apr_arch_global_mutex.h

srclib/apr/include/arch/netware/apr_arch_internal_time.h

srclib/apr/include/arch/netware/apr_arch_networkio.h

srclib/apr/include/arch/netware/apr_arch_pre_nw.h

srclib/apr/include/arch/netware/apr_arch_proc_mutex.h

srclib/apr/include/arch/netware/apr_arch_thread_cond.h

srclib/apr/include/arch/netware/apr_arch_thread_mutex.h

srclib/apr/include/arch/netware/apr_arch_thread_rwlock.h

srclib/apr/include/arch/netware/apr_arch_threadproc.h

srclib/apr/include/arch/netware/apr_private.h

srclib/apr/include/arch/os2

srclib/apr/include/arch/os2/apr_arch_dso.h

srclib/apr/include/arch/os2/apr_arch_file_io.h

srclib/apr/include/arch/os2/apr_arch_networkio.h

srclib/apr/include/arch/os2/apr_arch_os2calls.h

srclib/apr/include/arch/os2/apr_arch_proc_mutex.h

srclib/apr/include/arch/os2/apr_arch_thread_cond.h

srclib/apr/include/arch/os2/apr_arch_thread_mutex.h

srclib/apr/include/arch/os2/apr_arch_thread_rwlock.h

srclib/apr/include/arch/os2/apr_arch_threadproc.h

srclib/apr/include/arch/os390

srclib/apr/include/arch/os390/apr_arch_dso.h

srclib/apr/include/arch/unix

srclib/apr/include/arch/unix/apr_arch_dso.h

srclib/apr/include/arch/unix/apr_arch_file_io.h

srclib/apr/include/arch/unix/apr_arch_global_mutex.h

srclib/apr/include/arch/unix/apr_arch_inherit.h

srclib/apr/include/arch/unix/apr_arch_internal_time.h

srclib/apr/include/arch/unix/apr_arch_misc.h

srclib/apr/include/arch/unix/apr_arch_networkio.h

srclib/apr/include/arch/unix/apr_arch_poll_private.h

srclib/apr/include/arch/unix/apr_arch_proc_mutex.h

srclib/apr/include/arch/unix/apr_arch_shm.h

srclib/apr/include/arch/unix/apr_arch_thread_cond.h

srclib/apr/include/arch/unix/apr_arch_thread_mutex.h

srclib/apr/include/arch/unix/apr_arch_thread_rwlock.h

srclib/apr/include/arch/unix/apr_arch_threadproc.h

srclib/apr/include/arch/unix/apr_private.h.in

srclib/apr/include/arch/win32

srclib/apr/include/arch/win32/apr_arch_atime.h

srclib/apr/include/arch/win32/apr_arch_dso.h

srclib/apr/include/arch/win32/apr_arch_file_io.h

srclib/apr/include/arch/win32/apr_arch_inherit.h

srclib/apr/include/arch/win32/apr_arch_misc.h

srclib/apr/include/arch/win32/apr_arch_networkio.h

srclib/apr/include/arch/win32/apr_arch_proc_mutex.h

srclib/apr/include/arch/win32/apr_arch_thread_cond.h

srclib/apr/include/arch/win32/apr_arch_thread_mutex.h

srclib/apr/include/arch/win32/apr_arch_thread_rwlock.h

srclib/apr/include/arch/win32/apr_arch_threadproc.h

srclib/apr/include/arch/win32/apr_arch_utf8.h

srclib/apr/include/arch/win32/apr_dbg_win32_handles.h

srclib/apr/include/arch/win32/apr_private.h

srclib/apr/libapr.dsp

srclib/apr/libapr.rc

srclib/apr/libaprnw.mcp.zip

srclib/apr/locks

srclib/apr/locks/beos

srclib/apr/locks/beos/proc_mutex.c

srclib/apr/locks/beos/thread_cond.c

srclib/apr/locks/beos/thread_mutex.c

srclib/apr/locks/beos/thread_rwlock.c

srclib/apr/locks/netware

srclib/apr/locks/netware/proc_mutex.c

srclib/apr/locks/netware/thread_cond.c

srclib/apr/locks/netware/thread_mutex.c

srclib/apr/locks/netware/thread_rwlock.c

srclib/apr/locks/os2

srclib/apr/locks/os2/proc_mutex.c

srclib/apr/locks/os2/thread_cond.c

srclib/apr/locks/os2/thread_mutex.c

srclib/apr/locks/os2/thread_rwlock.c

srclib/apr/locks/unix

srclib/apr/locks/unix/global_mutex.c

srclib/apr/locks/unix/proc_mutex.c

srclib/apr/locks/unix/thread_cond.c

srclib/apr/locks/unix/thread_mutex.c

srclib/apr/locks/unix/thread_rwlock.c

srclib/apr/locks/win32

srclib/apr/locks/win32/proc_mutex.c

srclib/apr/locks/win32/thread_cond.c

srclib/apr/locks/win32/thread_mutex.c

srclib/apr/locks/win32/thread_rwlock.c

srclib/apr/memory

srclib/apr/memory/unix

srclib/apr/memory/unix/apr_pools.c

srclib/apr/misc

srclib/apr/misc/netware

srclib/apr/misc/netware/apr.xdc

srclib/apr/misc/netware/aprlib.def

srclib/apr/misc/netware/charset.c

srclib/apr/misc/netware/libprews.c

srclib/apr/misc/netware/rand.c

srclib/apr/misc/netware/start.c

srclib/apr/misc/unix

srclib/apr/misc/unix/charset.c

srclib/apr/misc/unix/env.c

srclib/apr/misc/unix/errorcodes.c

srclib/apr/misc/unix/getopt.c

srclib/apr/misc/unix/otherchild.c

srclib/apr/misc/unix/rand.c

srclib/apr/misc/unix/randbyte_os2.inc

srclib/apr/misc/unix/start.c

srclib/apr/misc/unix/version.c

srclib/apr/misc/win32

srclib/apr/misc/win32/apr_app.c

srclib/apr/misc/win32/charset.c

srclib/apr/misc/win32/env.c

srclib/apr/misc/win32/internal.c

srclib/apr/misc/win32/misc.c

srclib/apr/misc/win32/rand.c

srclib/apr/misc/win32/start.c

srclib/apr/misc/win32/utf8.c

srclib/apr/mmap

srclib/apr/mmap/unix

srclib/apr/mmap/unix/common.c

srclib/apr/mmap/unix/mmap.c

srclib/apr/mmap/win32

srclib/apr/mmap/win32/mmap.c

srclib/apr/network_io

srclib/apr/network_io/beos

srclib/apr/network_io/beos/sendrecv.c

srclib/apr/network_io/beos/socketcommon.c

srclib/apr/network_io/os2

srclib/apr/network_io/os2/inet_ntop.c

srclib/apr/network_io/os2/inet_pton.c

srclib/apr/network_io/os2/os2calls.c

srclib/apr/network_io/os2/sendrecv.c

srclib/apr/network_io/os2/sendrecv_udp.c

srclib/apr/network_io/os2/sockaddr.c

srclib/apr/network_io/os2/sockets.c

srclib/apr/network_io/os2/sockopt.c

srclib/apr/network_io/unix

srclib/apr/network_io/unix/inet_ntop.c

srclib/apr/network_io/unix/inet_pton.c

srclib/apr/network_io/unix/multicast.c

srclib/apr/network_io/unix/sendrecv.c

srclib/apr/network_io/unix/sockaddr.c

srclib/apr/network_io/unix/sockets.c

srclib/apr/network_io/unix/sockopt.c

srclib/apr/network_io/win32

srclib/apr/network_io/win32/sendrecv.c

srclib/apr/network_io/win32/sockets.c

srclib/apr/network_io/win32/sockopt.c

srclib/apr/passwd

srclib/apr/passwd/apr_getpass.c

srclib/apr/poll

srclib/apr/poll/os2

srclib/apr/poll/os2/poll.c

srclib/apr/poll/os2/pollset.c

srclib/apr/poll/unix

srclib/apr/poll/unix/epoll.c

srclib/apr/poll/unix/kqueue.c

srclib/apr/poll/unix/poll.c

srclib/apr/poll/unix/port.c

srclib/apr/poll/unix/select.c

srclib/apr/random

srclib/apr/random/unix

srclib/apr/random/unix/apr_random.c

srclib/apr/random/unix/sha2.c

srclib/apr/random/unix/sha2.h

srclib/apr/random/unix/sha2_glue.c

srclib/apr/renames_pending

srclib/apr/shmem

srclib/apr/shmem/beos

srclib/apr/shmem/beos/shm.c

srclib/apr/shmem/os2

srclib/apr/shmem/os2/shm.c

srclib/apr/shmem/unix

srclib/apr/shmem/unix/shm.c

srclib/apr/shmem/win32

srclib/apr/shmem/win32/shm.c

srclib/apr/strings

srclib/apr/strings/apr_cpystrn.c

srclib/apr/strings/apr_fnmatch.c

srclib/apr/strings/apr_snprintf.c

srclib/apr/strings/apr_strings.c

srclib/apr/strings/apr_strnatcmp.c

srclib/apr/strings/apr_strtok.c

srclib/apr/support

srclib/apr/support/unix

srclib/apr/support/unix/waitio.c

srclib/apr/tables

srclib/apr/tables/apr_hash.c

srclib/apr/tables/apr_tables.c

srclib/apr/test

srclib/apr/test/Makefile.in

srclib/apr/test/Makefile.win

srclib/apr/test/NWGNUmakefile

srclib/apr/test/README

srclib/apr/test/abts.c

srclib/apr/test/abts.h

srclib/apr/test/abts_tests.h

srclib/apr/test/aprtest.def

srclib/apr/test/aprtest.dsp

srclib/apr/test/aprtest.win

srclib/apr/test/data

srclib/apr/test/data/file_datafile.txt

srclib/apr/test/data/mmap_datafile.txt

srclib/apr/test/globalmutexchild.c

srclib/apr/test/internal

srclib/apr/test/internal/Makefile.in

srclib/apr/test/internal/testregex.c

srclib/apr/test/internal/testucs.c

srclib/apr/test/mod_test.c

srclib/apr/test/nw_misc.c

srclib/apr/test/nwgnuaprtest

srclib/apr/test/nwgnuglobalmutexchild

srclib/apr/test/nwgnumod_test

srclib/apr/test/nwgnuproc_child

srclib/apr/test/nwgnureadchild

srclib/apr/test/nwgnusockchild

srclib/apr/test/nwgnutestatmc

srclib/apr/test/nwgnutryread

srclib/apr/test/occhild.c

srclib/apr/test/proc_child.c

srclib/apr/test/readchild.c

srclib/apr/test/sendfile.c

srclib/apr/test/sockchild.c

srclib/apr/test/testall.dsp

srclib/apr/test/testall.dsw

srclib/apr/test/testapp.c

srclib/apr/test/testapp.dsp

srclib/apr/test/testappnt.dsp

srclib/apr/test/testargs.c

srclib/apr/test/testatomic.c

srclib/apr/test/testdir.c

srclib/apr/test/testdso.c

srclib/apr/test/testdup.c

srclib/apr/test/testenv.c

srclib/apr/test/testfile.c

srclib/apr/test/testfilecopy.c

srclib/apr/test/testfileinfo.c

srclib/apr/test/testflock.c

srclib/apr/test/testflock.h

srclib/apr/test/testfmt.c

srclib/apr/test/testfnmatch.c

srclib/apr/test/testglobalmutex.c

srclib/apr/test/testglobalmutex.h

srclib/apr/test/testhash.c

srclib/apr/test/testipsub.c

srclib/apr/test/testlfs.c

srclib/apr/test/testlock.c

srclib/apr/test/testlockperf.c

srclib/apr/test/testmmap.c

srclib/apr/test/testmutexscope.c

srclib/apr/test/testnames.c

srclib/apr/test/testoc.c

srclib/apr/test/testpath.c

srclib/apr/test/testpipe.c

srclib/apr/test/testpoll.c

srclib/apr/test/testpools.c

srclib/apr/test/testproc.c

srclib/apr/test/testprocmutex.c

srclib/apr/test/testrand.c

srclib/apr/test/testrand2.c

srclib/apr/test/testshm.c

srclib/apr/test/testshm.h

srclib/apr/test/testshmconsumer.c

srclib/apr/test/testshmproducer.c

srclib/apr/test/testsleep.c

srclib/apr/test/testsock.c

srclib/apr/test/testsock.h

srclib/apr/test/testsockets.c

srclib/apr/test/testsockopt.c

srclib/apr/test/teststr.c

srclib/apr/test/teststrnatcmp.c

srclib/apr/test/testtable.c

srclib/apr/test/testtemp.c

srclib/apr/test/testthread.c

srclib/apr/test/testtime.c

srclib/apr/test/testud.c

srclib/apr/test/testuser.c

srclib/apr/test/testutil.c

srclib/apr/test/testutil.h

srclib/apr/test/testvsn.c

srclib/apr/test/tryread.c

srclib/apr/threadproc

srclib/apr/threadproc/beos

srclib/apr/threadproc/beos/apr_proc_stub.c

srclib/apr/threadproc/beos/proc.c

srclib/apr/threadproc/beos/thread.c

srclib/apr/threadproc/beos/threadpriv.c

srclib/apr/threadproc/beos/threadproc_common.c

srclib/apr/threadproc/netware

srclib/apr/threadproc/netware/proc.c

srclib/apr/threadproc/netware/procsup.c

srclib/apr/threadproc/netware/signals.c

srclib/apr/threadproc/netware/thread.c

srclib/apr/threadproc/netware/threadpriv.c

srclib/apr/threadproc/os2

srclib/apr/threadproc/os2/proc.c

srclib/apr/threadproc/os2/signals.c

srclib/apr/threadproc/os2/thread.c

srclib/apr/threadproc/os2/threadpriv.c

srclib/apr/threadproc/unix

srclib/apr/threadproc/unix/proc.c

srclib/apr/threadproc/unix/procsup.c

srclib/apr/threadproc/unix/signals.c

srclib/apr/threadproc/unix/thread.c

srclib/apr/threadproc/unix/threadpriv.c

srclib/apr/threadproc/win32

srclib/apr/threadproc/win32/proc.c

srclib/apr/threadproc/win32/signals.c

srclib/apr/threadproc/win32/thread.c

srclib/apr/threadproc/win32/threadpriv.c

srclib/apr/time

srclib/apr/time/unix

srclib/apr/time/unix/time.c

srclib/apr/time/unix/timestr.c

srclib/apr/time/win32

srclib/apr/time/win32/access.c

srclib/apr/time/win32/time.c

srclib/apr/time/win32/timestr.c

srclib/apr/user

srclib/apr/user/netware

srclib/apr/user/netware/groupinfo.c

srclib/apr/user/netware/userinfo.c

srclib/apr/user/unix

srclib/apr/user/unix/groupinfo.c

srclib/apr/user/unix/userinfo.c

srclib/apr/user/win32

srclib/apr/user/win32/groupinfo.c

srclib/apr/user/win32/userinfo.c

srclib/pcre

srclib/pcre/AUTHORS

srclib/pcre/COPYING

srclib/pcre/ChangeLog

srclib/pcre/INSTALL

srclib/pcre/LICENCE

srclib/pcre/Makefile.in

srclib/pcre/NEWS

srclib/pcre/NON-UNIX-USE

srclib/pcre/NWGNUmakefile

srclib/pcre/README

srclib/pcre/RunTest.in

srclib/pcre/config.hw

srclib/pcre/config.in

srclib/pcre/configure

srclib/pcre/configure.in

srclib/pcre/dftables.c

srclib/pcre/dftables.dsp

srclib/pcre/dll.mk

srclib/pcre/doc

srclib/pcre/doc/README_httpd

srclib/pcre/get.c

srclib/pcre/install-sh

srclib/pcre/internal.h

srclib/pcre/libpcre.def

srclib/pcre/libpcre.pc.in

srclib/pcre/libpcreposix.def

srclib/pcre/maketables.c

srclib/pcre/makevp.bat

srclib/pcre/mkinstalldirs

srclib/pcre/pcre-config.in

srclib/pcre/pcre.c

srclib/pcre/pcre.def

srclib/pcre/pcre.dsp

srclib/pcre/pcre.hw

srclib/pcre/pcre.in

srclib/pcre/pcredemo.c

srclib/pcre/pcregrep.c

srclib/pcre/pcreposix.c

srclib/pcre/pcreposix.dsp

srclib/pcre/pcretest.c

srclib/pcre/perltest

srclib/pcre/perltest8

srclib/pcre/pgrep.c

srclib/pcre/printint.c

srclib/pcre/study.c

srclib/pcre/testdata

srclib/pcre/testdata/testinput1

srclib/pcre/testdata/testinput2

srclib/pcre/testdata/testinput3

srclib/pcre/testdata/testinput4

srclib/pcre/testdata/testinput5

srclib/pcre/testdata/testinput6

srclib/pcre/testdata/testoutput1

srclib/pcre/testdata/testoutput2

srclib/pcre/testdata/testoutput3

srclib/pcre/testdata/testoutput4

srclib/pcre/testdata/testoutput5

srclib/pcre/testdata/testoutput6

srclib/pcre/ucp.c

srclib/pcre/ucp.h

srclib/pcre/ucpinternal.h

srclib/pcre/ucptable.c

srclib/pcre/ucptypetable.c

support

support/.indent.pro

support/Makefile.in

support/NWGNUab

support/NWGNUhtcacheclean

support/NWGNUhtdbm

support/NWGNUhtdigest

support/NWGNUhtpasswd

support/NWGNUhttxt2dbm

support/NWGNUlogres

support/NWGNUmakefile

support/NWGNUrotlogs

support/README

support/SHA1

support/SHA1/README.sha1

support/SHA1/convert-sha1.pl

support/SHA1/htpasswd-sha1.pl

support/SHA1/ldif-sha1.example

support/ab.c

support/ab.dsp

support/abs.dsp

support/apachectl.in

support/apxs.in

support/check_forensic

support/checkgid.c

support/config.m4

support/dbmmanage.in

support/envvars-std.in

support/htcacheclean.c

support/htcacheclean.dsp

support/htdbm.c

support/htdbm.dsp

support/htdigest.c

support/htdigest.dsp

support/htpasswd.c

support/htpasswd.dsp

support/httxt2dbm.c

support/list_hooks.pl

support/log_server_status.in

support/logresolve.c

support/logresolve.dsp

support/logresolve.pl.in

support/phf_abuse_log.cgi.in

support/rotatelogs.c

support/rotatelogs.dsp

support/split-logfile.in

support/suexec.c

support/suexec.h

support/utilitiesnw.def

support/win32

support/win32/ApacheMonitor.c

support/win32/ApacheMonitor.dsp

support/win32/ApacheMonitor.h

support/win32/ApacheMonitor.ico

support/win32/ApacheMonitor.rc

support/win32/apache_header.bmp

support/win32/aprun.ico

support/win32/apstop.ico

support/win32/srun.bmp

support/win32/sstop.bmp

support/win32/wintty.c

support/win32/wintty.dsp

test

test/.indent.pro

test/Makefile.in

test/README

test/check_chunked

test/cls.c

test/tcpdumpscii.txt

test/test-writev.c

test/test_find.c

test/test_limits.c

test/test_parser.c

test/test_select.c

test/time-sem.c

test/zb.c

files removed:
debian/NEWS.Debian

debian/README.Debian

debian/README.build

debian/README.etc

debian/TODO

debian/TODO.vhosts-base

debian/apache2-common.dirs

debian/apache2-common.docs

debian/apache2-common.lintian

debian/apache2-common.manpages

debian/apache2-common.postinst

debian/apache2-common.postrm

debian/apache2-mpm-perchild.dirs

debian/apache2-policy.sgml

debian/config

debian/config-mods

debian/config-mods/actions.load

debian/config-mods/asis.load

debian/config-mods/auth_anon.load

debian/config-mods/auth_dbm.load

debian/config-mods/auth_digest.load

debian/config-mods/auth_ldap.load

debian/config-mods/cache.load

debian/config-mods/cern_meta.load

debian/config-mods/cgi.load

debian/config-mods/cgid.conf

debian/config-mods/cgid.load

debian/config-mods/dav.load

debian/config-mods/dav_fs.conf

debian/config-mods/dav_fs.load

debian/config-mods/deflate.load

debian/config-mods/disk_cache.load

debian/config-mods/expires.load

debian/config-mods/ext_filter.load

debian/config-mods/file_cache.load

debian/config-mods/headers.load

debian/config-mods/imap.load

debian/config-mods/include.load

debian/config-mods/info.load

debian/config-mods/ldap.load

debian/config-mods/mem_cache.load

debian/config-mods/mime_magic.conf

debian/config-mods/mime_magic.load

debian/config-mods/proxy.conf

debian/config-mods/proxy.load

debian/config-mods/proxy_connect.load

debian/config-mods/proxy_ftp.load

debian/config-mods/proxy_http.load

debian/config-mods/rewrite.load

debian/config-mods/sick-hack-to-update-modules

debian/config-mods/speling.load

debian/config-mods/ssl.conf

debian/config-mods/ssl.load

debian/config-mods/suexec.load

debian/config-mods/unique_id.load

debian/config-mods/userdir.conf

debian/config-mods/userdir.load

debian/config-mods/usertrack.load

debian/config-mods/vhost_alias.load

debian/config-sites

debian/config-sites/default

debian/config-sites/default-443

debian/config/apache2.conf

debian/config/magic

debian/config_vars.mk

debian/default-site

debian/init-script

debian/libapr0-dev.dirs

debian/libapr0.dirs

debian/libapr0.postinst

debian/list.mods

debian/manual.conf

debian/mods-notes

debian/patches/001_apachectl_is_differently_fucked

debian/patches/002_kill_installbuilddir

debian/patches/003_build_with_autoconf_2.5

debian/patches/005_apxs

debian/patches/010_more_fhs_compliancy

debian/patches/011_fix_ap-config

debian/patches/012_apxs2_sucks

debian/patches/013_ship_find_apx.m4

debian/patches/014_unfuck_sctp

debian/patches/021_internal_pcre.patch

debian/patches/022_fix_apr-util_link

debian/patches/029_docroot_manual.patch

debian/patches/030_www-browser_apachectl.patch

debian/patches/034_ab2_has_openssl

debian/patches/035_HEAD_Content-Length_Fix_From_CVS

debian/patches/039_fix_forensic_tmpfiles

debian/patches/040_link_external_pcre

debian/patches/047_ssl_reneg_with_body

debian/patches/048_reverse_proxy_fix

debian/patches/049_apr_tables_HEAD_cleanup

debian/patches/050_mod_imap_CVE-2005-3352

debian/patches/051_mod_ssl_CVE-2005-3357

debian/patches/052_sendfile_2G_limit

debian/patches/053_mod_rewite_CVE-2006-3747

debian/patches/054_restore_prefix_fix

debian/patches/to-review

debian/patches/to-review/013_path_iconv

debian/patches/to-review/015_escape_amp

debian/patches/to-review/023_largefiles_upstream_fixes

debian/patches/to-review/024_largefiles_debian_hacks

debian/robots.txt

debian/scripts

debian/scripts/fix.source.patch

debian/scripts/lib

debian/scripts/messages

debian/scripts/patch.apply

debian/scripts/patch.unapply

debian/scripts/source.clean

debian/scripts/source.patch

debian/scripts/source.unpack

debian/scripts/unfix.source.patch

debian/scripts/vars

debian/ssl-certificate

debian/ssleay.cnf

debian/sys-build.mk

debian/vhost-base

debian/vhost-base/add.d

debian/vhost-base/add.d/apache2

debian/vhost-base/conf.d

debian/vhost-base/conf.d/apache2

debian/vhost-base/disable.d

debian/vhost-base/disable.d/apache2

debian/vhost-base/enable.d

debian/vhost-base/enable.d/apache2

debian/vhost-base/remove.d

debian/vhost-base/remove.d/apache2

debian/vhost-base/templates.d

debian/vhost-base/templates.d/apache2.in

debian/watch

upstream

upstream/tarballs

upstream/tarballs/httpd-2.0.55.tar.gz

upstream/tarballs/zzzdebicons.tgz

files modified:
debian/a2-scripts/a2dismod

debian/a2-scripts/a2enmod

debian/a2dismod.8

debian/a2enmod.8

debian/apache2-doc.conf

debian/apache2-mpm-prefork.dirs

debian/apache2-mpm-prefork.postinst

debian/apache2-mpm-prefork.preinst

debian/apache2-mpm-worker.dirs

debian/apache2-prefork-dev.dirs

debian/apache2-threaded-dev.dirs

debian/apache2.8

debian/changelog

debian/control

debian/copyright

debian/mpm-postinst-threaded

debian/mpm-preinst-threaded

debian/mpms.prerm

debian/patches/004_usr_bin_perl_0wnz_j00

debian/patches/008_make_include_safe

debian/patches/009_apache2_has_dso

debian/patches/017_fix_ipv6

debian/patches/031_apxs2_sucks_more

debian/patches/032_suexec_is_shared

debian/patches/033_dbm_read_hash_or_btree

debian/patches/038_no_LD_LIBRARY_PATH

debian/patches/042_htdigest_CAN-2005-1344

debian/rules

Show diffs side-by-side

added added

removed removed

srclib/pcre/pcre.c

/*************************************************

* Perl-Compatible Regular Expressions *

*************************************************/

/*
This is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language. See
the file Tech.Notes for some information on the internals.

Written by: Philip Hazel <ph10@cam.ac.uk>

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/

/* Define DEBUG to get debugging output on stdout. */

/* #define DEBUG */

/* Debug printing helper. DPRINTF takes ONE argument: a complete, parenthesised
printf argument list, e.g. DPRINTF(("count=%d\n", n)). With DEBUG defined the
macro expands to "printf <that list>"; without it the macro expands to nothing,
so the arguments are not evaluated. Doing this through a macro avoids
scattering indented #ifdef lines through the code, which some compilers reject. */
#ifndef DEBUG
#define DPRINTF(p)
#else
#define DPRINTF(p) printf p
#endif

/* Include the internals header, which itself includes "config.h", the Standard

C headers, and the external pcre header. */

#include "internal.h"

/* If Unicode Property support is wanted, include a private copy of the

function that does it, and the table that translates names to numbers. */

#ifdef SUPPORT_UCP

#include "ucp.c"

#include "ucptypetable.c"

#endif

/* Maximum number of items on the nested bracket stacks at compile time. This
applies to the nesting of all kinds of parentheses. It does not limit
un-nested, non-capturing parentheses. This number can be made bigger if
necessary - it is used to dimension one int and one unsigned char vector at
compile time. NOTE(review): "compile time" here presumably means compiling a
regular expression pattern rather than building this C file - confirm. */

#define BRASTACK_SIZE 200

/* Maximum number of ints of offset to save on the stack for recursive calls.
If the offset vector is bigger, malloc is used. This should be a multiple of 3,
because the offset vector is always a multiple of 3 long (the current value,
30, satisfies this). */

#define REC_STACK_SAVE_MAX 30

/* The maximum remaining length of subject we are prepared to search for a
req_byte match. NOTE(review): the unit is presumably bytes of the subject
string - confirm at the point of use. */

#define REQ_BYTE_MAX 1000

/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that

the definition is next to the definition of the opcodes in internal.h. */

static const uschar OP_lengths[] = { OP_LENGTHS };

/* Min and max values for the common repeats; for the maxima, 0 => infinity.
The two tables are parallel: entry i of rep_min and entry i of rep_max describe
the same repeat. NOTE(review): the index presumably follows the order of the
repeat opcodes in internal.h - confirm there. */

static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };

101

102

/* Table for handling escaped characters in the range '0'-'z'. Positive returns

103

are simple data values; negative values are for special things like \d and so

104

on. Zero means further processing is needed (for things like \x), or the escape

105

is invalid. */

106

107

#if !EBCDIC /* This is the "normal" table for ASCII systems */

108

static const short int escapes[] = {

109

0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */

110

0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? */

111

'@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E, 0, -ESC_G, /* @ - G */

112

0, 0, 0, 0, 0, 0, 0, 0, /* H - O */

113

-ESC_P, -ESC_Q, 0, -ESC_S, 0, 0, 0, -ESC_W, /* P - W */

114

-ESC_X, 0, -ESC_Z, '[', '\\', ']', '^', '_', /* X - _ */

115

'`', 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0, /* ` - g */

116

0, 0, 0, 0, 0, 0, ESC_n, 0, /* h - o */

117

-ESC_p, 0, ESC_r, -ESC_s, ESC_tee, 0, 0, -ESC_w, /* p - w */

118

0, 0, -ESC_z /* x - z */

119

};

120

121

#else /* This is the "abnormal" table for EBCDIC systems */

122

static const short int escapes[] = {

123

/* 48 */ 0, 0, 0, '.', '<', '(', '+', '|',

124

/* 50 */ '&', 0, 0, 0, 0, 0, 0, 0,

125

/* 58 */ 0, 0, '!', '$', '*', ')', ';', '~',

126

/* 60 */ '-', '/', 0, 0, 0, 0, 0, 0,

127

/* 68 */ 0, 0, '|', ',', '%', '_', '>', '?',

128

/* 70 */ 0, 0, 0, 0, 0, 0, 0, 0,

129

/* 78 */ 0, '`', ':', '#', '@', '\'', '=', '"',

130

/* 80 */ 0, 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0,

131

/* 88 */ 0, 0, 0, '{', 0, 0, 0, 0,

132

/* 90 */ 0, 0, 0, 'l', 0, ESC_n, 0, -ESC_p,

133

/* 98 */ 0, ESC_r, 0, '}', 0, 0, 0, 0,

134

/* A0 */ 0, '~', -ESC_s, ESC_tee, 0, 0, -ESC_w, 0,

135

/* A8 */ 0,-ESC_z, 0, 0, 0, '[', 0, 0,

136

/* B0 */ 0, 0, 0, 0, 0, 0, 0, 0,

137

/* B8 */ 0, 0, 0, 0, 0, ']', '=', '-',

138

/* C0 */ '{',-ESC_A, -ESC_B, -ESC_C, -ESC_D,-ESC_E, 0, -ESC_G,

139

/* C8 */ 0, 0, 0, 0, 0, 0, 0, 0,

140

/* D0 */ '}', 0, 0, 0, 0, 0, 0, -ESC_P,

141

/* D8 */-ESC_Q, 0, 0, 0, 0, 0, 0, 0,

142

/* E0 */ '\\', 0, -ESC_S, 0, 0, 0, -ESC_W, -ESC_X,

143

/* E8 */ 0,-ESC_Z, 0, 0, 0, 0, 0, 0,

144

/* F0 */ 0, 0, 0, 0, 0, 0, 0, 0,

145

/* F8 */ 0, 0, 0, 0, 0, 0, 0, 0

146

};

147

#endif

148

149

150

/* Tables of names of POSIX character classes and their lengths. The list of
lengths (posix_name_lengths) is terminated by a zero length entry. The first
three names must be alpha, lower, upper, as this is assumed for handling case
independence. */

static const char *const posix_names[] = {
  "alpha", "lower", "upper",
  "alnum", "ascii", "blank", "cntrl", "digit", "graph",
  "print", "punct", "space", "word",  "xdigit" };

158

159

/* Lengths of the names in posix_names, in the same order. The trailing zero
entry terminates the list. */

static const uschar posix_name_lengths[] = {
  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };

161

162

/* Table of class bit maps for each POSIX class; up to three may be combined

163

to form the class. The table for [:blank:] is dynamically modified to remove

164

the vertical space characters. */

165

166

static const int posix_class_maps[] = {

167

cbit_lower, cbit_upper, -1, /* alpha */

168

cbit_lower, -1, -1, /* lower */

169

cbit_upper, -1, -1, /* upper */

170

cbit_digit, cbit_lower, cbit_upper, /* alnum */

171

cbit_print, cbit_cntrl, -1, /* ascii */

172

cbit_space, -1, -1, /* blank - a GNU extension */

173

cbit_cntrl, -1, -1, /* cntrl */

174

cbit_digit, -1, -1, /* digit */

175

cbit_graph, -1, -1, /* graph */

176

cbit_print, -1, -1, /* print */

177

cbit_punct, -1, -1, /* punct */

178

cbit_space, -1, -1, /* space */

179

cbit_word, -1, -1, /* word - a Perl extension */

180

cbit_xdigit,-1, -1 /* xdigit */

181

};

182

183

/* Table to identify digits and hex digits. This is used when compiling
patterns. Note that the tables in chartables are dependent on the locale, and
may mark arbitrary characters as digits - but the PCRE compiling code expects
to handle only 0-9, a-f, and A-F as digits when compiling. That is why we have
a private table here. It costs 256 bytes, but it is a lot faster than doing
character value tests (at least in some simple cases I timed), and in some
applications one wants PCRE to compile efficiently as well as match
efficiently.

For convenience, we use the same bit definitions as in chartables:

  0x04   decimal digit
  0x08   hexadecimal digit

Then we can use ctype_digit and ctype_xdigit in the code. */

#if !EBCDIC    /* This is the "normal" case, for ASCII systems */
static const unsigned char digitab[] =
  {
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   8- 15 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - '  */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  ( - /  */
  0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /*  0 - 7  */
  0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00, /*  8 - ?  */
  0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /*  @ - G  */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  H - O  */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  P - W  */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  X - _  */
  0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /*  ` - g  */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  h - o  */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  p - w  */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  x -127 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */

#else          /* This is the "abnormal" case, for EBCDIC systems */
static const unsigned char digitab[] =
  {
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7  0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   8- 15    */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 10 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31    */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  32- 39 20 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  40- 47    */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  48- 55 30 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  56- 63    */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - 71 40 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  72- |     */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  & - 87 50 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  88- 95    */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  - -103 60 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 104- ?     */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 70 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- "     */
  0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* 128- g  80 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  h -143    */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144- p  90 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  q -159    */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160- x  A0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  y -175    */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  ^ -183 B0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191    */
  0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /*  { - G  C0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  H -207    */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  } - P  D0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  Q -223    */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  \ - X  E0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  Y -239    */
  0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /*  0 - 7  F0 */
  0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00};/*  8 -255    */

static const unsigned char ebcdic_chartab[] = { /* chartable partial dup */
  0x80,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /*   0-  7 */
  0x00,0x00,0x00,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
  0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /*  16- 23 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
  0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /*  32- 39 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  40- 47 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  48- 55 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  56- 63 */
  0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - 71 */
  0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80, /*  72- |  */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  & - 87 */
  0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /*  88- 95 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  - -103 */
  0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80, /* 104- ?  */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- "  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* 128- g  */
  0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /*  h -143 */
  0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* 144- p  */
  0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /*  q -159 */
  0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /* 160- x  */
  0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /*  y -175 */
  0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  ^ -183 */
  0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
  0x80,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  { - G  */
  0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /*  H -207 */
  0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  } - P  */
  0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /*  Q -223 */
  0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /*  \ - X  */
  0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /*  Y -239 */
  0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00};/*  8 -255 */
#endif

305

306

307

/* Definition to allow mutual recursion */

308

309

static BOOL

310

compile_regex(int, int, int *, uschar **, const uschar **, const char **,

311

BOOL, int, int *, int *, branch_chain *, compile_data *);

312

313

/* Structure for building a chain of data that actually lives on the

314

stack, for holding the values of the subject pointer at the start of each

315

subpattern, so as to detect when an empty string has been matched by a

316

subpattern - to break infinite loops. When NO_RECURSE is set, these blocks

317

are on the heap, not on the stack. */

318

319

typedef struct eptrblock {

320

struct eptrblock *epb_prev;

321

const uschar *epb_saved_eptr;

322

} eptrblock;

323

324

/* Flag bits for the match() function */

#define match_condassert   0x01    /* Called to check a condition assertion */
#define match_isgroup      0x02    /* Set if start of bracketed group */

/* Non-error returns from the match() function. Error returns are externally
defined PCRE_ERROR_xxx codes, which are all negative. */

#define MATCH_MATCH        1
#define MATCH_NOMATCH      0

334

335

336

337

/*************************************************

338

* Global variables *

339

*************************************************/

340

341

/* PCRE is thread-clean and doesn't use any global variables in the normal

342

sense. However, it calls memory allocation and free functions via the four

343

indirections below, and it can optionally do callouts. These values can be

344

changed by the caller, but are shared between all threads. However, when

345

compiling for Virtual Pascal, things are done differently (see pcre.in). */

346

347

#ifndef VPCOMPAT

348

#ifdef __cplusplus

349

extern "C" void *(*pcre_malloc)(size_t) = malloc;

350

extern "C" void (*pcre_free)(void *) = free;

351

extern "C" void *(*pcre_stack_malloc)(size_t) = malloc;

352

extern "C" void (*pcre_stack_free)(void *) = free;

353

extern "C" int (*pcre_callout)(pcre_callout_block *) = NULL;

354

#else

355

void *(*pcre_malloc)(size_t) = malloc;

356

void (*pcre_free)(void *) = free;

357

void *(*pcre_stack_malloc)(size_t) = malloc;

358

void (*pcre_stack_free)(void *) = free;

359

int (*pcre_callout)(pcre_callout_block *) = NULL;

360

#endif

361

#endif

362

363

364

/*************************************************
*    Macros and tables for character handling    *
*************************************************/

/* When UTF-8 encoding is being used, a character is no longer just a single
byte. The macros for character handling generate simple sequences when used in
byte-mode, and more complicated ones for UTF-8 characters.

Note that the macros expand to complete statements (or a statement followed by
a braced block), and that their arguments are evaluated more than once. */

#ifndef SUPPORT_UTF8
#define GETCHAR(c, eptr) c = *eptr;
#define GETCHARINC(c, eptr) c = *eptr++;
#define GETCHARINCTEST(c, eptr) c = *eptr++;
#define GETCHARLEN(c, eptr, len) c = *eptr;
#define BACKCHAR(eptr)

#else   /* SUPPORT_UTF8 */

/* Get the next UTF-8 character, not advancing the pointer. This is called when
we know we are in UTF-8 mode. */

#define GETCHAR(c, eptr) \
  c = *eptr; \
  if ((c & 0xc0) == 0xc0) \
    { \
    int gcii; \
    int gcaa = utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
    int gcss = 6*gcaa; \
    c = (c & utf8_table3[gcaa]) << gcss; \
    for (gcii = 1; gcii <= gcaa; gcii++) \
      { \
      gcss -= 6; \
      c |= (eptr[gcii] & 0x3f) << gcss; \
      } \
    }

/* Get the next UTF-8 character, advancing the pointer. This is called when we
know we are in UTF-8 mode. */

#define GETCHARINC(c, eptr) \
  c = *eptr++; \
  if ((c & 0xc0) == 0xc0) \
    { \
    int gcaa = utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
    int gcss = 6*gcaa; \
    c = (c & utf8_table3[gcaa]) << gcss; \
    while (gcaa-- > 0) \
      { \
      gcss -= 6; \
      c |= (*eptr++ & 0x3f) << gcss; \
      } \
    }

/* Get the next character, testing for UTF-8 mode, and advancing the pointer.
This macro reads md->utf8, so a variable named md must be in scope. */

#define GETCHARINCTEST(c, eptr) \
  c = *eptr++; \
  if (md->utf8 && (c & 0xc0) == 0xc0) \
    { \
    int gcaa = utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
    int gcss = 6*gcaa; \
    c = (c & utf8_table3[gcaa]) << gcss; \
    while (gcaa-- > 0) \
      { \
      gcss -= 6; \
      c |= (*eptr++ & 0x3f) << gcss; \
      } \
    }

/* Get the next UTF-8 character, not advancing the pointer, incrementing length
if there are extra bytes. This is called when we know we are in UTF-8 mode. */

#define GETCHARLEN(c, eptr, len) \
  c = *eptr; \
  if ((c & 0xc0) == 0xc0) \
    { \
    int gcii; \
    int gcaa = utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
    int gcss = 6*gcaa; \
    c = (c & utf8_table3[gcaa]) << gcss; \
    for (gcii = 1; gcii <= gcaa; gcii++) \
      { \
      gcss -= 6; \
      c |= (eptr[gcii] & 0x3f) << gcss; \
      } \
    len += gcaa; \
    }

/* If the pointer is not at the start of a character, move it back until
it is. Called only in UTF-8 mode. */

#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--;

#endif

457

458

459

460

/*************************************************

461

* Default character tables *

462

*************************************************/

463

464

/* A default set of character tables is included in the PCRE binary. Its source

465

is built by the maketables auxiliary program, which uses the default C ctypes

466

functions, and put in the file chartables.c. These tables are used by PCRE

467

whenever the caller of pcre_compile() does not provide an alternate set of

468

tables. */

469

470

#include "chartables.c"

471

472

473

474

#ifdef SUPPORT_UTF8

475

/*************************************************
*           Tables for UTF-8 support             *
*************************************************/

/* These are the breakpoints for different numbers of bytes in a UTF-8
character: entry i is the largest value that fits in an (i+1)-byte
sequence. */

static const int utf8_table1[] =
  { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};

/* These are the indicator bits and the mask for the data bits to set in the
first byte of a character, indexed by the number of additional bytes. */

static const int utf8_table2[] = { 0,    0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
static const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

490

491

/* Table of the number of extra characters, indexed by the first character

492

masked with 0x3f. The highest number for a valid UTF-8 character is in fact

493

0x3d. */

494

495

static const uschar utf8_table4[] = {

496

1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,

497

1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,

498

2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,

499

3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };

500

501

502

/*************************************************

503

* Convert character value to UTF-8 *

504

*************************************************/

505

506

/* This function takes an integer value in the range 0 - 0x7fffffff

507

and encodes it as a UTF-8 character in 0 to 6 bytes.

508

509

Arguments:

510

cvalue the character value

511

buffer pointer to buffer for result - at least 6 bytes long

512

513

Returns: number of characters placed in the buffer

514

515

516

static int

517

ord2utf8(int cvalue, uschar *buffer)

518

{

519

520

for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)

521

if (cvalue <= utf8_table1[i]) break;

522

buffer += i;

523

for (j = i; j > 0; j--)

524

{

525

*buffer-- = 0x80 | (cvalue & 0x3f);

526

cvalue >>= 6;

527

}

528

*buffer = utf8_table2[i] | cvalue;

529

return i + 1;

530

}

531

#endif

532

533

534

535

/*************************************************

536

* Print compiled regex *

537

*************************************************/

538

539

/* The code for doing this is held in a separate file that is also included in

540

pcretest.c. It defines a function called print_internals(). */

541

542

#ifdef DEBUG

543

#include "printint.c"

544

#endif

545

546

547

548

/*************************************************

549

* Return version string *

550

*************************************************/

551

552

#define STRING(a) # a

553

#define XSTRING(s) STRING(s)

554

555

EXPORT const char *

556

pcre_version(void)

557

{

558

return XSTRING(PCRE_MAJOR) "." XSTRING(PCRE_MINOR) " " XSTRING(PCRE_DATE);

559

}

560

561

562

563

564

/*************************************************

565

* Flip bytes in an integer *

566

*************************************************/

567

568

/* This function is called when the magic number in a regex doesn't match in

569

order to flip its bytes to see if we are dealing with a pattern that was

570

compiled on a host of different endianness. If so, this function is used to

571

flip other byte values.

572

573

Arguments:

574

value the number to flip

575

n the number of bytes to flip (assumed to be 2 or 4)

576

577

Returns: the flipped value

578

579

580

static pcre_uint16

581

byteflip2(pcre_uint16 value)

582

{

583

return ((value & 0x00ff) << 8) |

584

((value & 0xff00) >> 8);

585

}

586

587

static pcre_uint32

588

byteflip4(pcre_uint32 value)

589

{

590

return ((value & 0x000000ff) << 24) |

591

((value & 0x0000ff00) << 8) |

592

((value & 0x00ff0000) >> 8) |

593

((value & 0xff000000) >> 24);

594

}

595

596

/*************************************************

597

* Test for a byte-flipped compiled regex *

598

*************************************************/

599

600

/* This function is called from pce_exec() and also from pcre_fullinfo(). Its

601

job is to test whether the regex is byte-flipped - that is, it was compiled on

602

a system of opposite endianness. The function is called only when the native

603

MAGIC_NUMBER test fails. If the regex is indeed flipped, we flip all the

604

relevant values into a different data block, and return it.

605

606

Arguments:

607

re points to the regex

608

study points to study data, or NULL

609

internal_re points to a new regex block

610

internal_study points to a new study block

611

612

Returns: the new block if is is indeed a byte-flipped regex

613

NULL if it is not

614

615

616

static real_pcre *

617

try_flipped(const real_pcre *re, real_pcre *internal_re,

618

const pcre_study_data *study, pcre_study_data *internal_study)

619

{

620

if (byteflip4(re->magic_number) != MAGIC_NUMBER)

621

return NULL;

622

623

*internal_re = *re; /* To copy other fields */

624

internal_re->size = byteflip4(re->size);

625

internal_re->options = byteflip4(re->options);

626

internal_re->top_bracket = byteflip2(re->top_bracket);

627

internal_re->top_backref = byteflip2(re->top_backref);

628

internal_re->first_byte = byteflip2(re->first_byte);

629

internal_re->req_byte = byteflip2(re->req_byte);

630

internal_re->name_table_offset = byteflip2(re->name_table_offset);

631

internal_re->name_entry_size = byteflip2(re->name_entry_size);

632

internal_re->name_count = byteflip2(re->name_count);

633

634

if (study != NULL)

635

{

636

*internal_study = *study; /* To copy other fields */

637

internal_study->size = byteflip4(study->size);

638

internal_study->options = byteflip4(study->options);

639

}

640

641

return internal_re;

642

}

643

644

645

646

/*************************************************

647

* (Obsolete) Return info about compiled pattern *

648

*************************************************/

649

650

/* This is the original "info" function. It picks potentially useful data out

651

of the private structure, but its interface was too rigid. It remains for

652

backwards compatibility. The public options are passed back in an int - though

653

the re->options field has been expanded to a long int, all the public options

654

at the low end of it, and so even on 16-bit systems this will still be OK.

655

Therefore, I haven't changed the API for pcre_info().

656

657

Arguments:

658

argument_re points to compiled code

659

optptr where to pass back the options

660

first_byte where to pass back the first character,

661

or -1 if multiline and all branches start ^,

662

or -2 otherwise

663

664

Returns: number of capturing subpatterns

665

or negative values on error

666

667

668

EXPORT int

669

pcre_info(const pcre *argument_re, int *optptr, int *first_byte)

670

{

671

real_pcre internal_re;

672

const real_pcre *re = (const real_pcre *)argument_re;

673

if (re == NULL) return PCRE_ERROR_NULL;

674

if (re->magic_number != MAGIC_NUMBER)

675

{

676

re = try_flipped(re, &internal_re, NULL, NULL);

677

if (re == NULL) return PCRE_ERROR_BADMAGIC;

678

}

679

if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS);

680

if (first_byte != NULL)

681

*first_byte = ((re->options & PCRE_FIRSTSET) != 0)? re->first_byte :

682

((re->options & PCRE_STARTLINE) != 0)? -1 : -2;

683

return re->top_bracket;

684

}

685

686

687

688

/*************************************************

689

* Return info about compiled pattern *

690

*************************************************/

691

692

/* This is a newer "info" function which has an extensible interface so

693

that additional items can be added compatibly.

694

695

Arguments:

696

argument_re points to compiled code

697

extra_data points extra data, or NULL

698

what what information is required

699

where where to put the information

700

701

Returns: 0 if data returned, negative on error

702

703

704

EXPORT int

705

pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,

706

void *where)

707

{

708

real_pcre internal_re;

709

pcre_study_data internal_study;

710

const real_pcre *re = (const real_pcre *)argument_re;

711

const pcre_study_data *study = NULL;

712

713

if (re == NULL || where == NULL) return PCRE_ERROR_NULL;

714

715

if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)

716

study = (const pcre_study_data *)extra_data->study_data;

717

718

if (re->magic_number != MAGIC_NUMBER)

719

{

720

re = try_flipped(re, &internal_re, study, &internal_study);

721

if (re == NULL) return PCRE_ERROR_BADMAGIC;

722

if (study != NULL) study = &internal_study;

723

}

724

725

switch (what)

726

{

727

case PCRE_INFO_OPTIONS:

728

*((unsigned long int *)where) = re->options & PUBLIC_OPTIONS;

729

break;

730

731

case PCRE_INFO_SIZE:

732

*((size_t *)where) = re->size;

733

break;

734

735

case PCRE_INFO_STUDYSIZE:

736

*((size_t *)where) = (study == NULL)? 0 : study->size;

737

break;

738

739

case PCRE_INFO_CAPTURECOUNT:

740

*((int *)where) = re->top_bracket;

741

break;

742

743

case PCRE_INFO_BACKREFMAX:

744

*((int *)where) = re->top_backref;

745

break;

746

747

case PCRE_INFO_FIRSTBYTE:

748

*((int *)where) =

749

((re->options & PCRE_FIRSTSET) != 0)? re->first_byte :

750

((re->options & PCRE_STARTLINE) != 0)? -1 : -2;

751

break;

752

753

/* Make sure we pass back the pointer to the bit vector in the external

754

block, not the internal copy (with flipped integer fields). */

755

756

case PCRE_INFO_FIRSTTABLE:

757

*((const uschar **)where) =

758

(study != NULL && (study->options & PCRE_STUDY_MAPPED) != 0)?

759

((const pcre_study_data *)extra_data->study_data)->start_bits : NULL;

760

break;

761

762

case PCRE_INFO_LASTLITERAL:

763

*((int *)where) =

764

((re->options & PCRE_REQCHSET) != 0)? re->req_byte : -1;

765

break;

766

767

case PCRE_INFO_NAMEENTRYSIZE:

768

*((int *)where) = re->name_entry_size;

769

break;

770

771

case PCRE_INFO_NAMECOUNT:

772

*((int *)where) = re->name_count;

773

break;

774

775

case PCRE_INFO_NAMETABLE:

776

*((const uschar **)where) = (const uschar *)re + re->name_table_offset;

777

break;

778

779

case PCRE_INFO_DEFAULT_TABLES:

780

*((const uschar **)where) = (const uschar *)pcre_default_tables;

781

break;

782

783

default: return PCRE_ERROR_BADOPTION;

784

}

785

786

return 0;

787

}

788

789

790

791

/*************************************************

792

* Return info about what features are configured *

793

*************************************************/

794

795

/* This is function which has an extensible interface so that additional items

796

can be added compatibly.

797

798

Arguments:

799

what what information is required

800

where where to put the information

801

802

Returns: 0 if data returned, negative on error

803

804

805

EXPORT int

806

pcre_config(int what, void *where)

807

{

808

switch (what)

809

{

810

case PCRE_CONFIG_UTF8:

811

#ifdef SUPPORT_UTF8

812

*((int *)where) = 1;

813

#else

814

*((int *)where) = 0;

815

#endif

816

break;

817

818

case PCRE_CONFIG_UNICODE_PROPERTIES:

819

#ifdef SUPPORT_UCP

820

*((int *)where) = 1;

821

#else

822

*((int *)where) = 0;

823

#endif

824

break;

825

826

case PCRE_CONFIG_NEWLINE:

827

*((int *)where) = NEWLINE;

828

break;

829

830

case PCRE_CONFIG_LINK_SIZE:

831

*((int *)where) = LINK_SIZE;

832

break;

833

834

case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD:

835

*((int *)where) = POSIX_MALLOC_THRESHOLD;

836

break;

837

838

case PCRE_CONFIG_MATCH_LIMIT:

839

*((unsigned int *)where) = MATCH_LIMIT;

840

break;

841

842

case PCRE_CONFIG_STACKRECURSE:

843

#ifdef NO_RECURSE

844

*((int *)where) = 0;

845

#else

846

*((int *)where) = 1;

847

#endif

848

break;

849

850

default: return PCRE_ERROR_BADOPTION;

851

}

852

853

return 0;

854

}

855

856

857

858

#ifdef DEBUG

859

/*************************************************

860

* Debugging function to print chars *

861

*************************************************/

862

863

/* Print a sequence of chars in printable format, stopping at the end of the

864

subject if the requested.

865

866

Arguments:

867

p points to characters

868

length number to print

869

is_subject TRUE if printing from within md->start_subject

870

md pointer to matching data block, if is_subject is TRUE

871

872

Returns: nothing

873

874

875

static void

876

pchars(const uschar *p, int length, BOOL is_subject, match_data *md)

877

{

878

int c;

879

if (is_subject && length > md->end_subject - p) length = md->end_subject - p;

880

while (length-- > 0)

881

if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);

882

}

883

#endif

884

885

886

887

888

/*************************************************

889

* Handle escapes *

890

*************************************************/

891

892

/* This function is called when a \ has been encountered. It either returns a

893

positive value for a simple escape such as \n, or a negative value which

894

encodes one of the more complicated things such as \d. When UTF-8 is enabled,

895

a positive value greater than 255 may be returned. On entry, ptr is pointing at

896

the \. On exit, it is on the final character of the escape sequence.

897

898

Arguments:

899

ptrptr points to the pattern position pointer

900

errorptr points to the pointer to the error message

901

bracount number of previous extracting brackets

902

options the options bits

903

isclass TRUE if inside a character class

904

905

Returns: zero or positive => a data character

906

negative => a special escape sequence

907

on error, errorptr is set

908

909

910

static int

911

check_escape(const uschar **ptrptr, const char **errorptr, int bracount,

912

int options, BOOL isclass)

913

{

914

const uschar *ptr = *ptrptr;

915

int c, i;

916

917

/* If backslash is at the end of the pattern, it's an error. */

918

919

c = *(++ptr);

920

if (c == 0) *errorptr = ERR1;

921

922

/* Non-alphamerics are literals. For digits or letters, do an initial lookup in

923

a table. A non-zero result is something that can be returned immediately.

924

Otherwise further processing may be required. */

925

926

#if !EBCDIC /* ASCII coding */

927

else if (c < '0' || c > 'z') {} /* Not alphameric */

928

else if ((i = escapes[c - '0']) != 0) c = i;

929

930

#else /* EBCDIC coding */

931

else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {} /* Not alphameric */

932

else if ((i = escapes[c - 0x48]) != 0) c = i;

933

#endif

934

935

/* Escapes that need further processing, or are illegal. */

936

937

else

938

{

939

const uschar *oldptr;

940

switch (c)

941

{

942

/* A number of Perl escapes are not handled by PCRE. We give an explicit

943

error. */

944

945

case 'l':

946

case 'L':

947

case 'N':

948

case 'u':

949

case 'U':

950

*errorptr = ERR37;

951

break;

952

953

/* The handling of escape sequences consisting of a string of digits

954

starting with one that is not zero is not straightforward. By experiment,

955

the way Perl works seems to be as follows:

956

957

Outside a character class, the digits are read as a decimal number. If the

958

number is less than 10, or if there are that many previous extracting

959

left brackets, then it is a back reference. Otherwise, up to three octal

960

digits are read to form an escaped byte. Thus \123 is likely to be octal

961

123 (cf \0123, which is octal 012 followed by the literal 3). If the octal

962

value is greater than 377, the least significant 8 bits are taken. Inside a

963

character class, \ followed by a digit is always an octal number. */

964

965

case '1': case '2': case '3': case '4': case '5':

966

case '6': case '7': case '8': case '9':

967

968

if (!isclass)

969

{

970

oldptr = ptr;

971

c -= '0';

972

while ((digitab[ptr[1]] & ctype_digit) != 0)

973

c = c * 10 + *(++ptr) - '0';

974

if (c < 10 || c <= bracount)

975

{

976

c = -(ESC_REF + c);

977

break;

978

}

979

ptr = oldptr; /* Put the pointer back and fall through */

980

}

981

982

/* Handle an octal number following \. If the first digit is 8 or 9, Perl

983

generates a binary zero byte and treats the digit as a following literal.

984

Thus we have to pull back the pointer by one. */

985

986

if ((c = *ptr) >= '8')

987

{

988

ptr--;

989

c = 0;

990

break;

991

}

992

993

/* \0 always starts an octal number, but we may drop through to here with a

994

larger first octal digit. */

995

996

case '0':

997

c -= '0';

998

while(i++ < 2 && ptr[1] >= '0' && ptr[1] <= '7')

999

c = c * 8 + *(++ptr) - '0';

1000

c &= 255; /* Take least significant 8 bits */

1001

break;

1002

1003

/* \x is complicated when UTF-8 is enabled. \x{ddd} is a character number

1004

which can be greater than 0xff, but only if the ddd are hex digits. */

1005

1006

case 'x':

1007

#ifdef SUPPORT_UTF8

1008

if (ptr[1] == '{' && (options & PCRE_UTF8) != 0)

1009

{

1010

const uschar *pt = ptr + 2;

1011

1012

c = 0;

1013

while ((digitab[*pt] & ctype_xdigit) != 0)

1014

{

1015

int cc = *pt++;

1016

count++;

1017

#if !EBCDIC /* ASCII coding */

1018

if (cc >= 'a') cc -= 32; /* Convert to upper case */

1019

c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));

1020

#else /* EBCDIC coding */

1021

if (cc >= 'a' && cc <= 'z') cc += 64; /* Convert to upper case */

1022

c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));

1023

#endif

1024

}

1025

if (*pt == '}')

1026

{

1027

if (c < 0 || count > 8) *errorptr = ERR34;

1028

ptr = pt;

1029

break;

1030

}

1031

/* If the sequence of hex digits does not end with '}', then we don't

1032

recognize this construct; fall through to the normal \x handling. */

1033

}

1034

#endif

1035

1036

/* Read just a single hex char */

1037

1038

c = 0;

1039

while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0)

1040

{

1041

int cc; /* Some compilers don't like ++ */

1042

cc = *(++ptr); /* in initializers */

1043

#if !EBCDIC /* ASCII coding */

1044

if (cc >= 'a') cc -= 32; /* Convert to upper case */

1045

c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));

1046

#else /* EBCDIC coding */

1047

if (cc <= 'z') cc += 64; /* Convert to upper case */

1048

c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));

1049

#endif

1050

}

1051

break;

1052

1053

/* Other special escapes not starting with a digit are straightforward */

1054

1055

case 'c':

1056

c = *(++ptr);

1057

if (c == 0)

1058

{

1059

*errorptr = ERR2;

1060

return 0;

1061

}

1062

1063

/* A letter is upper-cased; then the 0x40 bit is flipped. This coding

1064

is ASCII-specific, but then the whole concept of \cx is ASCII-specific.

1065

(However, an EBCDIC equivalent has now been added.) */

1066

1067

#if !EBCDIC /* ASCII coding */

1068

if (c >= 'a' && c <= 'z') c -= 32;

1069

c ^= 0x40;

1070

#else /* EBCDIC coding */

1071

if (c >= 'a' && c <= 'z') c += 64;

1072

c ^= 0xC0;

1073

#endif

1074

break;

1075

1076

/* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any

1077

other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,

1078

for Perl compatibility, it is a literal. This code looks a bit odd, but

1079

there used to be some cases other than the default, and there may be again

1080

in future, so I haven't "optimized" it. */

1081

1082

default:

1083

if ((options & PCRE_EXTRA) != 0) switch(c)

1084

{

1085

default:

1086

*errorptr = ERR3;

1087

break;

1088

}

1089

break;

1090

}

1091

}

1092

1093

*ptrptr = ptr;

1094

return c;

1095

}

1096

1097

1098

1099

#ifdef SUPPORT_UCP
/*************************************************
*               Handle \P and \p                 *
*************************************************/

/* This function is called after \P or \p has been encountered, provided that
PCRE is compiled with support for Unicode properties. On entry, ptrptr is
pointing at the P or p. On exit, it is pointing at the final character of the
escape sequence.

Argument:
  ptrptr     points to the pattern position pointer
  negptr     points to a boolean that is set TRUE for negation else FALSE
  errorptr   points to the pointer to the error message

Returns:     value from ucp_type_table, or -1 for an invalid type
*/

static int
get_ucp(const uschar **ptrptr, BOOL *negptr, const char **errorptr)
{
  int c, i, bot, top;
  const uschar *ptr = *ptrptr;
  char name[4];

  c = *(++ptr);
  if (c == 0) goto ERROR_RETURN;

  *negptr = FALSE;

  /* \P or \p can be followed by a one- or two-character name in {},
  optionally preceded by ^ for negation. */

  if (c == '{')
  {
    if (ptr[1] == '^')
    {
      *negptr = TRUE;
      ptr++;
    }

    /* At most three characters are examined: up to two name characters and
    the closing brace. If the loop runs to completion without seeing '}', the
    name is too long and is rejected below before name[] is terminated, so
    the terminating zero is never written beyond name[2]. */

    for (i = 0; i <= 2; i++)
    {
      c = *(++ptr);
      if (c == 0) goto ERROR_RETURN;
      if (c == '}') break;
      name[i] = c;
    }
    if (c != '}')   /* Try to distinguish error cases */
    {
      while (*(++ptr) != 0 && *ptr != '}');
      if (*ptr == '}') goto UNKNOWN_RETURN; else goto ERROR_RETURN;
    }
    name[i] = 0;
  }

  /* Otherwise there is just one following character */

  else
  {
    name[0] = c;
    name[1] = 0;
  }

  *ptrptr = ptr;

  /* Search for a recognized property name using binary chop */

  bot = 0;
  top = sizeof(utt)/sizeof(ucp_type_table);

  while (bot < top)
  {
    i = (bot + top)/2;
    c = strcmp(name, utt[i].name);
    if (c == 0) return utt[i].value;
    if (c > 0) bot = i + 1; else top = i;
  }

  /* A well-formed but unrecognized name */

  UNKNOWN_RETURN:
  *errorptr = ERR47;
  *ptrptr = ptr;
  return -1;

  /* A malformed \P or \p sequence */

  ERROR_RETURN:
  *errorptr = ERR46;
  *ptrptr = ptr;
  return -1;
}
#endif

1188

1189

1190

1191

1192

/*************************************************

1193

* Check for counted repeat *

1194

*************************************************/

1195

1196

/* This function is called when a '{' is encountered in a place where it might

1197

start a quantifier. It looks ahead to see if it really is a quantifier or not.

1198

It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd}

1199

where the ddds are digits.

1200

1201

Arguments:

1202

p pointer to the first char after '{'

1203

1204

Returns: TRUE or FALSE

1205

1206

1207

static BOOL

1208

is_counted_repeat(const uschar *p)

1209

{

1210

if ((digitab[*p++] & ctype_digit) == 0) return FALSE;

1211

while ((digitab[*p] & ctype_digit) != 0) p++;

1212

if (*p == '}') return TRUE;

1213

1214

if (*p++ != ',') return FALSE;

1215

if (*p == '}') return TRUE;

1216

1217

if ((digitab[*p++] & ctype_digit) == 0) return FALSE;

1218

while ((digitab[*p] & ctype_digit) != 0) p++;

1219

1220

return (*p == '}');

1221

}

1222

1223

1224

1225

/*************************************************

1226

* Read repeat counts *

1227

*************************************************/

1228

1229

/* Read an item of the form {n,m} and return the values. This is called only

1230

after is_counted_repeat() has confirmed that a repeat-count quantifier exists,

1231

so the syntax is guaranteed to be correct, but we need to check the values.

1232

1233

Arguments:

1234

p pointer to first char after '{'

1235

minp pointer to int for min

1236

maxp pointer to int for max

1237

returned as -1 if no max

1238

errorptr points to pointer to error message

1239

1240

Returns: pointer to '}' on success;

1241

current ptr on error, with errorptr set

1242

1243

1244

static const uschar *

1245

read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr)

1246

{

1247

int min = 0;

1248

int max = -1;

1249

1250

/* Read the minimum value and do a paranoid check: a negative value indicates

1251

an integer overflow. */

1252

1253

while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';

1254

if (min < 0 || min > 65535)

1255

{

1256

*errorptr = ERR5;

1257

return p;

1258

}

1259

1260

/* Read the maximum value if there is one, and again do a paranoid on its size.

1261

Also, max must not be less than min. */

1262

1263

if (*p == '}') max = min; else

1264

{

1265

if (*(++p) != '}')

1266

{

1267

max = 0;

1268

while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';

1269

if (max < 0 || max > 65535)

1270

{

1271

*errorptr = ERR5;

1272

return p;

1273

}

1274

if (max < min)

1275

{

1276

*errorptr = ERR4;

1277

return p;

1278

}

1279

}

1280

}

1281

1282

/* Fill in the required variables, and pass back the pointer to the terminating

1283

'}'. */

1284

1285

*minp = min;

1286

*maxp = max;

1287

return p;

1288

}

1289

1290

1291

1292

/*************************************************

1293

* Find first significant op code *

1294

*************************************************/

1295

1296

/* This is called by several functions that scan a compiled expression looking

1297

for a fixed first character, or an anchoring op code etc. It skips over things

1298

that do not influence this. For some calls, a change of option is important.

1299

For some calls, it makes sense to skip negative forward and all backward

1300

assertions, and also the \b assertion; for others it does not.

1301

1302

Arguments:

1303

code pointer to the start of the group

1304

options pointer to external options

1305

optbit the option bit whose changing is significant, or

1306

zero if none are

1307

skipassert TRUE if certain assertions are to be skipped

1308

1309

Returns: pointer to the first significant opcode

1310

1311

1312

static const uschar*

1313

first_significant_code(const uschar *code, int *options, int optbit,

1314

BOOL skipassert)

1315

{

1316

for (;;)

1317

{

1318

switch ((int)*code)

1319

{

1320

case OP_OPT:

1321

if (optbit > 0 && ((int)code[1] & optbit) != (*options & optbit))

1322

*options = (int)code[1];

1323

code += 2;

1324

break;

1325

1326

case OP_ASSERT_NOT:

1327

case OP_ASSERTBACK:

1328

case OP_ASSERTBACK_NOT:

1329

if (!skipassert) return code;

1330

do code += GET(code, 1); while (*code == OP_ALT);

1331

code += OP_lengths[*code];

1332

break;

1333

1334

case OP_WORD_BOUNDARY:

1335

case OP_NOT_WORD_BOUNDARY:

1336

if (!skipassert) return code;

1337

/* Fall through */

1338

1339

case OP_CALLOUT:

1340

case OP_CREF:

1341

case OP_BRANUMBER:

1342

code += OP_lengths[*code];

1343

break;

1344

1345

default:

1346

return code;

1347

}

1348

}

1349

/* Control never reaches here */

1350

}

1351

1352

1353

1354

1355

/*************************************************

1356

* Find the fixed length of a pattern *

1357

*************************************************/

1358

1359

/* Scan a pattern and compute the fixed length of subject that will match it,

1360

if the length is fixed. This is needed for dealing with backward assertions.

1361

In UTF8 mode, the result is in characters rather than bytes.

1362

1363

Arguments:

1364

code points to the start of the pattern (the bracket)

1365

options the compiling options

1366

1367

Returns: the fixed length, or -1 if there is no fixed length,

1368

or -2 if \C was encountered

1369

1370

1371

static int

1372

find_fixedlength(uschar *code, int options)

1373

{

1374

int length = -1;

1375

1376

1377

1378

1379

/* Scan along the opcodes for this branch. If we get to the end of the

1380

branch, check the length against that of the other branches. */

1381

1382

for (;;)

1383

{

1384

int d;

1385

1386

if (op >= OP_BRA) op = OP_BRA;

1387

1388

switch (op)

1389

{

1390

case OP_BRA:

1391

case OP_ONCE:

1392

case OP_COND:

1393

d = find_fixedlength(cc, options);

1394

if (d < 0) return d;

1395

branchlength += d;

1396

do cc += GET(cc, 1); while (*cc == OP_ALT);

1397

cc += 1 + LINK_SIZE;

1398

break;

1399

1400

/* Reached end of a branch; if it's a ket it is the end of a nested

1401

call. If it's ALT it is an alternation in a nested call. If it is

1402

END it's the end of the outer call. All can be handled by the same code. */

1403

1404

case OP_ALT:

1405

case OP_KET:

1406

case OP_KETRMAX:

1407

case OP_KETRMIN:

1408

case OP_END:

1409

if (length < 0) length = branchlength;

1410

else if (length != branchlength) return -1;

1411

if (*cc != OP_ALT) return length;

1412

cc += 1 + LINK_SIZE;

1413

branchlength = 0;

1414

break;

1415

1416

/* Skip over assertive subpatterns */

1417

1418

case OP_ASSERT:

1419

case OP_ASSERT_NOT:

1420

case OP_ASSERTBACK:

1421

case OP_ASSERTBACK_NOT:

1422

do cc += GET(cc, 1); while (*cc == OP_ALT);

1423

/* Fall through */

1424

1425

/* Skip over things that don't match chars */

1426

1427

case OP_REVERSE:

1428

case OP_BRANUMBER:

1429

case OP_CREF:

1430

case OP_OPT:

1431

case OP_CALLOUT:

1432

case OP_SOD:

1433

case OP_SOM:

1434

case OP_EOD:

1435

case OP_EODN:

1436

case OP_CIRC:

1437

case OP_DOLL:

1438

case OP_NOT_WORD_BOUNDARY:

1439

case OP_WORD_BOUNDARY:

1440

cc += OP_lengths[*cc];

1441

break;

1442

1443

/* Handle literal characters */

1444

1445

case OP_CHAR:

1446

case OP_CHARNC:

1447

branchlength++;

1448

cc += 2;

1449

#ifdef SUPPORT_UTF8

1450

if ((options & PCRE_UTF8) != 0)

1451

{

1452

while ((*cc & 0xc0) == 0x80) cc++;

1453

}

1454

#endif

1455

break;

1456

1457

/* Handle exact repetitions. The count is already in characters, but we

1458

need to skip over a multibyte character in UTF8 mode. */

1459

1460

case OP_EXACT:

1461

branchlength += GET2(cc,1);

1462

cc += 4;

1463

#ifdef SUPPORT_UTF8

1464

if ((options & PCRE_UTF8) != 0)

1465

{

1466

while((*cc & 0x80) == 0x80) cc++;

1467

}

1468

#endif

1469

break;

1470

1471

case OP_TYPEEXACT:

1472

branchlength += GET2(cc,1);

1473

cc += 4;

1474

break;

1475

1476

/* Handle single-char matchers */

1477

1478

case OP_PROP:

1479

case OP_NOTPROP:

1480

cc++;

1481

/* Fall through */

1482

1483

case OP_NOT_DIGIT:

1484

case OP_DIGIT:

1485

case OP_NOT_WHITESPACE:

1486

case OP_WHITESPACE:

1487

case OP_NOT_WORDCHAR:

1488

case OP_WORDCHAR:

1489

case OP_ANY:

1490

branchlength++;

1491

cc++;

1492

break;

1493

1494

/* The single-byte matcher isn't allowed */

1495

1496

case OP_ANYBYTE:

1497

return -2;

1498

1499

/* Check a class for variable quantification */

1500

1501

#ifdef SUPPORT_UTF8

1502

case OP_XCLASS:

1503

cc += GET(cc, 1) - 33;

1504

/* Fall through */

1505

#endif

1506

1507

case OP_CLASS:

1508

case OP_NCLASS:

1509

cc += 33;

1510

1511

switch (*cc)

1512

{

1513

case OP_CRSTAR:

1514

case OP_CRMINSTAR:

1515

case OP_CRQUERY:

1516

case OP_CRMINQUERY:

1517

return -1;

1518

1519

case OP_CRRANGE:

1520

case OP_CRMINRANGE:

1521

if (GET2(cc,1) != GET2(cc,3)) return -1;

1522

branchlength += GET2(cc,1);

1523

cc += 5;

1524

break;

1525

1526

default:

1527

branchlength++;

1528

}

1529

break;

1530

1531

/* Anything else is variable length */

1532

1533

default:

1534

return -1;

1535

}

1536

}

1537

/* Control never gets here */

1538

}

1539

1540

1541

1542

1543

/*************************************************

1544

* Scan compiled regex for numbered bracket *

1545

*************************************************/

1546

1547

/* This little function scans through a compiled pattern until it finds a

1548

capturing bracket with the given number.

1549

1550

Arguments:

1551

code points to start of expression

1552

utf8 TRUE in UTF-8 mode

1553

number the required bracket number

1554

1555

Returns: pointer to the opcode for the bracket, or NULL if not found

1556

1557

1558

static const uschar *

1559

find_bracket(const uschar *code, BOOL utf8, int number)

1560

{

1561

#ifndef SUPPORT_UTF8

1562

utf8 = utf8; /* Stop pedantic compilers complaining */

1563

#endif

1564

1565

for (;;)

1566

{

1567

1568

if (c == OP_END) return NULL;

1569

else if (c > OP_BRA)

1570

{

1571

int n = c - OP_BRA;

1572

if (n > EXTRACT_BASIC_MAX) n = GET2(code, 2+LINK_SIZE);

1573

if (n == number) return (uschar *)code;

1574

code += OP_lengths[OP_BRA];

1575

}

1576

else

1577

{

1578

code += OP_lengths[c];

1579

1580

#ifdef SUPPORT_UTF8

1581

1582

/* In UTF-8 mode, opcodes that are followed by a character may be followed

1583

by a multi-byte character. The length in the table is a minimum, so we have

1584

to scan along to skip the extra bytes. All opcodes are less than 128, so we

1585

can use relatively efficient code. */

1586

1587

if (utf8) switch(c)

1588

{

1589

case OP_CHAR:

1590

case OP_CHARNC:

1591

case OP_EXACT:

1592

case OP_UPTO:

1593

case OP_MINUPTO:

1594

case OP_STAR:

1595

case OP_MINSTAR:

1596

case OP_PLUS:

1597

case OP_MINPLUS:

1598

case OP_QUERY:

1599

case OP_MINQUERY:

1600

while ((*code & 0xc0) == 0x80) code++;

1601

break;

1602

1603

/* XCLASS is used for classes that cannot be represented just by a bit

1604

map. This includes negated single high-valued characters. The length in

1605

the table is zero; the actual length is stored in the compiled code. */

1606

1607

case OP_XCLASS:

1608

code += GET(code, 1) + 1;

1609

break;

1610

}

1611

#endif

1612

}

1613

}

1614

}

1615

1616

1617

1618

/*************************************************

1619

* Scan compiled regex for recursion reference *

1620

*************************************************/

1621

1622

/* This little function scans through a compiled pattern until it finds an

1623

instance of OP_RECURSE.

1624

1625

Arguments:

1626

code points to start of expression

1627

utf8 TRUE in UTF-8 mode

1628

1629

Returns: pointer to the opcode for OP_RECURSE, or NULL if not found

1630

1631

1632

static const uschar *

1633

find_recurse(const uschar *code, BOOL utf8)

1634

{

1635

#ifndef SUPPORT_UTF8

1636

utf8 = utf8; /* Stop pedantic compilers complaining */

1637

#endif

1638

1639

for (;;)

1640

{

1641

1642

if (c == OP_END) return NULL;

1643

else if (c == OP_RECURSE) return code;

1644

else if (c > OP_BRA)

1645

{

1646

code += OP_lengths[OP_BRA];

1647

}

1648

else

1649

{

1650

code += OP_lengths[c];

1651

1652

#ifdef SUPPORT_UTF8

1653

1654

/* In UTF-8 mode, opcodes that are followed by a character may be followed

1655

by a multi-byte character. The length in the table is a minimum, so we have

1656

to scan along to skip the extra bytes. All opcodes are less than 128, so we

1657

can use relatively efficient code. */

1658

1659

if (utf8) switch(c)

1660

{

1661

case OP_CHAR:

1662

case OP_CHARNC:

1663

case OP_EXACT:

1664

case OP_UPTO:

1665

case OP_MINUPTO:

1666

case OP_STAR:

1667

case OP_MINSTAR:

1668

case OP_PLUS:

1669

case OP_MINPLUS:

1670

case OP_QUERY:

1671

case OP_MINQUERY:

1672

while ((*code & 0xc0) == 0x80) code++;

1673

break;

1674

1675

/* XCLASS is used for classes that cannot be represented just by a bit

1676

map. This includes negated single high-valued characters. The length in

1677

the table is zero; the actual length is stored in the compiled code. */

1678

1679

case OP_XCLASS:

1680

code += GET(code, 1) + 1;

1681

break;

1682

}

1683

#endif

1684

}

1685

}

1686

}

1687

1688

1689

1690

/*************************************************

1691

* Scan compiled branch for non-emptiness *

1692

*************************************************/

1693

1694

/* This function scans through a branch of a compiled pattern to see whether it

1695

can match the empty string or not. It is called only from could_be_empty()

1696

below. Note that first_significant_code() skips over assertions. If we hit an

1697

unclosed bracket, we return "empty" - this means we've struck an inner bracket

1698

whose current branch will already have been scanned.

1699

1700

Arguments:

1701

code points to start of search

1702

endcode points to where to stop

1703

utf8 TRUE if in UTF8 mode

1704

1705

Returns: TRUE if what is matched could be empty

1706

1707

1708

static BOOL

1709

could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8)

1710

{

1711

1712

for (code = first_significant_code(code + 1 + LINK_SIZE, NULL, 0, TRUE);

1713

code < endcode;

1714

code = first_significant_code(code + OP_lengths[c], NULL, 0, TRUE))

1715

{

1716

const uschar *ccode;

1717

1718

c = *code;

1719

1720

if (c >= OP_BRA)

1721

{

1722

BOOL empty_branch;

1723

if (GET(code, 1) == 0) return TRUE; /* Hit unclosed bracket */

1724

1725

/* Scan a closed bracket */

1726

1727

empty_branch = FALSE;

1728

1729

{

1730

if (!empty_branch && could_be_empty_branch(code, endcode, utf8))

1731

empty_branch = TRUE;

1732

code += GET(code, 1);

1733

}

1734

while (*code == OP_ALT);

1735

if (!empty_branch) return FALSE; /* All branches are non-empty */

1736

code += 1 + LINK_SIZE;

1737

c = *code;

1738

}

1739

1740

else switch (c)

1741

{

1742

/* Check for quantifiers after a class */

1743

1744

#ifdef SUPPORT_UTF8

1745

case OP_XCLASS:

1746

ccode = code + GET(code, 1);

1747

goto CHECK_CLASS_REPEAT;

1748

#endif

1749

1750

case OP_CLASS:

1751

case OP_NCLASS:

1752

ccode = code + 33;

1753

1754

#ifdef SUPPORT_UTF8

1755

CHECK_CLASS_REPEAT:

1756

#endif

1757

1758

switch (*ccode)

1759

{

1760

case OP_CRSTAR: /* These could be empty; continue */

1761

case OP_CRMINSTAR:

1762

case OP_CRQUERY:

1763

case OP_CRMINQUERY:

1764

break;

1765

1766

default: /* Non-repeat => class must match */

1767

case OP_CRPLUS: /* These repeats aren't empty */

1768

case OP_CRMINPLUS:

1769

return FALSE;

1770

1771

case OP_CRRANGE:

1772

case OP_CRMINRANGE:

1773

if (GET2(ccode, 1) > 0) return FALSE; /* Minimum > 0 */

1774

break;

1775

}

1776

break;

1777

1778

/* Opcodes that must match a character */

1779

1780

case OP_PROP:

1781

case OP_NOTPROP:

1782

case OP_EXTUNI:

1783

case OP_NOT_DIGIT:

1784

case OP_DIGIT:

1785

case OP_NOT_WHITESPACE:

1786

case OP_WHITESPACE:

1787

case OP_NOT_WORDCHAR:

1788

case OP_WORDCHAR:

1789

case OP_ANY:

1790

case OP_ANYBYTE:

1791

case OP_CHAR:

1792

case OP_CHARNC:

1793

case OP_NOT:

1794

case OP_PLUS:

1795

case OP_MINPLUS:

1796

case OP_EXACT:

1797

case OP_NOTPLUS:

1798

case OP_NOTMINPLUS:

1799

case OP_NOTEXACT:

1800

case OP_TYPEPLUS:

1801

case OP_TYPEMINPLUS:

1802

case OP_TYPEEXACT:

1803

return FALSE;

1804

1805

/* End of branch */

1806

1807

case OP_KET:

1808

case OP_KETRMAX:

1809

case OP_KETRMIN:

1810

case OP_ALT:

1811

return TRUE;

1812

1813

/* In UTF-8 mode, STAR, MINSTAR, QUERY, MINQUERY, UPTO, and MINUPTO may be

1814

followed by a multibyte character */

1815

1816

#ifdef SUPPORT_UTF8

1817

case OP_STAR:

1818

case OP_MINSTAR:

1819

case OP_QUERY:

1820

case OP_MINQUERY:

1821

case OP_UPTO:

1822

case OP_MINUPTO:

1823

if (utf8) while ((code[2] & 0xc0) == 0x80) code++;

1824

break;

1825

#endif

1826

}

1827

}

1828

1829

return TRUE;

1830

}

1831

1832

1833

1834

/*************************************************

1835

* Scan compiled regex for non-emptiness *

1836

*************************************************/

1837

1838

/* This function is called to check for left recursive calls. We want to check

1839

the current branch of the current pattern to see if it could match the empty

1840

string. If it could, we must look outwards for branches at other levels,

1841

stopping when we pass beyond the bracket which is the subject of the recursion.

1842

1843

Arguments:

1844

code points to start of the recursion

1845

endcode points to where to stop (current RECURSE item)

1846

bcptr points to the chain of current (unclosed) branch starts

1847

utf8 TRUE if in UTF-8 mode

1848

1849

Returns: TRUE if what is matched could be empty

1850

1851

1852

static BOOL

1853

could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,

1854

BOOL utf8)

1855

{

1856

while (bcptr != NULL && bcptr->current >= code)

1857

{

1858

if (!could_be_empty_branch(bcptr->current, endcode, utf8)) return FALSE;

1859

bcptr = bcptr->outer;

1860

}

1861

return TRUE;

1862

}

1863

1864

1865

1866

/*************************************************

1867

* Check for POSIX class syntax *

1868

*************************************************/

1869

1870

/* This function is called when the sequence "[:" or "[." or "[=" is

1871

encountered in a character class. It checks whether this is followed by an

1872

optional ^ and then a sequence of letters, terminated by a matching ":]" or

1873

".]" or "=]".

1874

1875

Argument:

1876

ptr pointer to the initial [

1877

endptr where to return the end pointer

1878

cd pointer to compile data

1879

1880

Returns: TRUE or FALSE

1881

1882

1883

static BOOL

1884

check_posix_syntax(const uschar *ptr, const uschar **endptr, compile_data *cd)

1885

{

1886

int terminator; /* Don't combine these lines; the Solaris cc */

1887

terminator = *(++ptr); /* compiler warns about "non-constant" initializer. */

1888

if (*(++ptr) == '^') ptr++;

1889

while ((cd->ctypes[*ptr] & ctype_letter) != 0) ptr++;

1890

if (*ptr == terminator && ptr[1] == ']')

1891

{

1892

*endptr = ptr;

1893

return TRUE;

1894

}

1895

return FALSE;

1896

}

1897

1898

1899

1900

1901

/*************************************************

1902

* Check POSIX class name *

1903

*************************************************/

1904

1905

/* This function is called to check the name given in a POSIX-style class entry

1906

such as [:alnum:].

1907

1908

Arguments:

1909

ptr points to the first letter

1910

len the length of the name

1911

1912

Returns: a value representing the name, or -1 if unknown

1913

1914

1915

static int

1916

check_posix_name(const uschar *ptr, int len)

1917

{

1918

1919

while (posix_name_lengths[yield] != 0)

1920

{

1921

if (len == posix_name_lengths[yield] &&

1922

strncmp((const char *)ptr, posix_names[yield], len) == 0) return yield;

1923

yield++;

1924

}

1925

return -1;

1926

}

1927

1928

1929

/*************************************************

1930

* Adjust OP_RECURSE items in repeated group *

1931

*************************************************/

1932

1933

/* OP_RECURSE items contain an offset from the start of the regex to the group

1934

that is referenced. This means that groups can be replicated for fixed

1935

repetition simply by copying (because the recursion is allowed to refer to

1936

earlier groups that are outside the current group). However, when a group is

1937

optional (i.e. the minimum quantifier is zero), OP_BRAZERO is inserted before

1938

it, after it has been compiled. This means that any OP_RECURSE items within it

1939

that refer to the group itself or any contained groups have to have their

1940

offsets adjusted. That is the job of this function. Before it is called, the

1941

partially compiled regex must be temporarily terminated with OP_END.

1942

1943

Arguments:

1944

group points to the start of the group

1945

adjust the amount by which the group is to be moved

1946

utf8 TRUE in UTF-8 mode

1947

cd contains pointers to tables etc.

1948

1949

Returns: nothing

1950

1951

1952

static void

1953

adjust_recurse(uschar *group, int adjust, BOOL utf8, compile_data *cd)

1954

{

1955

uschar *ptr = group;

1956

while ((ptr = (uschar *)find_recurse(ptr, utf8)) != NULL)

1957

{

1958

int offset = GET(ptr, 1);

1959

if (cd->start_code + offset >= group) PUT(ptr, 1, offset + adjust);

1960

ptr += 1 + LINK_SIZE;

1961

}

1962

}

1963

1964

1965

1966

/*************************************************

1967

* Insert an automatic callout point *

1968

*************************************************/

1969

1970

/* This function is called when the PCRE_AUTO_CALLOUT option is set, to insert

1971

callout points before each pattern item.

1972

1973

Arguments:

1974

code current code pointer

1975

ptr current pattern pointer

1976

cd pointers to tables etc

1977

1978

Returns: new code pointer

1979

1980

1981

static uschar *

1982

auto_callout(uschar *code, const uschar *ptr, compile_data *cd)

1983

{

1984

*code++ = OP_CALLOUT;

1985

*code++ = 255;

1986

PUT(code, 0, ptr - cd->start_pattern); /* Pattern offset */

1987

PUT(code, LINK_SIZE, 0); /* Default length */

1988

return code + 2*LINK_SIZE;

1989

}

1990

1991

1992

1993

/*************************************************

1994

* Complete a callout item *

1995

*************************************************/

1996

1997

/* A callout item contains the length of the next item in the pattern, which

1998

we can't fill in till after we have reached the relevant point. This is used

1999

for both automatic and manual callouts.

2000

2001

Arguments:

2002

previous_callout points to previous callout item

2003

ptr current pattern pointer

2004

cd pointers to tables etc

2005

2006

Returns: nothing

2007

2008

2009

static void

2010

complete_callout(uschar *previous_callout, const uschar *ptr, compile_data *cd)

2011

{

2012

int length = ptr - cd->start_pattern - GET(previous_callout, 2);

2013

PUT(previous_callout, 2 + LINK_SIZE, length);

2014

}

2015

2016

2017

2018

#ifdef SUPPORT_UCP
/*************************************************
*           Get othercase range                  *
*************************************************/

/* This function is passed the start and end of a class range, in UTF-8 mode
with UCP support. It searches up the characters, looking for internal ranges of
characters in the "other" case. Each call returns the next one, updating the
start address.

Arguments:
  cptr        points to starting character value; updated
  d           end value
  ocptr       where to put start of othercase range
  odptr       where to put end of othercase range

Yield:        TRUE when range returned; FALSE when no more
*/

static BOOL
get_othercase_range(int *cptr, int d, int *ocptr, int *odptr)
{
  int c, chartype, othercase, next;

  /* Find the first character in the range that has an other case */

  for (c = *cptr; c <= d; c++)
  {
    if (ucp_findchar(c, &chartype, &othercase) == ucp_L && othercase != 0) break;
  }

  if (c > d) return FALSE;

  *ocptr = othercase;
  next = othercase + 1;

  /* Extend the range for as long as the other-case values of successive
  characters are themselves consecutive */

  for (++c; c <= d; c++)
  {
    if (ucp_findchar(c, &chartype, &othercase) != ucp_L || othercase != next)
      break;
    next++;
  }

  *odptr = next - 1;
  *cptr = c;

  return TRUE;
}
#endif  /* SUPPORT_UCP */

2065

2066

2067

/*************************************************

2068

* Compile one branch *

2069

*************************************************/

2070

2071

/* Scan the pattern, compiling it into the code vector. If the options are

2072

changed during the branch, the pointer is used to change the external options

2073

bits.

2074

2075

Arguments:

2076

optionsptr pointer to the option bits

2077

brackets points to number of extracting brackets used

2078

codeptr points to the pointer to the current code point

2079

ptrptr points to the current pattern pointer

2080

errorptr points to pointer to error message

2081

firstbyteptr set to initial literal character, or < 0 (REQ_UNSET, REQ_NONE)

2082

reqbyteptr set to the last literal character required, else < 0

2083

bcptr points to current branch chain

2084

cd contains pointers to tables etc.

2085

2086

Returns: TRUE on success

2087

FALSE, with *errorptr set on error

2088

*/

2089

2090

static BOOL

2091

compile_branch(int *optionsptr, int *brackets, uschar **codeptr,

2092

const uschar **ptrptr, const char **errorptr, int *firstbyteptr,

2093

int *reqbyteptr, branch_chain *bcptr, compile_data *cd)

2094

{

2095

int repeat_type, op_type;

2096

int repeat_min = 0, repeat_max = 0; /* To please picky compilers */

2097

int bravalue = 0;

2098

int greedy_default, greedy_non_default;

2099

int firstbyte, reqbyte;

2100

int zeroreqbyte, zerofirstbyte;

2101

int req_caseopt, reqvary, tempreqvary;

2102

int condcount = 0;

2103

int options = *optionsptr;

2104

int after_manual_callout = 0;

2105

2106

2107

uschar *tempcode;

2108

BOOL inescq = FALSE;

2109

BOOL groupsetfirstbyte = FALSE;

2110

const uschar *ptr = *ptrptr;

2111

const uschar *tempptr;

2112

uschar *previous = NULL;

2113

uschar *previous_callout = NULL;

2114

uschar classbits[32];

2115

2116

#ifdef SUPPORT_UTF8

2117

BOOL class_utf8;

2118

BOOL utf8 = (options & PCRE_UTF8) != 0;

2119

uschar *class_utf8data;

2120

uschar utf8_char[6];

2121

#else

2122

BOOL utf8 = FALSE;

2123

#endif

2124

2125

/* Set up the default and non-default settings for greediness */

2126

2127

greedy_default = ((options & PCRE_UNGREEDY) != 0);

2128

greedy_non_default = greedy_default ^ 1;

2129

2130

/* Initialize no first byte, no required byte. REQ_UNSET means "no char

2131

matching encountered yet". It gets changed to REQ_NONE if we hit something that

2132

matches a non-fixed char first char; reqbyte just remains unset if we never

2133

find one.

2134

2135

When we hit a repeat whose minimum is zero, we may have to adjust these values

2136

to take the zero repeat into account. This is implemented by setting them to

2137

zerofirstbyte and zeroreqbyte when such a repeat is encountered. The individual

2138

item types that can be repeated set these backoff variables appropriately. */

2139

2140

firstbyte = reqbyte = zerofirstbyte = zeroreqbyte = REQ_UNSET;

2141

2142

/* The variable req_caseopt contains either the REQ_CASELESS value or zero,

2143

according to the current setting of the caseless flag. REQ_CASELESS is a bit

2144

value > 255. It is added into the firstbyte or reqbyte variables to record the

2145

case status of the value. This is used only for ASCII characters. */

2146

2147

req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;

2148

2149

/* Switch on next character until the end of the branch */

2150

2151

for (;; ptr++)

2152

{

2153

BOOL negate_class;

2154

BOOL possessive_quantifier;

2155

BOOL is_quantifier;

2156

int class_charcount;

2157

int class_lastchar;

2158

int newoptions;

2159

int recno;

2160

int skipbytes;

2161

int subreqbyte;

2162

int subfirstbyte;

2163

int mclength;

2164

uschar mcbuffer[8];

2165

2166

/* Next byte in the pattern */

2167

2168

c = *ptr;

2169

2170

/* If in \Q...\E, check for the end; if not, we have a literal */

2171

2172

if (inescq && c != 0)

2173

{

2174

if (c == '\\' && ptr[1] == 'E')

2175

{

2176

inescq = FALSE;

2177

ptr++;

2178

continue;

2179

}

2180

else

2181

{

2182

if (previous_callout != NULL)

2183

{

2184

complete_callout(previous_callout, ptr, cd);

2185

previous_callout = NULL;

2186

}

2187

if ((options & PCRE_AUTO_CALLOUT) != 0)

2188

{

2189

previous_callout = code;

2190

code = auto_callout(code, ptr, cd);

2191

}

2192

goto NORMAL_CHAR;

2193

}

2194

}

2195

2196

/* Fill in length of a previous callout, except when the next thing is

2197

a quantifier. */

2198

2199

is_quantifier = c == '*' || c == '+' || c == '?' ||

2200

(c == '{' && is_counted_repeat(ptr+1));

2201

2202

if (!is_quantifier && previous_callout != NULL &&

2203

after_manual_callout-- <= 0)

2204

{

2205

complete_callout(previous_callout, ptr, cd);

2206

previous_callout = NULL;

2207

}

2208

2209

/* In extended mode, skip white space and comments */

2210

2211

if ((options & PCRE_EXTENDED) != 0)

2212

{

2213

if ((cd->ctypes[c] & ctype_space) != 0) continue;

2214

if (c == '#')

2215

{

2216

/* The space before the ; is to avoid a warning on a silly compiler

2217

on the Macintosh. */

2218

while ((c = *(++ptr)) != 0 && c != NEWLINE) ;

2219

if (c != 0) continue; /* Else fall through to handle end of string */

2220

}

2221

}

2222

2223

/* No auto callout for quantifiers. */

2224

2225

if ((options & PCRE_AUTO_CALLOUT) != 0 && !is_quantifier)

2226

{

2227

previous_callout = code;

2228

code = auto_callout(code, ptr, cd);

2229

}

2230

2231

switch(c)

2232

{

2233

/* The branch terminates at end of string, |, or ). */

2234

2235

case 0:

2236

case '|':

2237

case ')':

2238

*firstbyteptr = firstbyte;

2239

*reqbyteptr = reqbyte;

2240

*codeptr = code;

2241

*ptrptr = ptr;

2242

return TRUE;

2243

2244

/* Handle single-character metacharacters. In multiline mode, ^ disables

2245

the setting of any following char as a first character. */

2246

2247

case '^':

2248

if ((options & PCRE_MULTILINE) != 0)

2249

{

2250

if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;

2251

}

2252

previous = NULL;

2253

*code++ = OP_CIRC;

2254

break;

2255

2256

case '$':

2257

previous = NULL;

2258

*code++ = OP_DOLL;

2259

break;

2260

2261

/* There can never be a first char if '.' is first, whatever happens about

2262

repeats. The value of reqbyte doesn't change either. */

2263

2264

case '.':

2265

if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;

2266

zerofirstbyte = firstbyte;

2267

zeroreqbyte = reqbyte;

2268

previous = code;

2269

*code++ = OP_ANY;

2270

break;

2271

2272

/* Character classes. If the included characters are all < 255 in value, we

2273

build a 32-byte bitmap of the permitted characters, except in the special

2274

case where there is only one such character. For negated classes, we build

2275

the map as usual, then invert it at the end. However, we use a different

2276

opcode so that data characters > 255 can be handled correctly.

2277

2278

If the class contains characters outside the 0-255 range, a different

2279

opcode is compiled. It may optionally have a bit map for characters < 256,

2280

but those above are explicitly listed afterwards. A flag byte tells

2281

whether the bitmap is present, and whether this is a negated class or not.

2282

*/

2283

2284

case '[':

2285

previous = code;

2286

2287

/* PCRE supports POSIX class stuff inside a class. Perl gives an error if

2288

they are encountered at the top level, so we'll do that too. */

2289

2290

if ((ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&

2291

check_posix_syntax(ptr, &tempptr, cd))

2292

{

2293

*errorptr = (ptr[1] == ':')? ERR13 : ERR31;

2294

goto FAILED;

2295

}

2296

2297

/* If the first character is '^', set the negation flag and skip it. */

2298

2299

if ((c = *(++ptr)) == '^')

2300

{

2301

negate_class = TRUE;

2302

c = *(++ptr);

2303

}

2304

else

2305

{

2306

negate_class = FALSE;

2307

}

2308

2309

/* Keep a count of chars with values < 256 so that we can optimize the case

2310

of just a single character (as long as it's < 256). For higher valued UTF-8

2311

characters, we don't yet do any optimization. */

2312

2313

class_charcount = 0;

2314

class_lastchar = -1;

2315

2316

#ifdef SUPPORT_UTF8

2317

class_utf8 = FALSE; /* No chars >= 256 */

2318

class_utf8data = code + LINK_SIZE + 34; /* For UTF-8 items */

2319

#endif

2320

2321

/* Initialize the 32-char bit map to all zeros. We have to build the

2322

map in a temporary bit of store, in case the class contains only 1

2323

character (< 256), because in that case the compiled code doesn't use the

2324

bit map. */

2325

2326

memset(classbits, 0, 32 * sizeof(uschar));

2327

2328

/* Process characters until ] is reached. By writing this as a "do" it

2329

means that an initial ] is taken as a data character. The first pass

2330

through the regex checked the overall syntax, so we don't need to be very

2331

strict here. At the start of the loop, c contains the first byte of the

2332

character. */

2333

2334

2335

{

2336

#ifdef SUPPORT_UTF8

2337

if (utf8 && c > 127)

2338

{ /* Braces are required because the */

2339

GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */

2340

}

2341

#endif

2342

2343

/* Inside \Q...\E everything is literal except \E */

2344

2345

if (inescq)

2346

{

2347

if (c == '\\' && ptr[1] == 'E')

2348

{

2349

inescq = FALSE;

2350

ptr++;

2351

continue;

2352

}

2353

else goto LONE_SINGLE_CHARACTER;

2354

}

2355

2356

/* Handle POSIX class names. Perl allows a negation extension of the

2357

form [:^name:]. A square bracket that doesn't match the syntax is

2358

treated as a literal. We also recognize the POSIX constructions

2359

[.ch.] and [=ch=] ("collating elements") and fault them, as Perl

2360

5.6 and 5.8 do. */

2361

2362

if (c == '[' &&

2363

(ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&

2364

check_posix_syntax(ptr, &tempptr, cd))

2365

{

2366

BOOL local_negate = FALSE;

2367

int posix_class, i;

2368

2369

2370

if (ptr[1] != ':')

2371

{

2372

*errorptr = ERR31;

2373

goto FAILED;

2374

}

2375

2376

ptr += 2;

2377

if (*ptr == '^')

2378

{

2379

local_negate = TRUE;

2380

ptr++;

2381

}

2382

2383

posix_class = check_posix_name(ptr, tempptr - ptr);

2384

if (posix_class < 0)

2385

{

2386

*errorptr = ERR30;

2387

goto FAILED;

2388

}

2389

2390

/* If matching is caseless, upper and lower are converted to

2391

alpha. This relies on the fact that the class table starts with

2392

alpha, lower, upper as the first 3 entries. */

2393

2394

if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)

2395

posix_class = 0;

2396

2397

/* Or into the map we are building up to 3 of the static class

2398

tables, or their negations. The [:blank:] class sets up the same

2399

chars as the [:space:] class (all white space). We remove the vertical

2400

white space chars afterwards. */

2401

2402

posix_class *= 3;

2403

for (i = 0; i < 3; i++)

2404

{

2405

BOOL blankclass = strncmp((char *)ptr, "blank", 5) == 0;

2406

int taboffset = posix_class_maps[posix_class + i];

2407

if (taboffset < 0) break;

2408

if (local_negate)

2409

{

2410

if (i == 0)

2411

for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+taboffset];

2412

else

2413

for (c = 0; c < 32; c++) classbits[c] &= ~cbits[c+taboffset];

2414

if (blankclass) classbits[1] |= 0x3c;

2415

}

2416

else

2417

{

2418

for (c = 0; c < 32; c++) classbits[c] |= cbits[c+taboffset];

2419

if (blankclass) classbits[1] &= ~0x3c;

2420

}

2421

}

2422

2423

ptr = tempptr + 1;

2424

class_charcount = 10; /* Set > 1; assumes more than 1 per class */

2425

continue; /* End of POSIX syntax handling */

2426

}

2427

2428

/* Backslash may introduce a single character, or it may introduce one

2429

of the specials, which just set a flag. Escaped items are checked for

2430

validity in the pre-compiling pass. The sequence \b is a special case.

2431

Inside a class (and only there) it is treated as backspace. Elsewhere

2432

it marks a word boundary. Other escapes have preset maps ready to

2433

or into the one we are building. We assume they have more than one

2434

character in them, so set class_charcount bigger than one. */

2435

2436

if (c == '\\')

2437

{

2438

c = check_escape(&ptr, errorptr, *brackets, options, TRUE);

2439

2440

if (-c == ESC_b) c = '\b'; /* \b is backslash in a class */

2441

else if (-c == ESC_X) c = 'X'; /* \X is literal X in a class */

2442

else if (-c == ESC_Q) /* Handle start of quoted string */

2443

{

2444

if (ptr[1] == '\\' && ptr[2] == 'E')

2445

{

2446

ptr += 2; /* avoid empty string */

2447

}

2448

else inescq = TRUE;

2449

continue;

2450

}

2451

2452

if (c < 0)

2453

{

2454

2455

class_charcount += 2; /* Greater than 1 is what matters */

2456

switch (-c)

2457

{

2458

case ESC_d:

2459

for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_digit];

2460

continue;

2461

2462

case ESC_D:

2463

for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit];

2464

continue;

2465

2466

case ESC_w:

2467

for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_word];

2468

continue;

2469

2470

case ESC_W:

2471

for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];

2472

continue;

2473

2474

case ESC_s:

2475

for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_space];

2476

classbits[1] &= ~0x08; /* Perl 5.004 onwards omits VT from \s */

2477

continue;

2478

2479

case ESC_S:

2480

for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];

2481

classbits[1] |= 0x08; /* Perl 5.004 onwards omits VT from \s */

2482

continue;

2483

2484

#ifdef SUPPORT_UCP

2485

case ESC_p:

2486

case ESC_P:

2487

{

2488

BOOL negated;

2489

int property = get_ucp(&ptr, &negated, errorptr);

2490

if (property < 0) goto FAILED;

2491

class_utf8 = TRUE;

2492

*class_utf8data++ = ((-c == ESC_p) != negated)?

2493

XCL_PROP : XCL_NOTPROP;

2494

*class_utf8data++ = property;

2495

class_charcount -= 2; /* Not a < 256 character */

2496

}

2497

continue;

2498

#endif

2499

2500

/* Unrecognized escapes are faulted if PCRE is running in its

2501

strict mode. By default, for compatibility with Perl, they are

2502

treated as literals. */

2503

2504

default:

2505

if ((options & PCRE_EXTRA) != 0)

2506

{

2507

*errorptr = ERR7;

2508

goto FAILED;

2509

}

2510

c = *ptr; /* The final character */

2511

class_charcount -= 2; /* Undo the default count from above */

2512

}

2513

}

2514

2515

/* Fall through if we have a single character (c >= 0). This may be

2516

> 256 in UTF-8 mode. */

2517

2518

} /* End of backslash handling */

2519

2520

/* A single character may be followed by '-' to form a range. However,

2521

Perl does not permit ']' to be the end of the range. A '-' character

2522

here is treated as a literal. */

2523

2524

if (ptr[1] == '-' && ptr[2] != ']')

2525

{

2526

int d;

2527

ptr += 2;

2528

2529

#ifdef SUPPORT_UTF8

2530

if (utf8)

2531

{ /* Braces are required because the */

2532

GETCHARLEN(d, ptr, ptr); /* macro generates multiple statements */

2533

}

2534

else

2535

#endif

2536

d = *ptr; /* Not UTF-8 mode */

2537

2538

/* The second part of a range can be a single-character escape, but

2539

not any of the other escapes. Perl 5.6 treats a hyphen as a literal

2540

in such circumstances. */

2541

2542

if (d == '\\')

2543

{

2544

const uschar *oldptr = ptr;

2545

d = check_escape(&ptr, errorptr, *brackets, options, TRUE);

2546

2547

/* \b is backspace; \X is literal X; any other special means the '-'

2548

was literal */

2549

2550

if (d < 0)

2551

{

2552

if (d == -ESC_b) d = '\b';

2553

else if (d == -ESC_X) d = 'X'; else

2554

{

2555

ptr = oldptr - 2;

2556

goto LONE_SINGLE_CHARACTER; /* A few lines below */

2557

}

2558

}

2559

}

2560

2561

/* The check that the two values are in the correct order happens in

2562

the pre-pass. Optimize one-character ranges */

2563

2564

if (d == c) goto LONE_SINGLE_CHARACTER; /* A few lines below */

2565

2566

/* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless

2567

matching, we have to use an XCLASS with extra data items. Caseless

2568

matching for characters > 127 is available only if UCP support is

2569

available. */

2570

2571

#ifdef SUPPORT_UTF8

2572

if (utf8 && (d > 255 || ((options & PCRE_CASELESS) != 0 && d > 127)))

2573

{

2574

class_utf8 = TRUE;

2575

2576

/* With UCP support, we can find the other case equivalents of

2577

the relevant characters. There may be several ranges. Optimize how

2578

they fit with the basic range. */

2579

2580

#ifdef SUPPORT_UCP

2581

if ((options & PCRE_CASELESS) != 0)

2582

{

2583

int occ, ocd;

2584

int cc = c;

2585

int origd = d;

2586

while (get_othercase_range(&cc, origd, &occ, &ocd))

2587

{

2588

if (occ >= c && ocd <= d) continue; /* Skip embedded ranges */

2589

2590

if (occ < c && ocd >= c - 1) /* Extend the basic range */

2591

{ /* if there is overlap, */

2592

c = occ; /* noting that if occ < c */

2593

continue; /* we can't have ocd > d */

2594

} /* because a subrange is */

2595

if (ocd > d && occ <= d + 1) /* always shorter than */

2596

{ /* the basic range. */

2597

d = ocd;

2598

continue;

2599

}

2600

2601

if (occ == ocd)

2602

{

2603

*class_utf8data++ = XCL_SINGLE;

2604

}

2605

else

2606

{

2607

*class_utf8data++ = XCL_RANGE;

2608

class_utf8data += ord2utf8(occ, class_utf8data);

2609

}

2610

class_utf8data += ord2utf8(ocd, class_utf8data);

2611

}

2612

}

2613

#endif /* SUPPORT_UCP */

2614

2615

/* Now record the original range, possibly modified for UCP caseless

2616

overlapping ranges. */

2617

2618

*class_utf8data++ = XCL_RANGE;

2619

class_utf8data += ord2utf8(c, class_utf8data);

2620

class_utf8data += ord2utf8(d, class_utf8data);

2621

2622

/* With UCP support, we are done. Without UCP support, there is no

2623

caseless matching for UTF-8 characters > 127; we can use the bit map

2624

for the smaller ones. */

2625

2626

#ifdef SUPPORT_UCP

2627

continue; /* With next character in the class */

2628

#else

2629

if ((options & PCRE_CASELESS) == 0 || c > 127) continue;

2630

2631

/* Adjust upper limit and fall through to set up the map */

2632

2633

d = 127;

2634

2635

#endif /* SUPPORT_UCP */

2636

}

2637

#endif /* SUPPORT_UTF8 */

2638

2639

/* We use the bit map for all cases when not in UTF-8 mode; else

2640

ranges that lie entirely within 0-127 when there is UCP support; else

2641

for partial ranges without UCP support. */

2642

2643

for (; c <= d; c++)

2644

{

2645

classbits[c/8] |= (1 << (c&7));

2646

if ((options & PCRE_CASELESS) != 0)

2647

{

2648

int uc = cd->fcc[c]; /* flip case */

2649

classbits[uc/8] |= (1 << (uc&7));

2650

}

2651

class_charcount++; /* in case a one-char range */

2652

class_lastchar = c;

2653

}

2654

2655

continue; /* Go get the next char in the class */

2656

}

2657

2658

/* Handle a lone single character - we can get here for a normal

2659

non-escape char, or after \ that introduces a single character or for an

2660

apparent range that isn't. */

2661

2662

LONE_SINGLE_CHARACTER:

2663

2664

/* Handle a character that cannot go in the bit map */

2665

2666

#ifdef SUPPORT_UTF8

2667

if (utf8 && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127)))

2668

{

2669

class_utf8 = TRUE;

2670

*class_utf8data++ = XCL_SINGLE;

2671

class_utf8data += ord2utf8(c, class_utf8data);

2672

2673

#ifdef SUPPORT_UCP

2674

if ((options & PCRE_CASELESS) != 0)

2675

{

2676

int chartype;

2677

int othercase;

2678

if (ucp_findchar(c, &chartype, &othercase) >= 0 && othercase > 0)

2679

{

2680

*class_utf8data++ = XCL_SINGLE;

2681

class_utf8data += ord2utf8(othercase, class_utf8data);

2682

}

2683

}

2684

#endif /* SUPPORT_UCP */

2685

2686

}

2687

else

2688

#endif /* SUPPORT_UTF8 */

2689

2690

/* Handle a single-byte character */

2691

{

2692

classbits[c/8] |= (1 << (c&7));

2693

if ((options & PCRE_CASELESS) != 0)

2694

{

2695

c = cd->fcc[c]; /* flip case */

2696

classbits[c/8] |= (1 << (c&7));

2697

}

2698

class_charcount++;

2699

class_lastchar = c;

2700

}

2701

}

2702

2703

/* Loop until ']' reached; the check for end of string happens inside the

2704

loop. This "while" is the end of the "do" above. */

2705

2706

while ((c = *(++ptr)) != ']' || inescq);

2707

2708

/* If class_charcount is 1, we saw precisely one character whose value is

2709

less than 256. In non-UTF-8 mode we can always optimize. In UTF-8 mode, we

2710

can optimize the negative case only if there were no characters >= 128

2711

because OP_NOT and the related opcodes like OP_NOTSTAR operate on

2712

single-bytes only. This is an historical hangover. Maybe one day we can

2713

tidy these opcodes to handle multi-byte characters.

2714

2715

The optimization throws away the bit map. We turn the item into a

2716

1-character OP_CHAR[NC] if it's positive, or OP_NOT if it's negative. Note

2717

that OP_NOT does not support multibyte characters. In the positive case, it

2718

can cause firstbyte to be set. Otherwise, there can be no first char if

2719

this item is first, whatever repeat count may follow. In the case of

2720

reqbyte, save the previous value for reinstating. */

2721

2722

#ifdef SUPPORT_UTF8

2723

if (class_charcount == 1 &&

2724

(!utf8 ||

2725

(!class_utf8 && (!negate_class || class_lastchar < 128))))

2726

2727

#else

2728

if (class_charcount == 1)

2729

#endif

2730

{

2731

zeroreqbyte = reqbyte;

2732

2733

/* The OP_NOT opcode works on one-byte characters only. */

2734

2735

if (negate_class)

2736

{

2737

if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;

2738

zerofirstbyte = firstbyte;

2739

*code++ = OP_NOT;

2740

*code++ = class_lastchar;

2741

break;

2742

}

2743

2744

/* For a single, positive character, get the value into mcbuffer, and

2745

then we can handle this with the normal one-character code. */

2746

2747

#ifdef SUPPORT_UTF8

2748

if (utf8 && class_lastchar > 127)

2749

mclength = ord2utf8(class_lastchar, mcbuffer);

2750

else

2751

#endif

2752

{

2753

mcbuffer[0] = class_lastchar;

2754

mclength = 1;

2755

}

2756

goto ONE_CHAR;

2757

} /* End of 1-char optimization */

2758

2759

/* The general case - not the one-char optimization. If this is the first

2760

thing in the branch, there can be no first char setting, whatever the

2761

repeat count. Any reqbyte setting must remain unchanged after any kind of

2762

repeat. */

2763

2764

if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;

2765

zerofirstbyte = firstbyte;

2766

zeroreqbyte = reqbyte;

2767

2768

/* If there are characters with values > 255, we have to compile an

2769

extended class, with its own opcode. If there are no characters < 256,

2770

we can omit the bitmap. */

2771

2772

#ifdef SUPPORT_UTF8

2773

if (class_utf8)

2774

{

2775

*class_utf8data++ = XCL_END; /* Marks the end of extra data */

2776

*code++ = OP_XCLASS;

2777

code += LINK_SIZE;

2778

*code = negate_class? XCL_NOT : 0;

2779

2780

/* If the map is required, install it, and move on to the end of

2781

the extra data */

2782

2783

if (class_charcount > 0)

2784

{

2785

*code++ |= XCL_MAP;

2786

memcpy(code, classbits, 32);

2787

code = class_utf8data;

2788

}

2789

2790

/* If the map is not required, slide down the extra data. */

2791

2792

else

2793

{

2794

int len = class_utf8data - (code + 33);

2795

memmove(code + 1, code + 33, len);

2796

code += len + 1;

2797

}

2798

2799

/* Now fill in the complete length of the item */

2800

2801

PUT(previous, 1, code - previous);

2802

break; /* End of class handling */

2803

}

2804

#endif

2805

2806

/* If there are no characters > 255, negate the 32-byte map if necessary,

2807

and copy it into the code vector. If this is the first thing in the branch,

2808

there can be no first char setting, whatever the repeat count. Any reqbyte

2809

setting must remain unchanged after any kind of repeat. */

2810

2811

if (negate_class)

2812

{

2813

*code++ = OP_NCLASS;

2814

for (c = 0; c < 32; c++) code[c] = ~classbits[c];

2815

}

2816

else

2817

{

2818

*code++ = OP_CLASS;

2819

memcpy(code, classbits, 32);

2820

}

2821

code += 32;

2822

break;

2823

2824

/* Various kinds of repeat; '{' is not necessarily a quantifier, but this

2825

has been tested above. */

2826

2827

case '{':

2828

if (!is_quantifier) goto NORMAL_CHAR;

2829

ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr);

2830

if (*errorptr != NULL) goto FAILED;

2831

goto REPEAT;

2832

2833

case '*':

2834

repeat_min = 0;

2835

repeat_max = -1;

2836

goto REPEAT;

2837

2838

case '+':

2839

repeat_min = 1;

2840

repeat_max = -1;

2841

goto REPEAT;

2842

2843

case '?':

2844

repeat_min = 0;

2845

repeat_max = 1;

2846

2847

REPEAT:

2848

if (previous == NULL)

2849

{

2850

*errorptr = ERR9;

2851

goto FAILED;

2852

}

2853

2854

if (repeat_min == 0)

2855

{

2856

firstbyte = zerofirstbyte; /* Adjust for zero repeat */

2857

reqbyte = zeroreqbyte; /* Ditto */

2858

}

2859

2860

/* Remember whether this is a variable length repeat */

2861

2862

reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY;

2863

2864

op_type = 0; /* Default single-char op codes */

2865

possessive_quantifier = FALSE; /* Default not possessive quantifier */

2866

2867

/* Save start of previous item, in case we have to move it up to make space

2868

for an inserted OP_ONCE for the additional '+' extension. */

2869

2870

tempcode = previous;

2871

2872

/* If the next character is '+', we have a possessive quantifier. This

2873

implies greediness, whatever the setting of the PCRE_UNGREEDY option.

2874

If the next character is '?' this is a minimizing repeat, by default,

2875

but if PCRE_UNGREEDY is set, it works the other way round. We change the

2876

repeat type to the non-default. */

2877

2878

if (ptr[1] == '+')

2879

{

2880

repeat_type = 0; /* Force greedy */

2881

possessive_quantifier = TRUE;

2882

ptr++;

2883

}

2884

else if (ptr[1] == '?')

2885

{

2886

repeat_type = greedy_non_default;

2887

ptr++;

2888

}

2889

else repeat_type = greedy_default;

2890

2891

/* If previous was a recursion, we need to wrap it inside brackets so that

2892

it can be replicated if necessary. */

2893

2894

if (*previous == OP_RECURSE)

2895

{

2896

memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE);

2897

code += 1 + LINK_SIZE;

2898

*previous = OP_BRA;

2899

PUT(previous, 1, code - previous);

2900

*code = OP_KET;

2901

PUT(code, 1, code - previous);

2902

code += 1 + LINK_SIZE;

2903

}

2904

2905

/* If previous was a character match, abolish the item and generate a

2906

repeat item instead. If a char item has a minimum of more than one, ensure

2907

that it is set in reqbyte - it might not be if a sequence such as x{3} is

2908

the first thing in a branch because the x will have gone into firstbyte

2909

instead. */

2910

2911

if (*previous == OP_CHAR || *previous == OP_CHARNC)

2912

{

2913

/* Deal with UTF-8 characters that take up more than one byte. It's

2914

easier to write this out separately than try to macrify it. Use c to

2915

hold the length of the character in bytes, plus 0x80 to flag that it's a

2916

length rather than a small character. */

2917

2918

#ifdef SUPPORT_UTF8

2919

if (utf8 && (code[-1] & 0x80) != 0)

2920

{

2921

uschar *lastchar = code - 1;

2922

while((*lastchar & 0xc0) == 0x80) lastchar--;

2923

c = code - lastchar; /* Length of UTF-8 character */

2924

memcpy(utf8_char, lastchar, c); /* Save the char */

2925

c |= 0x80; /* Flag c as a length */

2926

}

2927

else

2928

#endif

2929

2930

/* Handle the case of a single byte - either with no UTF8 support, or

2931

with UTF-8 disabled, or for a UTF-8 character < 128. */

2932

2933

{

2934

c = code[-1];

2935

if (repeat_min > 1) reqbyte = c | req_caseopt | cd->req_varyopt;

2936

}

2937

2938

goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */

2939

}

2940

2941

/* If previous was a single negated character ([^a] or similar), we use

2942

one of the special opcodes, replacing it. The code is shared with single-

2943

character repeats by setting op_type to add a suitable offset into

2944

repeat_type. OP_NOT is currently used only for single-byte chars. */

2945

2946

else if (*previous == OP_NOT)

2947

{

2948

op_type = OP_NOTSTAR - OP_STAR; /* Use "not" opcodes */

2949

c = previous[1];

2950

goto OUTPUT_SINGLE_REPEAT;

2951

}

2952

2953

/* If previous was a character type match (\d or similar), abolish it and

2954

create a suitable repeat item. The code is shared with single-character

2955

repeats by setting op_type to add a suitable offset into repeat_type. Note

2956

that the Unicode property types will be present only when SUPPORT_UCP is

2957

defined, but we don't wrap the little bits of code here because it just

2958

makes it horribly messy. */

2959

2960

else if (*previous < OP_EODN)

2961

{

2962

uschar *oldcode;

2963

int prop_type;

2964

op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */

2965

c = *previous;

2966

2967

OUTPUT_SINGLE_REPEAT:

2968

prop_type = (*previous == OP_PROP || *previous == OP_NOTPROP)?

2969

previous[1] : -1;

2970

2971

oldcode = code;

2972

code = previous; /* Usually overwrite previous item */

2973

2974

/* If the maximum is zero then the minimum must also be zero; Perl allows

2975

this case, so we do too - by simply omitting the item altogether. */

2976

2977

if (repeat_max == 0) goto END_REPEAT;

2978

2979

/* All real repeats make it impossible to handle partial matching (maybe

2980

one day we will be able to remove this restriction). */

2981

2982

if (repeat_max != 1) cd->nopartial = TRUE;

2983

2984

/* Combine the op_type with the repeat_type */

2985

2986

repeat_type += op_type;

2987

2988

/* A minimum of zero is handled either as the special case * or ?, or as

2989

an UPTO, with the maximum given. */

2990

2991

if (repeat_min == 0)

2992

{

2993

if (repeat_max == -1) *code++ = OP_STAR + repeat_type;

2994

else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type;

2995

else

2996

{

2997

*code++ = OP_UPTO + repeat_type;

2998

PUT2INC(code, 0, repeat_max);

2999

}

3000

}

3001

3002

/* A repeat minimum of 1 is optimized into some special cases. If the

3003

maximum is unlimited, we use OP_PLUS. Otherwise, the original item is

3004

left in place and, if the maximum is greater than 1, we use OP_UPTO with

3005

one less than the maximum. */

3006

3007

else if (repeat_min == 1)

3008

{

3009

if (repeat_max == -1)

3010

*code++ = OP_PLUS + repeat_type;

3011

else

3012

{

3013

code = oldcode; /* leave previous item in place */

3014

if (repeat_max == 1) goto END_REPEAT;

3015

*code++ = OP_UPTO + repeat_type;

3016

PUT2INC(code, 0, repeat_max - 1);

3017

}

3018

}

3019

3020

/* The case {n,n} is just an EXACT, while the general case {n,m} is

3021

handled as an EXACT followed by an UPTO. */

3022

3023

else

3024

{

3025

*code++ = OP_EXACT + op_type; /* NB EXACT doesn't have repeat_type */

3026

PUT2INC(code, 0, repeat_min);

3027

3028

/* If the maximum is unlimited, insert an OP_STAR. Before doing so,

3029

we have to insert the character for the previous code. For a repeated

3030

Unicode property match, there is an extra byte that defines the

3031

required property. In UTF-8 mode, long characters have their length in

3032

c, with the 0x80 bit as a flag. */

3033

3034

if (repeat_max < 0)

3035

{

3036

#ifdef SUPPORT_UTF8

3037

if (utf8 && c >= 128)

3038

{

3039

memcpy(code, utf8_char, c & 7);

3040

code += c & 7;

3041

}

3042

else

3043

#endif

3044

{

3045

*code++ = c;

3046

if (prop_type >= 0) *code++ = prop_type;

3047

}

3048

*code++ = OP_STAR + repeat_type;

3049

}

3050

3051

/* Else insert an UPTO if the max is greater than the min, again

3052

preceded by the character, for the previously inserted code. */

3053

3054

else if (repeat_max != repeat_min)

3055

{

3056

#ifdef SUPPORT_UTF8

3057

if (utf8 && c >= 128)

3058

{

3059

memcpy(code, utf8_char, c & 7);

3060

code += c & 7;

3061

}

3062

else

3063

#endif

3064

*code++ = c;

3065

if (prop_type >= 0) *code++ = prop_type;

3066

repeat_max -= repeat_min;

3067

*code++ = OP_UPTO + repeat_type;

3068

PUT2INC(code, 0, repeat_max);

3069

}

3070

}

3071

3072

/* The character or character type itself comes last in all cases. */

3073

3074

#ifdef SUPPORT_UTF8

3075

if (utf8 && c >= 128)

3076

{

3077

memcpy(code, utf8_char, c & 7);

3078

code += c & 7;

3079

}

3080

else

3081

#endif

3082

*code++ = c;

3083

3084

/* For a repeated Unicode property match, there is an extra byte that

3085

defines the required property. */

3086

3087

#ifdef SUPPORT_UCP

3088

if (prop_type >= 0) *code++ = prop_type;

3089

#endif

3090

}

3091

3092

/* If previous was a character class or a back reference, we put the repeat

3093

stuff after it, but just skip the item if the repeat was {0,0}. */

3094

3095

else if (*previous == OP_CLASS ||

3096

*previous == OP_NCLASS ||

3097

#ifdef SUPPORT_UTF8

3098

*previous == OP_XCLASS ||

3099

#endif

3100

*previous == OP_REF)

3101

{

3102

if (repeat_max == 0)

3103

{

3104

code = previous;

3105

goto END_REPEAT;

3106

}

3107

3108

/* All real repeats make it impossible to handle partial matching (maybe

3109

one day we will be able to remove this restriction). */

3110

3111

if (repeat_max != 1) cd->nopartial = TRUE;

3112

3113

if (repeat_min == 0 && repeat_max == -1)

3114

*code++ = OP_CRSTAR + repeat_type;

3115

else if (repeat_min == 1 && repeat_max == -1)

3116

*code++ = OP_CRPLUS + repeat_type;

3117

else if (repeat_min == 0 && repeat_max == 1)

3118

*code++ = OP_CRQUERY + repeat_type;

3119

else

3120

{

3121

*code++ = OP_CRRANGE + repeat_type;

3122

PUT2INC(code, 0, repeat_min);

3123

if (repeat_max == -1) repeat_max = 0; /* 2-byte encoding for max */

3124

PUT2INC(code, 0, repeat_max);

3125

}

3126

}

3127

3128

/* If previous was a bracket group, we may have to replicate it in certain

3129

cases. */

3130

3131

else if (*previous >= OP_BRA || *previous == OP_ONCE ||

3132

*previous == OP_COND)

3133

{

3134

3135

int ketoffset = 0;

3136

int len = code - previous;

3137

uschar *bralink = NULL;

3138

3139

/* If the maximum repeat count is unlimited, find the end of the bracket

3140

by scanning through from the start, and compute the offset back to it

3141

from the current code pointer. There may be an OP_OPT setting following

3142

the final KET, so we can't find the end just by going back from the code

3143

pointer. */

3144

3145

if (repeat_max == -1)

3146

{

3147

3148

do ket += GET(ket, 1); while (*ket != OP_KET);

3149

ketoffset = code - ket;

3150

}

3151

3152

/* The case of a zero minimum is special because of the need to stick

3153

OP_BRAZERO in front of it, and because the group appears once in the

3154

data, whereas in other cases it appears the minimum number of times. For

3155

this reason, it is simplest to treat this case separately, as otherwise

3156

the code gets far too messy. There are several special subcases when the

3157

minimum is zero. */

3158

3159

if (repeat_min == 0)

3160

{

3161

/* If the maximum is also zero, we just omit the group from the output

3162

altogether. */

3163

3164

if (repeat_max == 0)

3165

{

3166

code = previous;

3167

goto END_REPEAT;

3168

}

3169

3170

/* If the maximum is 1 or unlimited, we just have to stick in the

3171

BRAZERO and do no more at this point. However, we do need to adjust

3172

any OP_RECURSE calls inside the group that refer to the group itself or

3173

any internal group, because the offset is from the start of the whole

3174

regex. Temporarily terminate the pattern while doing this. */

3175

3176

if (repeat_max <= 1)

3177

{

3178

*code = OP_END;

3179

adjust_recurse(previous, 1, utf8, cd);

3180

memmove(previous+1, previous, len);

3181

code++;

3182

*previous++ = OP_BRAZERO + repeat_type;

3183

}

3184

3185

/* If the maximum is greater than 1 and limited, we have to replicate

3186

in a nested fashion, sticking OP_BRAZERO before each set of brackets.

3187

The first one has to be handled carefully because it's the original

3188

copy, which has to be moved up. The remainder can be handled by code

3189

that is common with the non-zero minimum case below. We have to

3190

adjust the value of repeat_max, since one less copy is required. Once

3191

again, we may have to adjust any OP_RECURSE calls inside the group. */

3192

3193

else

3194

{

3195

int offset;

3196

*code = OP_END;

3197

adjust_recurse(previous, 2 + LINK_SIZE, utf8, cd);

3198

memmove(previous + 2 + LINK_SIZE, previous, len);

3199

code += 2 + LINK_SIZE;

3200

*previous++ = OP_BRAZERO + repeat_type;

3201

*previous++ = OP_BRA;

3202

3203

/* We chain together the bracket offset fields that have to be

3204

filled in later when the ends of the brackets are reached. */

3205

3206

offset = (bralink == NULL)? 0 : previous - bralink;

3207

bralink = previous;

3208

PUTINC(previous, 0, offset);

3209

}

3210

3211

repeat_max--;

3212

}

3213

3214

/* If the minimum is greater than zero, replicate the group as many

3215

times as necessary, and adjust the maximum to the number of subsequent

3216

copies that we need. If we set a first char from the group, and didn't

3217

set a required char, copy the latter from the former. */

3218

3219

else

3220

{

3221

if (repeat_min > 1)

3222

{

3223

if (groupsetfirstbyte && reqbyte < 0) reqbyte = firstbyte;

3224

for (i = 1; i < repeat_min; i++)

3225

{

3226

memcpy(code, previous, len);

3227

code += len;

3228

}

3229

}

3230

if (repeat_max > 0) repeat_max -= repeat_min;

3231

}

3232

3233

/* This code is common to both the zero and non-zero minimum cases. If

3234

the maximum is limited, it replicates the group in a nested fashion,

3235

remembering the bracket starts on a stack. In the case of a zero minimum,

3236

the first one was set up above. In all cases the repeat_max now specifies

3237

the number of additional copies needed. */

3238

3239

if (repeat_max >= 0)

3240

{

3241

for (i = repeat_max - 1; i >= 0; i--)

3242

{

3243

*code++ = OP_BRAZERO + repeat_type;

3244

3245

/* All but the final copy start a new nesting, maintaining the

3246

chain of brackets outstanding. */

3247

3248

if (i != 0)

3249

{

3250

int offset;

3251

*code++ = OP_BRA;

3252

offset = (bralink == NULL)? 0 : code - bralink;

3253

bralink = code;

3254

PUTINC(code, 0, offset);

3255

}

3256

3257

memcpy(code, previous, len);

3258

code += len;

3259

}

3260

3261

/* Now chain through the pending brackets, and fill in their length

3262

fields (which are holding the chain links pro tem). */

3263

3264

while (bralink != NULL)

3265

{

3266

int oldlinkoffset;

3267

int offset = code - bralink + 1;

3268

uschar *bra = code - offset;

3269

oldlinkoffset = GET(bra, 1);

3270

bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;

3271

*code++ = OP_KET;

3272

PUTINC(code, 0, offset);

3273

PUT(bra, 1, offset);

3274

}

3275

}

3276

3277

/* If the maximum is unlimited, set a repeater in the final copy. We

3278

can't just offset backwards from the current code point, because we

3279

don't know if there's been an options resetting after the ket. The

3280

correct offset was computed above. */

3281

3282

else code[-ketoffset] = OP_KETRMAX + repeat_type;

3283

}

3284

3285

/* Else there's some kind of shambles */

3286

3287

else

3288

{

3289

*errorptr = ERR11;

3290

goto FAILED;

3291

}

3292

3293

/* If the character following a repeat is '+', we wrap the entire repeated

3294

item inside OP_ONCE brackets. This is just syntactic sugar, taken from

3295

Sun's Java package. The repeated item starts at tempcode, not at previous,

3296

which might be the first part of a string whose (former) last char we

3297

repeated. However, we don't support '+' after a greediness '?'. */

3298

3299

if (possessive_quantifier)

3300

{

3301

int len = code - tempcode;

3302

memmove(tempcode + 1+LINK_SIZE, tempcode, len);

3303

code += 1 + LINK_SIZE;

3304

len += 1 + LINK_SIZE;

3305

tempcode[0] = OP_ONCE;

3306

*code++ = OP_KET;

3307

PUTINC(code, 0, len);

3308

PUT(tempcode, 1, len);

3309

}

3310

3311

/* In all cases we no longer have a previous item. We also set the

3312

"follows varying string" flag for subsequently encountered reqbytes if

3313

it isn't already set and we have just passed a varying length item. */

3314

3315

END_REPEAT:

3316

previous = NULL;

3317

cd->req_varyopt |= reqvary;

3318

break;

3319

3320

3321

/* Start of nested bracket sub-expression, or comment or lookahead or

3322

lookbehind or option setting or condition. First deal with special things

3323

that can come after a bracket; all are introduced by ?, and the appearance

3324

of any of them means that this is not a referencing group. They were

3325

checked for validity in the first pass over the string, so we don't have to

3326

check for syntax errors here. */

3327

3328

case '(':

3329

newoptions = options;

3330

skipbytes = 0;

3331

3332

if (*(++ptr) == '?')

3333

{

3334

int set, unset;

3335

int *optset;

3336

3337

switch (*(++ptr))

3338

{

3339

case '#': /* Comment; skip to ket */

3340

ptr++;

3341

while (*ptr != ')') ptr++;

3342

continue;

3343

3344

case ':': /* Non-extracting bracket */

3345

bravalue = OP_BRA;

3346

ptr++;

3347

break;

3348

3349

case '(':

3350

bravalue = OP_COND; /* Conditional group */

3351

3352

/* Condition to test for recursion */

3353

3354

if (ptr[1] == 'R')

3355

{

3356

code[1+LINK_SIZE] = OP_CREF;

3357

PUT2(code, 2+LINK_SIZE, CREF_RECURSE);

3358

skipbytes = 3;

3359

ptr += 3;

3360

}

3361

3362

/* Condition to test for a numbered subpattern match. We know that

3363

if a digit follows ( then there will just be digits until ) because

3364

the syntax was checked in the first pass. */

3365

3366

else if ((digitab[ptr[1]] && ctype_digit) != 0)

3367

{

3368

int condref; /* Don't amalgamate; some compilers */

3369

condref = *(++ptr) - '0'; /* grumble at autoincrement in declaration */

3370

while (*(++ptr) != ')') condref = condref*10 + *ptr - '0';

3371

if (condref == 0)

3372

{

3373

*errorptr = ERR35;

3374

goto FAILED;

3375

}

3376

ptr++;

3377

code[1+LINK_SIZE] = OP_CREF;

3378

PUT2(code, 2+LINK_SIZE, condref);

3379

skipbytes = 3;

3380

}

3381

/* For conditions that are assertions, we just fall through, having

3382

set bravalue above. */

3383

break;

3384

3385

case '=': /* Positive lookahead */

3386

bravalue = OP_ASSERT;

3387

ptr++;

3388

break;

3389

3390

case '!': /* Negative lookahead */

3391

bravalue = OP_ASSERT_NOT;

3392

ptr++;

3393

break;

3394

3395

case '<': /* Lookbehinds */

3396

switch (*(++ptr))

3397

{

3398

case '=': /* Positive lookbehind */

3399

bravalue = OP_ASSERTBACK;

3400

ptr++;

3401

break;

3402

3403

case '!': /* Negative lookbehind */

3404

bravalue = OP_ASSERTBACK_NOT;

3405

ptr++;

3406

break;

3407

}

3408

break;

3409

3410

case '>': /* One-time brackets */

3411

bravalue = OP_ONCE;

3412

ptr++;

3413

break;

3414

3415

case 'C': /* Callout - may be followed by digits; */

3416

previous_callout = code; /* Save for later completion */

3417

after_manual_callout = 1; /* Skip one item before completing */

3418

*code++ = OP_CALLOUT; /* Already checked that the terminating */

3419

{ /* closing parenthesis is present. */

3420

int n = 0;

3421

while ((digitab[*(++ptr)] & ctype_digit) != 0)

3422

n = n * 10 + *ptr - '0';

3423

if (n > 255)

3424

{

3425

*errorptr = ERR38;

3426

goto FAILED;

3427

}

3428

*code++ = n;

3429

PUT(code, 0, ptr - cd->start_pattern + 1); /* Pattern offset */

3430

PUT(code, LINK_SIZE, 0); /* Default length */

3431

code += 2 * LINK_SIZE;

3432

}

3433

previous = NULL;

3434

continue;

3435

3436

case 'P': /* Named subpattern handling */

3437

if (*(++ptr) == '<') /* Definition */

3438

{

3439

int i, namelen;

3440

uschar *slot = cd->name_table;

3441

const uschar *name; /* Don't amalgamate; some compilers */

3442

name = ++ptr; /* grumble at autoincrement in declaration */

3443

3444

while (*ptr++ != '>');

3445

namelen = ptr - name - 1;

3446

3447

for (i = 0; i < cd->names_found; i++)

3448

{

3449

int crc = memcmp(name, slot+2, namelen);

3450

if (crc == 0)

3451

{

3452

if (slot[2+namelen] == 0)

3453

{

3454

*errorptr = ERR43;

3455

goto FAILED;

3456

}

3457

crc = -1; /* Current name is substring */

3458

}

3459

if (crc < 0)

3460

{

3461

memmove(slot + cd->name_entry_size, slot,

3462

(cd->names_found - i) * cd->name_entry_size);

3463

break;

3464

}

3465

slot += cd->name_entry_size;

3466

}

3467

3468

PUT2(slot, 0, *brackets + 1);

3469

memcpy(slot + 2, name, namelen);

3470

slot[2+namelen] = 0;

3471

cd->names_found++;

3472

goto NUMBERED_GROUP;

3473

}

3474

3475

if (*ptr == '=' || *ptr == '>') /* Reference or recursion */

3476

{

3477

int i, namelen;

3478

int type = *ptr++;

3479

const uschar *name = ptr;

3480

uschar *slot = cd->name_table;

3481

3482

while (*ptr != ')') ptr++;

3483

namelen = ptr - name;

3484

3485

for (i = 0; i < cd->names_found; i++)

3486

{

3487

if (strncmp((char *)name, (char *)slot+2, namelen) == 0) break;

3488

slot += cd->name_entry_size;

3489

}

3490

if (i >= cd->names_found)

3491

{

3492

*errorptr = ERR15;

3493

goto FAILED;

3494

}

3495

3496

recno = GET2(slot, 0);

3497

3498

if (type == '>') goto HANDLE_RECURSION; /* A few lines below */

3499

3500

/* Back reference */

3501

3502

previous = code;

3503

*code++ = OP_REF;

3504

PUT2INC(code, 0, recno);

3505

cd->backref_map |= (recno < 32)? (1 << recno) : 1;

3506

if (recno > cd->top_backref) cd->top_backref = recno;

3507

continue;

3508

}

3509

3510

/* Should never happen */

3511

break;

3512

3513

case 'R': /* Pattern recursion */

3514

ptr++; /* Same as (?0) */

3515

/* Fall through */

3516

3517

/* Recursion or "subroutine" call */

3518

3519

case '0': case '1': case '2': case '3': case '4':

3520

case '5': case '6': case '7': case '8': case '9':

3521

{

3522

const uschar *called;

3523

recno = 0;

3524

while((digitab[*ptr] & ctype_digit) != 0)

3525

recno = recno * 10 + *ptr++ - '0';

3526

3527

/* Come here from code above that handles a named recursion */

3528

3529

HANDLE_RECURSION:

3530

3531

previous = code;

3532

3533

/* Find the bracket that is being referenced. Temporarily end the

3534

regex in case it doesn't exist. */

3535

3536

*code = OP_END;

3537

called = (recno == 0)?

3538

cd->start_code : find_bracket(cd->start_code, utf8, recno);

3539

3540

if (called == NULL)

3541

{

3542

*errorptr = ERR15;

3543

goto FAILED;

3544

}

3545

3546

/* If the subpattern is still open, this is a recursive call. We

3547

check to see if this is a left recursion that could loop for ever,

3548

and diagnose that case. */

3549

3550

if (GET(called, 1) == 0 && could_be_empty(called, code, bcptr, utf8))

3551

{

3552

*errorptr = ERR40;

3553

goto FAILED;

3554

}

3555

3556

/* Insert the recursion/subroutine item */

3557

3558

*code = OP_RECURSE;

3559

PUT(code, 1, called - cd->start_code);

3560

code += 1 + LINK_SIZE;

3561

}

3562

continue;

3563

3564

/* Character after (? not specially recognized */

3565

3566

default: /* Option setting */

3567

set = unset = 0;

3568

optset = &set;

3569

3570

while (*ptr != ')' && *ptr != ':')

3571

{

3572

switch (*ptr++)

3573

{

3574

case '-': optset = &unset; break;

3575

3576

case 'i': *optset |= PCRE_CASELESS; break;

3577

case 'm': *optset |= PCRE_MULTILINE; break;

3578

case 's': *optset |= PCRE_DOTALL; break;

3579

case 'x': *optset |= PCRE_EXTENDED; break;

3580

case 'U': *optset |= PCRE_UNGREEDY; break;

3581

case 'X': *optset |= PCRE_EXTRA; break;

3582

}

3583

}

3584

3585

/* Set up the changed option bits, but don't change anything yet. */

3586

3587

newoptions = (options | set) & (~unset);

3588

3589

/* If the options ended with ')' this is not the start of a nested

3590

group with option changes, so the options change at this level. Compile

3591

code to change the ims options if this setting actually changes any of

3592

them. We also pass the new setting back so that it can be put at the

3593

start of any following branches, and when this group ends (if we are in

3594

a group), a resetting item can be compiled.

3595

3596

Note that if this item is right at the start of the pattern, the

3597

options will have been abstracted and made global, so there will be no

3598

change to compile. */

3599

3600

if (*ptr == ')')

3601

{

3602

if ((options & PCRE_IMS) != (newoptions & PCRE_IMS))

3603

{

3604

*code++ = OP_OPT;

3605

*code++ = newoptions & PCRE_IMS;

3606

}

3607

3608

/* Change options at this level, and pass them back for use

3609

in subsequent branches. Reset the greedy defaults and the case

3610

value for firstbyte and reqbyte. */

3611

3612

*optionsptr = options = newoptions;

3613

greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);

3614

greedy_non_default = greedy_default ^ 1;

3615

req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;

3616

3617

previous = NULL; /* This item can't be repeated */

3618

continue; /* It is complete */

3619

}

3620

3621

/* If the options ended with ':' we are heading into a nested group

3622

with possible change of options. Such groups are non-capturing and are

3623

not assertions of any kind. All we need to do is skip over the ':';

3624

the newoptions value is handled below. */

3625

3626

bravalue = OP_BRA;

3627

ptr++;

3628

}

3629

}

3630

3631

/* If PCRE_NO_AUTO_CAPTURE is set, all unadorned brackets become

3632

non-capturing and behave like (?:...) brackets */

3633

3634

else if ((options & PCRE_NO_AUTO_CAPTURE) != 0)

3635

{

3636

bravalue = OP_BRA;

3637

}

3638

3639

/* Else we have a referencing group; adjust the opcode. If the bracket

3640

number is greater than EXTRACT_BASIC_MAX, we set the opcode one higher, and

3641

arrange for the true number to follow later, in an OP_BRANUMBER item. */

3642

3643

else

3644

{

3645

NUMBERED_GROUP:

3646

if (++(*brackets) > EXTRACT_BASIC_MAX)

3647

{

3648

bravalue = OP_BRA + EXTRACT_BASIC_MAX + 1;

3649

code[1+LINK_SIZE] = OP_BRANUMBER;

3650

PUT2(code, 2+LINK_SIZE, *brackets);

3651

skipbytes = 3;

3652

}

3653

else bravalue = OP_BRA + *brackets;

3654

}

3655

3656

/* Process nested bracketed re. Assertions may not be repeated, but other

3657

kinds can be. We copy code into a non-register variable in order to be able

3658

to pass its address because some compilers complain otherwise. Pass in a

3659

new setting for the ims options if they have changed. */

3660

3661

previous = (bravalue >= OP_ONCE)? code : NULL;

3662

*code = bravalue;

3663

tempcode = code;

3664

tempreqvary = cd->req_varyopt; /* Save value before bracket */

3665

3666

if (!compile_regex(

3667

newoptions, /* The complete new option state */

3668

options & PCRE_IMS, /* The previous ims option state */

3669

brackets, /* Extracting bracket count */

3670

&tempcode, /* Where to put code (updated) */

3671

&ptr, /* Input pointer (updated) */

3672

errorptr, /* Where to put an error message */

3673

(bravalue == OP_ASSERTBACK ||

3674

bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */

3675

skipbytes, /* Skip over OP_COND/OP_BRANUMBER */

3676

&subfirstbyte, /* For possible first char */

3677

&subreqbyte, /* For possible last char */

3678

bcptr, /* Current branch chain */

3679

cd)) /* Tables block */

3680

goto FAILED;

3681

3682

/* At the end of compiling, code is still pointing to the start of the

3683

group, while tempcode has been updated to point past the end of the group

3684

and any option resetting that may follow it. The pattern pointer (ptr)

3685

is on the bracket. */

3686

3687

/* If this is a conditional bracket, check that there are no more than

3688

two branches in the group. */

3689

3690

else if (bravalue == OP_COND)

3691

{

3692

uschar *tc = code;

3693

condcount = 0;

3694

3695

do {

3696

condcount++;

3697

tc += GET(tc,1);

3698

}

3699

while (*tc != OP_KET);

3700

3701

if (condcount > 2)

3702

{

3703

*errorptr = ERR27;

3704

goto FAILED;

3705

}

3706

3707

/* If there is just one branch, we must not make use of its firstbyte or

3708

reqbyte, because this is equivalent to an empty second branch. */

3709

3710

if (condcount == 1) subfirstbyte = subreqbyte = REQ_NONE;

3711

}

3712

3713

/* Handle updating of the required and first characters. Update for normal

3714

brackets of all kinds, and conditions with two branches (see code above).

3715

If the bracket is followed by a quantifier with zero repeat, we have to

3716

back off. Hence the definition of zeroreqbyte and zerofirstbyte outside the

3717

main loop so that they can be accessed for the back off. */

3718

3719

zeroreqbyte = reqbyte;

3720

zerofirstbyte = firstbyte;

3721

groupsetfirstbyte = FALSE;

3722

3723

if (bravalue >= OP_BRA || bravalue == OP_ONCE || bravalue == OP_COND)

3724

{

3725

/* If we have not yet set a firstbyte in this branch, take it from the

3726

subpattern, remembering that it was set here so that a repeat of more

3727

than one can replicate it as reqbyte if necessary. If the subpattern has

3728

no firstbyte, set "none" for the whole branch. In both cases, a zero

3729

repeat forces firstbyte to "none". */

3730

3731

if (firstbyte == REQ_UNSET)

3732

{

3733

if (subfirstbyte >= 0)

3734

{

3735

firstbyte = subfirstbyte;

3736

groupsetfirstbyte = TRUE;

3737

}

3738

else firstbyte = REQ_NONE;

3739

zerofirstbyte = REQ_NONE;

3740

}

3741

3742

/* If firstbyte was previously set, convert the subpattern's firstbyte

3743

into reqbyte if there wasn't one, using the vary flag that was in

3744

existence beforehand. */

3745

3746

else if (subfirstbyte >= 0 && subreqbyte < 0)

3747

subreqbyte = subfirstbyte | tempreqvary;

3748

3749

/* If the subpattern set a required byte (or set a first byte that isn't

3750

really the first byte - see above), set it. */

3751

3752

if (subreqbyte >= 0) reqbyte = subreqbyte;

3753

}

3754

3755

/* For a forward assertion, we take the reqbyte, if set. This can be

3756

helpful if the pattern that follows the assertion doesn't set a different

3757

char. For example, it's useful for /(?=abcde).+/. We can't set firstbyte

3758

for an assertion, however because it leads to incorrect effect for patterns

3759

such as /(?=a)a.+/ when the "real" "a" would then become a reqbyte instead

3760

of a firstbyte. This is overcome by a scan at the end if there's no

3761

firstbyte, looking for an asserted first char. */

3762

3763

else if (bravalue == OP_ASSERT && subreqbyte >= 0) reqbyte = subreqbyte;

3764

3765

/* Now update the main code pointer to the end of the group. */

3766

3767

code = tempcode;

3768

3769

/* Error if hit end of pattern */

3770

3771

if (*ptr != ')')

3772

{

3773

*errorptr = ERR14;

3774

goto FAILED;

3775

}

3776

break;

3777

3778

/* Check \ for being a real metacharacter; if not, fall through and handle

3779

it as a data character at the start of a string. Escape items are checked

3780

for validity in the pre-compiling pass. */

3781

3782

case '\\':

3783

tempptr = ptr;

3784

c = check_escape(&ptr, errorptr, *brackets, options, FALSE);

3785

3786

/* Handle metacharacters introduced by \. For ones like \d, the ESC_ values

3787

are arranged to be the negation of the corresponding OP_values. For the

3788

back references, the values are ESC_REF plus the reference number. Only

3789

back references and those types that consume a character may be repeated.

3790

We can test for values between ESC_b and ESC_Z for the latter; this may

3791

have to change if any new ones are ever created. */

3792

3793

if (c < 0)

3794

{

3795

if (-c == ESC_Q) /* Handle start of quoted string */

3796

{

3797

if (ptr[1] == '\\' && ptr[2] == 'E') ptr += 2; /* avoid empty string */

3798

else inescq = TRUE;

3799

continue;

3800

}

3801

3802

/* For metasequences that actually match a character, we disable the

3803

setting of a first character if it hasn't already been set. */

3804

3805

if (firstbyte == REQ_UNSET && -c > ESC_b && -c < ESC_Z)

3806

firstbyte = REQ_NONE;

3807

3808

/* Set values to reset to if this is followed by a zero repeat. */

3809

3810

zerofirstbyte = firstbyte;

3811

zeroreqbyte = reqbyte;

3812

3813

/* Back references are handled specially */

3814

3815

if (-c >= ESC_REF)

3816

{

3817

int number = -c - ESC_REF;

3818

previous = code;

3819

*code++ = OP_REF;

3820

PUT2INC(code, 0, number);

3821

}

3822

3823

/* So are Unicode property matches, if supported. We know that get_ucp

3824

won't fail because it was tested in the pre-pass. */

3825

3826

#ifdef SUPPORT_UCP

3827

else if (-c == ESC_P || -c == ESC_p)

3828

{

3829

BOOL negated;

3830

int value = get_ucp(&ptr, &negated, errorptr);

3831

previous = code;

3832

*code++ = ((-c == ESC_p) != negated)? OP_PROP : OP_NOTPROP;

3833

*code++ = value;

3834

}

3835

#endif

3836

3837

/* For the rest, we can obtain the OP value by negating the escape

3838

value */

3839

3840

else

3841

{

3842

previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;

3843

*code++ = -c;

3844

}

3845

continue;

3846

}

3847

3848

/* We have a data character whose value is in c. In UTF-8 mode it may have

3849

a value > 127. We set its representation in the length/buffer, and then

3850

handle it as a data character. */

3851

3852

#ifdef SUPPORT_UTF8

3853

if (utf8 && c > 127)

3854

mclength = ord2utf8(c, mcbuffer);

3855

else

3856

#endif

3857

3858

{

3859

mcbuffer[0] = c;

3860

mclength = 1;

3861

}

3862

3863

goto ONE_CHAR;

3864

3865

/* Handle a literal character. It is guaranteed not to be whitespace or #

3866

when the extended flag is set. If we are in UTF-8 mode, it may be a

3867

multi-byte literal character. */

3868

3869

default:

3870

NORMAL_CHAR:

3871

mclength = 1;

3872

mcbuffer[0] = c;

3873

3874

#ifdef SUPPORT_UTF8

3875

if (utf8 && (c & 0xc0) == 0xc0)

3876

{

3877

while ((ptr[1] & 0xc0) == 0x80)

3878

mcbuffer[mclength++] = *(++ptr);

3879

}

3880

#endif

3881

3882

/* At this point we have the character's bytes in mcbuffer, and the length

3883

in mclength. When not in UTF-8 mode, the length is always 1. */

3884

3885

ONE_CHAR:

3886

previous = code;

3887

*code++ = ((options & PCRE_CASELESS) != 0)? OP_CHARNC : OP_CHAR;

3888

for (c = 0; c < mclength; c++) *code++ = mcbuffer[c];

3889

3890

/* Set the first and required bytes appropriately. If no previous first

3891

byte, set it from this character, but revert to none on a zero repeat.

3892

Otherwise, leave the firstbyte value alone, and don't change it on a zero

3893

repeat. */

3894

3895

if (firstbyte == REQ_UNSET)

3896

{

3897

zerofirstbyte = REQ_NONE;

3898

zeroreqbyte = reqbyte;

3899

3900

/* If the character is more than one byte long, we can set firstbyte

3901

only if it is not to be matched caselessly. */

3902

3903

if (mclength == 1 || req_caseopt == 0)

3904

{

3905

firstbyte = mcbuffer[0] | req_caseopt;

3906

if (mclength != 1) reqbyte = code[-1] | cd->req_varyopt;

3907

}

3908

else firstbyte = reqbyte = REQ_NONE;

3909

}

3910

3911

/* firstbyte was previously set; we can set reqbyte only if the length is

3912

1 or the matching is caseful. */

3913

3914

else

3915

{

3916

zerofirstbyte = firstbyte;

3917

zeroreqbyte = reqbyte;

3918

if (mclength == 1 || req_caseopt == 0)

3919

reqbyte = code[-1] | req_caseopt | cd->req_varyopt;

3920

}

3921

3922

break; /* End of literal character handling */

3923

}

3924

} /* end of big loop */

3925

3926

/* Control never reaches here by falling through, only by a goto for all the

3927

error states. Pass back the position in the pattern so that it can be displayed

3928

to the user for diagnosing the error. */

3929

3930

FAILED:

3931

*ptrptr = ptr;

3932

return FALSE;

3933

}

3934

3935

3936

3937

3938

/*************************************************

3939

* Compile sequence of alternatives *

3940

*************************************************/

3941

3942

/* On entry, ptr is pointing past the bracket character, but on return

3943

it points to the closing bracket, or vertical bar, or end of string.

3944

The code variable is pointing at the byte into which the BRA operator has been

3945

stored. If the ims options are changed at the start (for a (?ims: group) or

3946

during any branch, we need to insert an OP_OPT item at the start of every

3947

following branch to ensure they get set correctly at run time, and also pass

3948

the new options into every subsequent branch compile.

3949

3950

Arguments:

3951

options option bits, including any changes for this subpattern

3952

oldims previous settings of ims option bits

3953

brackets -> int containing the number of extracting brackets used

3954

codeptr -> the address of the current code pointer

3955

ptrptr -> the address of the current pattern pointer

3956

errorptr -> pointer to error message

3957

lookbehind TRUE if this is a lookbehind assertion

3958

skipbytes skip this many bytes at start (for OP_COND, OP_BRANUMBER)

3959

firstbyteptr place to put the first required character, or a negative number

3960

reqbyteptr place to put the last required character, or a negative number

3961

bcptr pointer to the chain of currently open branches

3962

cd points to the data block with tables pointers etc.

3963

3964

Returns: TRUE on success */

3965

3966

3967

static BOOL

3968

compile_regex(int options, int oldims, int *brackets, uschar **codeptr,

3969

const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int skipbytes,

3970

int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr, compile_data *cd)

3971

{

3972

const uschar *ptr = *ptrptr;

3973

uschar *code = *codeptr;

3974

uschar *last_branch = code;

3975

uschar *start_bracket = code;

3976

uschar *reverse_count = NULL;

3977

int firstbyte, reqbyte;

3978

int branchfirstbyte, branchreqbyte;

3979

branch_chain bc;

3980

3981

bc.outer = bcptr;

3982

bc.current = code;

3983

3984

firstbyte = reqbyte = REQ_UNSET;

3985

3986

/* Offset is set zero to mark that this bracket is still open */

3987

3988

PUT(code, 1, 0);

3989

code += 1 + LINK_SIZE + skipbytes;

3990

3991

/* Loop for each alternative branch */

3992

3993

for (;;)

3994

{

3995

/* Handle a change of ims options at the start of the branch */

3996

3997

if ((options & PCRE_IMS) != oldims)

3998

{

3999

*code++ = OP_OPT;

4000

*code++ = options & PCRE_IMS;

4001

}

4002

4003

/* Set up dummy OP_REVERSE if lookbehind assertion */

4004

4005

if (lookbehind)

4006

{

4007

*code++ = OP_REVERSE;

4008

reverse_count = code;

4009

PUTINC(code, 0, 0);

4010

}

4011

4012

/* Now compile the branch */

4013

4014

if (!compile_branch(&options, brackets, &code, &ptr, errorptr,

4015

&branchfirstbyte, &branchreqbyte, &bc, cd))

4016

{

4017

*ptrptr = ptr;

4018

return FALSE;

4019

}

4020

4021

/* If this is the first branch, the firstbyte and reqbyte values for the

4022

branch become the values for the regex. */

4023

4024

if (*last_branch != OP_ALT)

4025

{

4026

firstbyte = branchfirstbyte;

4027

reqbyte = branchreqbyte;

4028

}

4029

4030

/* If this is not the first branch, the first char and reqbyte have to

4031

match the values from all the previous branches, except that if the previous

4032

value for reqbyte didn't have REQ_VARY set, it can still match, and we set

4033

REQ_VARY for the regex. */

4034

4035

else

4036

{

4037

/* If we previously had a firstbyte, but it doesn't match the new branch,

4038

we have to abandon the firstbyte for the regex, but if there was previously

4039

no reqbyte, it takes on the value of the old firstbyte. */

4040

4041

if (firstbyte >= 0 && firstbyte != branchfirstbyte)

4042

{

4043

if (reqbyte < 0) reqbyte = firstbyte;

4044

firstbyte = REQ_NONE;

4045

}

4046

4047

/* If we (now or from before) have no firstbyte, a firstbyte from the

4048

branch becomes a reqbyte if there isn't a branch reqbyte. */

4049

4050

if (firstbyte < 0 && branchfirstbyte >= 0 && branchreqbyte < 0)

4051

branchreqbyte = branchfirstbyte;

4052

4053

/* Now ensure that the reqbytes match */

4054

4055

if ((reqbyte & ~REQ_VARY) != (branchreqbyte & ~REQ_VARY))

4056

reqbyte = REQ_NONE;

4057

else reqbyte |= branchreqbyte; /* To "or" REQ_VARY */

4058

}

4059

4060

/* If lookbehind, check that this branch matches a fixed-length string,

4061

and put the length into the OP_REVERSE item. Temporarily mark the end of

4062

the branch with OP_END. */

4063

4064

if (lookbehind)

4065

{

4066

int length;

4067

*code = OP_END;

4068

length = find_fixedlength(last_branch, options);

4069

DPRINTF(("fixed length = %d\n", length));

4070

if (length < 0)

4071

{

4072

*errorptr = (length == -2)? ERR36 : ERR25;

4073

*ptrptr = ptr;

4074

return FALSE;

4075

}

4076

PUT(reverse_count, 0, length);

4077

}

4078

4079

/* Reached end of expression, either ')' or end of pattern. Go back through

4080

the alternative branches and reverse the chain of offsets, with the field in

4081

the BRA item now becoming an offset to the first alternative. If there are

4082

no alternatives, it points to the end of the group. The length in the

4083

terminating ket is always the length of the whole bracketed item. If any of

4084

the ims options were changed inside the group, compile a resetting op-code

4085

following, except at the very end of the pattern. Return leaving the pointer

4086

at the terminating char. */

4087

4088

if (*ptr != '|')

4089

{

4090

int length = code - last_branch;

4091

4092

{

4093

int prev_length = GET(last_branch, 1);

4094

PUT(last_branch, 1, length);

4095

length = prev_length;

4096

last_branch -= length;

4097

}

4098

while (length > 0);

4099

4100

/* Fill in the ket */

4101

4102

*code = OP_KET;

4103

PUT(code, 1, code - start_bracket);

4104

code += 1 + LINK_SIZE;

4105

4106

/* Resetting option if needed */

4107

4108

if ((options & PCRE_IMS) != oldims && *ptr == ')')

4109

{

4110

*code++ = OP_OPT;

4111

*code++ = oldims;

4112

}

4113

4114

/* Set values to pass back */

4115

4116

*codeptr = code;

4117

*ptrptr = ptr;

4118

*firstbyteptr = firstbyte;

4119

*reqbyteptr = reqbyte;

4120

return TRUE;

4121

}

4122

4123

/* Another branch follows; insert an "or" node. Its length field points back

4124

to the previous branch while the bracket remains open. At the end the chain

4125

is reversed. It's done like this so that the start of the bracket has a

4126

zero offset until it is closed, making it possible to detect recursion. */

4127

4128

*code = OP_ALT;

4129

PUT(code, 1, code - last_branch);

4130

bc.current = last_branch = code;

4131

code += 1 + LINK_SIZE;

4132

ptr++;

4133

}

4134

/* Control never reaches here */

4135

}

4136

4137

4138

4139

4140

/*************************************************

4141

* Check for anchored expression *

4142

*************************************************/

4143

4144

/* Try to find out if this is an anchored regular expression. Consider each

4145

alternative branch. If they all start with OP_SOD or OP_CIRC, or with a bracket

4146

all of whose alternatives start with OP_SOD or OP_CIRC (recurse ad lib), then

4147

it's anchored. However, if this is a multiline pattern, then only OP_SOD

4148

counts, since OP_CIRC can match in the middle.

4149

4150

We can also consider a regex to be anchored if OP_SOM starts all its branches.

4151

This is the code for \G, which means "match at start of match position, taking

4152

into account the match offset".

4153

4154

A branch is also implicitly anchored if it starts with .* and DOTALL is set,

4155

because that will try the rest of the pattern at all possible matching points,

4156

so there is no point trying again.... er ....

4157

4158

.... except when the .* appears inside capturing parentheses, and there is a

4159

subsequent back reference to those parentheses. We haven't enough information

4160

to catch that case precisely.

4161

4162

At first, the best we could do was to detect when .* was in capturing brackets

4163

and the highest back reference was greater than or equal to that level.

4164

However, by keeping a bitmap of the first 31 back references, we can catch some

4165

of the more common cases more precisely.

4166

4167

Arguments:

4168

code points to start of expression (the bracket)

4169

options points to the options setting

4170

bracket_map a bitmap of which brackets we are inside while testing; this

4171

handles up to substring 31; after that we just have to take

4172

the less precise approach

4173

backref_map the back reference bitmap

4174

4175

Returns: TRUE or FALSE

4176

4177

4178

static BOOL

4179

is_anchored(register const uschar *code, int *options, unsigned int bracket_map,

4180

unsigned int backref_map)

4181

{

4182

do {

4183

const uschar *scode =

4184

first_significant_code(code + 1+LINK_SIZE, options, PCRE_MULTILINE, FALSE);

4185

4186

4187

/* Capturing brackets */

4188

4189

if (op > OP_BRA)

4190

{

4191

int new_map;

4192

op -= OP_BRA;

4193

if (op > EXTRACT_BASIC_MAX) op = GET2(scode, 2+LINK_SIZE);

4194

new_map = bracket_map | ((op < 32)? (1 << op) : 1);

4195

if (!is_anchored(scode, options, new_map, backref_map)) return FALSE;

4196

}

4197

4198

/* Other brackets */

4199

4200

else if (op == OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)

4201

{

4202

if (!is_anchored(scode, options, bracket_map, backref_map)) return FALSE;

4203

}

4204

4205

/* .* is not anchored unless DOTALL is set and it isn't in brackets that

4206

are or may be referenced. */

4207

4208

else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR) &&

4209

(*options & PCRE_DOTALL) != 0)

4210

{

4211

if (scode[1] != OP_ANY || (bracket_map & backref_map) != 0) return FALSE;

4212

}

4213

4214

/* Check for explicit anchoring */

4215

4216

else if (op != OP_SOD && op != OP_SOM &&

4217

((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC))

4218

return FALSE;

4219

code += GET(code, 1);

4220

}

4221

while (*code == OP_ALT); /* Loop for each alternative */

4222

return TRUE;

4223

}

4224

4225

4226

4227

/*************************************************

4228

* Check for starting with ^ or .* *

4229

*************************************************/

4230

4231

/* This is called to find out if every branch starts with ^ or .* so that

4232

"first char" processing can be done to speed things up in multiline

4233

matching and for non-DOTALL patterns that start with .* (which must start at

4234

the beginning or after \n). As in the case of is_anchored() (see above), we

4235

have to take account of back references to capturing brackets that contain .*

4236

because in that case we can't make the assumption.

4237

4238

Arguments:

4239

code points to start of expression (the bracket)

4240

bracket_map a bitmap of which brackets we are inside while testing; this

4241

handles up to substring 31; after that we just have to take

4242

the less precise approach

4243

backref_map the back reference bitmap

4244

4245

Returns: TRUE or FALSE

4246

4247

4248

static BOOL

4249

is_startline(const uschar *code, unsigned int bracket_map,

4250

unsigned int backref_map)

4251

{

4252

do {

4253

const uschar *scode = first_significant_code(code + 1+LINK_SIZE, NULL, 0,

4254

FALSE);

4255

4256

4257

/* Capturing brackets */

4258

4259

if (op > OP_BRA)

4260

{

4261

int new_map;

4262

op -= OP_BRA;

4263

if (op > EXTRACT_BASIC_MAX) op = GET2(scode, 2+LINK_SIZE);

4264

new_map = bracket_map | ((op < 32)? (1 << op) : 1);

4265

if (!is_startline(scode, new_map, backref_map)) return FALSE;

4266

}

4267

4268

/* Other brackets */

4269

4270

else if (op == OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)

4271

{ if (!is_startline(scode, bracket_map, backref_map)) return FALSE; }

4272

4273

/* .* means "start at start or after \n" if it isn't in brackets that

4274

may be referenced. */

4275

4276

else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)

4277

{

4278

if (scode[1] != OP_ANY || (bracket_map & backref_map) != 0) return FALSE;

4279

}

4280

4281

/* Check for explicit circumflex */

4282

4283

else if (op != OP_CIRC) return FALSE;

4284

4285

/* Move on to the next alternative */

4286

4287

code += GET(code, 1);

4288

}

4289

while (*code == OP_ALT); /* Loop for each alternative */

4290

return TRUE;

4291

}

4292

4293

4294

4295

/*************************************************

4296

* Check for asserted fixed first char *

4297

*************************************************/

4298

4299

/* During compilation, the "first char" settings from forward assertions are

4300

discarded, because they can cause conflicts with actual literals that follow.

4301

However, if we end up without a first char setting for an unanchored pattern,

4302

it is worth scanning the regex to see if there is an initial asserted first

4303

char. If all branches start with the same asserted char, or with a bracket all

4304

of whose alternatives start with the same asserted char (recurse ad lib), then

4305

we return that char, otherwise -1.

4306

4307

Arguments:

4308

code points to start of expression (the bracket)

4309

options pointer to the options (used to check casing changes)

4310

inassert TRUE if in an assertion

4311

4312

Returns: -1 or the fixed first char

4313

4314

4315

static int

4316

find_firstassertedchar(const uschar *code, int *options, BOOL inassert)

4317

{

4318

4319

do {

4320

int d;

4321

const uschar *scode =

4322

first_significant_code(code + 1+LINK_SIZE, options, PCRE_CASELESS, TRUE);

4323

4324

4325

if (op >= OP_BRA) op = OP_BRA;

4326

4327

switch(op)

4328

{

4329

default:

4330

return -1;

4331

4332

case OP_BRA:

4333

case OP_ASSERT:

4334

case OP_ONCE:

4335

case OP_COND:

4336

if ((d = find_firstassertedchar(scode, options, op == OP_ASSERT)) < 0)

4337

return -1;

4338

if (c < 0) c = d; else if (c != d) return -1;

4339

break;

4340

4341

case OP_EXACT: /* Fall through */

4342

scode += 2;

4343

4344

case OP_CHAR:

4345

case OP_CHARNC:

4346

case OP_PLUS:

4347

case OP_MINPLUS:

4348

if (!inassert) return -1;

4349

if (c < 0)

4350

{

4351

c = scode[1];

4352

if ((*options & PCRE_CASELESS) != 0) c |= REQ_CASELESS;

4353

}

4354

else if (c != scode[1]) return -1;

4355

break;

4356

}

4357

4358

code += GET(code, 1);

4359

}

4360

while (*code == OP_ALT);

4361

return c;

4362

}

4363

4364

4365

4366

4367

#ifdef SUPPORT_UTF8
/*************************************************
*         Validate a UTF-8 string                *
*************************************************/

/* This function is called (optionally) at the start of compile or match, to
validate that a supposed UTF-8 string is actually valid. The early check means
that subsequent code can assume it is dealing with a valid string. The check
can be turned off for maximum performance, but then consequences of supplying
an invalid string are then undefined.

Arguments:
  string       points to the string
  length       length of string, or -1 if the string is zero-terminated

Returns:       < 0    if the string is a valid UTF-8 string
               >= 0   otherwise; the value is the offset of the bad byte
*/

static int
valid_utf8(const uschar *string, int length)
{
  const uschar *p;

  /* A negative length means the string is zero-terminated: measure it */

  if (length < 0)
  {
    for (p = string; *p != 0; p++);
    length = p - string;
  }

  for (p = string; length-- > 0; p++)
  {
    int ab;
    int c = *p;

    if (c < 128) continue;                      /* single-byte character */
    if ((c & 0xc0) != 0xc0) return p - string;  /* not a valid lead byte */
    ab = utf8_table4[c & 0x3f];  /* Number of additional bytes */
    if (length < ab) return p - string;         /* sequence is truncated */
    length -= ab;

    /* Check top bits in the second byte */
    if ((*(++p) & 0xc0) != 0x80) return p - string;

    /* Check for overlong sequences for each different length */
    switch (ab)
    {
      /* Check for xx00 000x */
      case 1:
        if ((c & 0x3e) == 0) return p - string;
        continue;   /* We know there aren't any more bytes to check */

      /* Check for 1110 0000, xx0x xxxx */
      case 2:
        if (c == 0xe0 && (*p & 0x20) == 0) return p - string;
        break;

      /* Check for 1111 0000, xx00 xxxx */
      case 3:
        if (c == 0xf0 && (*p & 0x30) == 0) return p - string;
        break;

      /* Check for 1111 1000, xx00 0xxx */
      case 4:
        if (c == 0xf8 && (*p & 0x38) == 0) return p - string;
        break;

      /* Check for leading 0xfe or 0xff, and then for 1111 1100, xx00 00xx */
      case 5:
        if (c == 0xfe || c == 0xff ||
           (c == 0xfc && (*p & 0x3c) == 0)) return p - string;
        break;
    }

    /* Check for valid bytes after the 2nd, if any; all must start 10 */
    while (--ab > 0)
    {
      if ((*(++p) & 0xc0) != 0x80) return p - string;
    }
  }

  return -1;
}
#endif

4450

4451

4452

4453

/*************************************************

4454

* Compile a Regular Expression *

4455

*************************************************/

4456

4457

/* This function takes a string and returns a pointer to a block of store
holding a compiled version of the expression.

Arguments:
  pattern      the regular expression
  options      various option bits
  errorptr     pointer to pointer to error text
  erroroffset  ptr offset in pattern where error was detected
  tables       pointer to character tables or NULL

Returns:       pointer to compiled data block, or NULL on error,
               with errorptr and erroroffset set
*/


4471

EXPORT pcre *

4472

pcre_compile(const char *pattern, int options, const char **errorptr,

4473

int *erroroffset, const unsigned char *tables)

4474

{

4475

real_pcre *re;

4476

int length = 1 + LINK_SIZE; /* For initial BRA plus length */

4477

int c, firstbyte, reqbyte;

4478

int bracount = 0;

4479

int branch_extra = 0;

4480

int branch_newextra;

4481

int item_count = -1;

4482

int name_count = 0;

4483

int max_name_size = 0;

4484

int lastitemlength = 0;

4485

#ifdef SUPPORT_UTF8

4486

BOOL utf8;

4487

BOOL class_utf8;

4488

#endif

4489

BOOL inescq = FALSE;

4490

unsigned int brastackptr = 0;

4491

size_t size;

4492

uschar *code;

4493

const uschar *codestart;

4494

const uschar *ptr;

4495

compile_data compile_block;

4496

int brastack[BRASTACK_SIZE];

4497

uschar bralenstack[BRASTACK_SIZE];

4498

4499

/* We can't pass back an error message if errorptr is NULL; I guess the best we

4500

can do is just return NULL. */

4501

4502

if (errorptr == NULL) return NULL;

4503

*errorptr = NULL;

4504

4505

/* However, we can give a message for this error */

4506

4507

if (erroroffset == NULL)

4508

{

4509

*errorptr = ERR16;

4510

return NULL;

4511

}

4512

*erroroffset = 0;

4513

4514

/* Can't support UTF8 unless PCRE has been compiled to include the code. */

4515

4516

#ifdef SUPPORT_UTF8

4517

utf8 = (options & PCRE_UTF8) != 0;

4518

if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&

4519

(*erroroffset = valid_utf8((uschar *)pattern, -1)) >= 0)

4520

{

4521

*errorptr = ERR44;

4522

return NULL;

4523

}

4524

#else

4525

if ((options & PCRE_UTF8) != 0)

4526

{

4527

*errorptr = ERR32;

4528

return NULL;

4529

}

4530

#endif

4531

4532

if ((options & ~PUBLIC_OPTIONS) != 0)

4533

{

4534

*errorptr = ERR17;

4535

return NULL;

4536

}

4537

4538

/* Set up pointers to the individual character tables */

4539

4540

if (tables == NULL) tables = pcre_default_tables;

4541

compile_block.lcc = tables + lcc_offset;

4542

compile_block.fcc = tables + fcc_offset;

4543

compile_block.cbits = tables + cbits_offset;

4544

compile_block.ctypes = tables + ctypes_offset;

4545

4546

/* Maximum back reference and backref bitmap. This is updated for numeric

4547

references during the first pass, but for named references during the actual

4548

compile pass. The bitmap records up to 31 back references to help in deciding

4549

whether (.*) can be treated as anchored or not. */

4550

4551

compile_block.top_backref = 0;

4552

compile_block.backref_map = 0;

4553

4554

/* Reflect pattern for debugging output */

4555

4556

DPRINTF(("------------------------------------------------------------------\n"));

4557

DPRINTF(("%s\n", pattern));

4558

4559

/* The first thing to do is to make a pass over the pattern to compute the

4560

amount of store required to hold the compiled code. This does not have to be

4561

perfect as long as errors are overestimates. At the same time we can detect any

4562

flag settings right at the start, and extract them. Make an attempt to correct

4563

for any counted white space if an "extended" flag setting appears late in the

4564

pattern. We can't be so clever for #-comments. */

4565

4566

ptr = (const uschar *)(pattern - 1);

4567

while ((c = *(++ptr)) != 0)

4568

{

4569

int min, max;

4570

int class_optcount;

4571

int bracket_length;

4572

int duplength;

4573

4574

/* If we are inside a \Q...\E sequence, all chars are literal */

4575

4576

if (inescq)

4577

{

4578

if ((options & PCRE_AUTO_CALLOUT) != 0) length += 2 + 2*LINK_SIZE;

4579

goto NORMAL_CHAR;

4580

}

4581

4582

/* Otherwise, first check for ignored whitespace and comments */

4583

4584

if ((options & PCRE_EXTENDED) != 0)

4585

{

4586

if ((compile_block.ctypes[c] & ctype_space) != 0) continue;

4587

if (c == '#')

4588

{

4589

/* The space before the ; is to avoid a warning on a silly compiler

4590

on the Macintosh. */

4591

while ((c = *(++ptr)) != 0 && c != NEWLINE) ;

4592

if (c == 0) break;

4593

continue;

4594

}

4595

}

4596

4597

item_count++; /* Is zero for the first non-comment item */

4598

4599

/* Allow space for auto callout before every item except quantifiers. */

4600

4601

if ((options & PCRE_AUTO_CALLOUT) != 0 &&

4602

c != '*' && c != '+' && c != '?' &&

4603

(c != '{' || !is_counted_repeat(ptr + 1)))

4604

length += 2 + 2*LINK_SIZE;

4605

4606

switch(c)

4607

{

4608

/* A backslashed item may be an escaped data character or it may be a

4609

character type. */

4610

4611

case '\\':

4612

c = check_escape(&ptr, errorptr, bracount, options, FALSE);

4613

if (*errorptr != NULL) goto PCRE_ERROR_RETURN;

4614

4615

lastitemlength = 1; /* Default length of last item for repeats */

4616

4617

if (c >= 0) /* Data character */

4618

{

4619

length += 2; /* For a one-byte character */

4620

4621

#ifdef SUPPORT_UTF8

4622

if (utf8 && c > 127)

4623

{

4624

int i;

4625

for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)

4626

if (c <= utf8_table1[i]) break;

4627

length += i;

4628

lastitemlength += i;

4629

}

4630

#endif

4631

4632

continue;

4633

}

4634

4635

/* If \Q, enter "literal" mode */

4636

4637

if (-c == ESC_Q)

4638

{

4639

inescq = TRUE;

4640

continue;

4641

}

4642

4643

/* \X is supported only if Unicode property support is compiled */

4644

4645

#ifndef SUPPORT_UCP

4646

if (-c == ESC_X)

4647

{

4648

*errorptr = ERR45;

4649

goto PCRE_ERROR_RETURN;

4650

}

4651

#endif

4652

4653

/* \P and \p are for Unicode properties, but only when the support has

4654

been compiled. Each item needs 2 bytes. */

4655

4656

else if (-c == ESC_P || -c == ESC_p)

4657

{

4658

#ifdef SUPPORT_UCP

4659

BOOL negated;

4660

length += 2;

4661

lastitemlength = 2;

4662

if (get_ucp(&ptr, &negated, errorptr) < 0) goto PCRE_ERROR_RETURN;

4663

continue;

4664

#else

4665

*errorptr = ERR45;

4666

goto PCRE_ERROR_RETURN;

4667

#endif

4668

}

4669

4670

/* Other escapes need one byte */

4671

4672

length++;

4673

4674

/* A back reference needs an additional 2 bytes, plus either one or 5

4675

bytes for a repeat. We also need to keep the value of the highest

4676

back reference. */

4677

4678

if (c <= -ESC_REF)

4679

{

4680

int refnum = -c - ESC_REF;

4681

compile_block.backref_map |= (refnum < 32)? (1 << refnum) : 1;

4682

if (refnum > compile_block.top_backref)

4683

compile_block.top_backref = refnum;

4684

length += 2; /* For single back reference */

4685

if (ptr[1] == '{' && is_counted_repeat(ptr+2))

4686

{

4687

ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);

4688

if (*errorptr != NULL) goto PCRE_ERROR_RETURN;

4689

if ((min == 0 && (max == 1 || max == -1)) ||

4690

(min == 1 && max == -1))

4691

length++;

4692

else length += 5;

4693

if (ptr[1] == '?') ptr++;

4694

}

4695

}

4696

continue;

4697

4698

case '^': /* Single-byte metacharacters */

4699

case '.':

4700

case '$':

4701

length++;

4702

lastitemlength = 1;

4703

continue;

4704

4705

case '*': /* These repeats won't be after brackets; */

4706

case '+': /* those are handled separately */

4707

case '?':

4708

length++;

4709

goto POSESSIVE; /* A few lines below */

4710

4711

/* This covers the cases of braced repeats after a single char, metachar,

4712

class, or back reference. */

4713

4714

case '{':

4715

if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;

4716

ptr = read_repeat_counts(ptr+1, &min, &max, errorptr);

4717

if (*errorptr != NULL) goto PCRE_ERROR_RETURN;

4718

4719

/* These special cases just insert one extra opcode */

4720

4721

if ((min == 0 && (max == 1 || max == -1)) ||

4722

(min == 1 && max == -1))

4723

length++;

4724

4725

/* These cases might insert additional copies of a preceding character. */

4726

4727

else

4728

{

4729

if (min != 1)

4730

{

4731

length -= lastitemlength; /* Uncount the original char or metachar */

4732

if (min > 0) length += 3 + lastitemlength;

4733

}

4734

length += lastitemlength + ((max > 0)? 3 : 1);

4735

}

4736

4737

if (ptr[1] == '?') ptr++; /* Needs no extra length */

4738

4739

POSESSIVE: /* Test for possessive quantifier */

4740

if (ptr[1] == '+')

4741

{

4742

ptr++;

4743

length += 2 + 2*LINK_SIZE; /* Allow for atomic brackets */

4744

}

4745

continue;

4746

4747

/* An alternation contains an offset to the next branch or ket. If any ims

4748

options changed in the previous branch(es), and/or if we are in a

4749

lookbehind assertion, extra space will be needed at the start of the

4750

branch. This is handled by branch_extra. */

4751

4752

case '|':

4753

length += 1 + LINK_SIZE + branch_extra;

4754

continue;

4755

4756

/* A character class uses 33 characters provided that all the character

4757

values are less than 256. Otherwise, it uses a bit map for low valued

4758

characters, and individual items for others. Don't worry about character

4759

types that aren't allowed in classes - they'll get picked up during the

4760

compile. A character class that contains only one single-byte character

4761

uses 2 or 3 bytes, depending on whether it is negated or not. Notice this

4762

where we can. (In UTF-8 mode we can do this only for chars < 128.) */

4763

4764

case '[':

4765

if (*(++ptr) == '^')

4766

{

4767

class_optcount = 10; /* Greater than one */

4768

ptr++;

4769

}

4770

else class_optcount = 0;

4771

4772

#ifdef SUPPORT_UTF8

4773

class_utf8 = FALSE;

4774

#endif

4775

4776

/* Written as a "do" so that an initial ']' is taken as data */

4777

4778

if (*ptr != 0) do

4779

{

4780

/* Inside \Q...\E everything is literal except \E */

4781

4782

if (inescq)

4783

{

4784

if (*ptr != '\\' || ptr[1] != 'E') goto GET_ONE_CHARACTER;

4785

inescq = FALSE;

4786

ptr += 1;

4787

continue;

4788

}

4789

4790

/* Outside \Q...\E, check for escapes */

4791

4792

if (*ptr == '\\')

4793

{

4794

c = check_escape(&ptr, errorptr, bracount, options, TRUE);

4795

if (*errorptr != NULL) goto PCRE_ERROR_RETURN;

4796

4797

/* \b is backspace inside a class; \X is literal */

4798

4799

if (-c == ESC_b) c = '\b';

4800

else if (-c == ESC_X) c = 'X';

4801

4802

/* \Q enters quoting mode */

4803

4804

else if (-c == ESC_Q)

4805

{

4806

inescq = TRUE;

4807

continue;

4808

}

4809

4810

/* Handle escapes that turn into characters */

4811

4812

if (c >= 0) goto NON_SPECIAL_CHARACTER;

4813

4814

/* Escapes that are meta-things. The normal ones just affect the

4815

bit map, but Unicode properties require an XCLASS extended item. */

4816

4817

else

4818

{

4819

class_optcount = 10; /* \d, \s etc; make sure > 1 */

4820

#ifdef SUPPORT_UTF8

4821

if (-c == ESC_p || -c == ESC_P)

4822

{

4823

if (!class_utf8)

4824

{

4825

class_utf8 = TRUE;

4826

length += LINK_SIZE + 2;

4827

}

4828

length += 2;

4829

}

4830

#endif

4831

}

4832

}

4833

4834

/* Check the syntax for POSIX stuff. The bits we actually handle are

4835

checked during the real compile phase. */

4836

4837

else if (*ptr == '[' && check_posix_syntax(ptr, &ptr, &compile_block))

4838

{

4839

ptr++;

4840

class_optcount = 10; /* Make sure > 1 */

4841

}

4842

4843

/* Anything else increments the possible optimization count. We have to

4844

detect ranges here so that we can compute the number of extra ranges for

4845

caseless wide characters when UCP support is available. If there are wide

4846

characters, we are going to have to use an XCLASS, even for single

4847

characters. */

4848

4849

else

4850

{

4851

int d;

4852

4853

GET_ONE_CHARACTER:

4854

4855

#ifdef SUPPORT_UTF8

4856

if (utf8)

4857

{

4858

int extra = 0;

4859

GETCHARLEN(c, ptr, extra);

4860

ptr += extra;

4861

}

4862

else c = *ptr;

4863

#else

4864

c = *ptr;

4865

#endif

4866

4867

/* Come here from handling \ above when it escapes to a char value */

4868

4869

NON_SPECIAL_CHARACTER:

4870

class_optcount++;

4871

4872

d = -1;

4873

if (ptr[1] == '-')

4874

{

4875

uschar const *hyptr = ptr++;

4876

if (ptr[1] == '\\')

4877

{

4878

ptr++;

4879

d = check_escape(&ptr, errorptr, bracount, options, TRUE);

4880

if (*errorptr != NULL) goto PCRE_ERROR_RETURN;

4881

if (-d == ESC_b) d = '\b'; /* backspace */

4882

else if (-d == ESC_X) d = 'X'; /* literal X in a class */

4883

}

4884

else if (ptr[1] != 0 && ptr[1] != ']')

4885

{

4886

ptr++;

4887

#ifdef SUPPORT_UTF8

4888

if (utf8)

4889

{

4890

int extra = 0;

4891

GETCHARLEN(d, ptr, extra);

4892

ptr += extra;

4893

}

4894

else

4895

#endif

4896

d = *ptr;

4897

}

4898

if (d < 0) ptr = hyptr; /* go back to hyphen as data */

4899

}

4900

4901

/* If d >= 0 we have a range. In UTF-8 mode, if the end is > 255, or >

4902

127 for caseless matching, we will need to use an XCLASS. */

4903

4904

if (d >= 0)

4905

{

4906

class_optcount = 10; /* Ensure > 1 */

4907

if (d < c)

4908

{

4909

*errorptr = ERR8;

4910

goto PCRE_ERROR_RETURN;

4911

}

4912

4913

#ifdef SUPPORT_UTF8

4914

if (utf8 && (d > 255 || ((options & PCRE_CASELESS) != 0 && d > 127)))

4915

{

4916

uschar buffer[6];

4917

if (!class_utf8) /* Allow for XCLASS overhead */

4918

{

4919

class_utf8 = TRUE;

4920

length += LINK_SIZE + 2;

4921

}

4922

4923

#ifdef SUPPORT_UCP

4924

/* If we have UCP support, find out how many extra ranges are

4925

needed to map the other case of characters within this range. We

4926

have to mimic the range optimization here, because extending the

4927

range upwards might push d over a boundary that makes is use

4928

another byte in the UTF-8 representation. */

4929

4930

if ((options & PCRE_CASELESS) != 0)

4931

{

4932

int occ, ocd;

4933

int cc = c;

4934

int origd = d;

4935

while (get_othercase_range(&cc, origd, &occ, &ocd))

4936

{

4937

if (occ >= c && ocd <= d) continue; /* Skip embedded */

4938

4939

if (occ < c && ocd >= c - 1) /* Extend the basic range */

4940

{ /* if there is overlap, */

4941

c = occ; /* noting that if occ < c */

4942

continue; /* we can't have ocd > d */

4943

} /* because a subrange is */

4944

if (ocd > d && occ <= d + 1) /* always shorter than */

4945

{ /* the basic range. */

4946

d = ocd;

4947

continue;

4948

}

4949

4950

/* An extra item is needed */

4951

4952

length += 1 + ord2utf8(occ, buffer) +

4953

((occ == ocd)? 0 : ord2utf8(ocd, buffer));

4954

}

4955

}

4956

#endif /* SUPPORT_UCP */

4957

4958

/* The length of the (possibly extended) range */

4959

4960

length += 1 + ord2utf8(c, buffer) + ord2utf8(d, buffer);

4961

}

4962

#endif /* SUPPORT_UTF8 */

4963

4964

}

4965

4966

/* We have a single character. There is nothing to be done unless we

4967

are in UTF-8 mode. If the char is > 255, or 127 when caseless, we must

4968

allow for an XCL_SINGLE item, doubled for caselessness if there is UCP

4969

support. */

4970

4971

else

4972

{

4973

#ifdef SUPPORT_UTF8

4974

if (utf8 && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127)))

4975

{

4976

uschar buffer[6];

4977

class_optcount = 10; /* Ensure > 1 */

4978

if (!class_utf8) /* Allow for XCLASS overhead */

4979

{

4980

class_utf8 = TRUE;

4981

length += LINK_SIZE + 2;

4982

}

4983

#ifdef SUPPORT_UCP

4984

length += (((options & PCRE_CASELESS) != 0)? 2 : 1) *

4985

(1 + ord2utf8(c, buffer));

4986

#else /* SUPPORT_UCP */

4987

length += 1 + ord2utf8(c, buffer);

4988

#endif /* SUPPORT_UCP */

4989

}

4990

#endif /* SUPPORT_UTF8 */

4991

}

4992

}

4993

}

4994

while (*(++ptr) != 0 && (inescq || *ptr != ']')); /* Concludes "do" above */

4995

4996

if (*ptr == 0) /* Missing terminating ']' */

4997

{

4998

*errorptr = ERR6;

4999

goto PCRE_ERROR_RETURN;

5000

}

5001

5002

/* We can optimize when there was only one optimizable character. Repeats

5003

for positive and negated single one-byte chars are handled by the general

5004

code. Here, we handle repeats for the class opcodes. */

5005

5006

if (class_optcount == 1) length += 3; else

5007

{

5008

length += 33;

5009

5010

/* A repeat needs either 1 or 5 bytes. If it is a possessive quantifier,

5011

we also need extra for wrapping the whole thing in a sub-pattern. */

5012

5013

if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2))

5014

{

5015

ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);

5016

if (*errorptr != NULL) goto PCRE_ERROR_RETURN;

5017

if ((min == 0 && (max == 1 || max == -1)) ||

5018

(min == 1 && max == -1))

5019

length++;

5020

else length += 5;

5021

if (ptr[1] == '+')

5022

{

5023

ptr++;

5024

length += 2 + 2*LINK_SIZE;

5025

}

5026

else if (ptr[1] == '?') ptr++;

5027

}

5028

}

5029

continue;

5030

5031

/* Brackets may be genuine groups or special things */

5032

5033

case '(':

5034

branch_newextra = 0;

5035

bracket_length = 1 + LINK_SIZE;

5036

5037

/* Handle special forms of bracket, which all start (? */

5038

5039

if (ptr[1] == '?')

5040

{

5041

int set, unset;

5042

int *optset;

5043

5044

switch (c = ptr[2])

5045

{

5046

/* Skip over comments entirely */

5047

case '#':

5048

ptr += 3;

5049

while (*ptr != 0 && *ptr != ')') ptr++;

5050

if (*ptr == 0)

5051

{

5052

*errorptr = ERR18;

5053

goto PCRE_ERROR_RETURN;

5054

}

5055

continue;

5056

5057

/* Non-referencing groups and lookaheads just move the pointer on, and

5058

then behave like a non-special bracket, except that they don't increment

5059

the count of extracting brackets. Ditto for the "once only" bracket,

5060

which is in Perl from version 5.005. */

5061

5062

case ':':

5063

case '=':

5064

case '!':

5065

case '>':

5066

ptr += 2;

5067

break;

5068

5069

/* (?R) specifies a recursive call to the regex, which is an extension

5070

to provide the facility which can be obtained by (?p{perl-code}) in

5071

Perl 5.6. In Perl 5.8 this has become (??{perl-code}).

5072

5073

From PCRE 4.00, items such as (?3) specify subroutine-like "calls" to

5074

the appropriate numbered brackets. This includes both recursive and

5075

non-recursive calls. (?R) is now synonymous with (?0). */

5076

5077

case 'R':

5078

ptr++;

5079

5080

case '0': case '1': case '2': case '3': case '4':

5081

case '5': case '6': case '7': case '8': case '9':

5082

ptr += 2;

5083

if (c != 'R')

5084

while ((digitab[*(++ptr)] & ctype_digit) != 0);

5085

if (*ptr != ')')

5086

{

5087

*errorptr = ERR29;

5088

goto PCRE_ERROR_RETURN;

5089

}

5090

length += 1 + LINK_SIZE;

5091

5092

/* If this item is quantified, it will get wrapped inside brackets so

5093

as to use the code for quantified brackets. We jump down and use the

5094

code that handles this for real brackets. */

5095

5096

if (ptr[1] == '+' || ptr[1] == '*' || ptr[1] == '?' || ptr[1] == '{')

5097

{

5098

length += 2 + 2 * LINK_SIZE; /* to make bracketed */

5099

duplength = 5 + 3 * LINK_SIZE;

5100

goto HANDLE_QUANTIFIED_BRACKETS;

5101

}

5102

continue;

5103

5104

/* (?C) is an extension which provides "callout" - to provide a bit of

5105

the functionality of the Perl (?{...}) feature. An optional number may

5106

follow (default is zero). */

5107

5108

case 'C':

5109

ptr += 2;

5110

while ((digitab[*(++ptr)] & ctype_digit) != 0);

5111

if (*ptr != ')')

5112

{

5113

*errorptr = ERR39;

5114

goto PCRE_ERROR_RETURN;

5115

}

5116

length += 2 + 2*LINK_SIZE;

5117

continue;

5118

5119

/* Named subpatterns are an extension copied from Python */

5120

5121

case 'P':

5122

ptr += 3;

5123

if (*ptr == '<')

5124

{

5125

const uschar *p; /* Don't amalgamate; some compilers */

5126

p = ++ptr; /* grumble at autoincrement in declaration */

5127

while ((compile_block.ctypes[*ptr] & ctype_word) != 0) ptr++;

5128

if (*ptr != '>')

5129

{

5130

*errorptr = ERR42;

5131

goto PCRE_ERROR_RETURN;

5132

}

5133

name_count++;

5134

if (ptr - p > max_name_size) max_name_size = (ptr - p);

5135

break;

5136

}

5137

5138

if (*ptr == '=' || *ptr == '>')

5139

{

5140

while ((compile_block.ctypes[*(++ptr)] & ctype_word) != 0);

5141

if (*ptr != ')')

5142

{

5143

*errorptr = ERR42;

5144

goto PCRE_ERROR_RETURN;

5145

}

5146

break;

5147

}

5148

5149

/* Unknown character after (?P */

5150

5151

*errorptr = ERR41;

5152

goto PCRE_ERROR_RETURN;

5153

5154

/* Lookbehinds are in Perl from version 5.005 */

5155

5156

case '<':

5157

ptr += 3;

5158

if (*ptr == '=' || *ptr == '!')

5159

{

5160

branch_newextra = 1 + LINK_SIZE;

5161

length += 1 + LINK_SIZE; /* For the first branch */

5162

break;

5163

}

5164

*errorptr = ERR24;

5165

goto PCRE_ERROR_RETURN;

5166

5167

/* Conditionals are in Perl from version 5.005. The bracket must either

5168

be followed by a number (for bracket reference) or by an assertion

5169

group, or (a PCRE extension) by 'R' for a recursion test. */

5170

5171

case '(':

5172

if (ptr[3] == 'R' && ptr[4] == ')')

5173

{

5174

ptr += 4;

5175

length += 3;

5176

}

5177

else if ((digitab[ptr[3]] & ctype_digit) != 0)

5178

{

5179

ptr += 4;

5180

length += 3;

5181

while ((digitab[*ptr] & ctype_digit) != 0) ptr++;

5182

if (*ptr != ')')

5183

{

5184

*errorptr = ERR26;

5185

goto PCRE_ERROR_RETURN;

5186

}

5187

}

5188

else /* An assertion must follow */

5189

{

5190

ptr++; /* Can treat like ':' as far as spacing is concerned */

5191

if (ptr[2] != '?' ||

5192

(ptr[3] != '=' && ptr[3] != '!' && ptr[3] != '<') )

5193

{

5194

ptr += 2; /* To get right offset in message */

5195

*errorptr = ERR28;

5196

goto PCRE_ERROR_RETURN;

5197

}

5198

}

5199

break;

5200

5201

/* Else loop checking valid options until ) is met. Anything else is an

5202

error. If we are without any brackets, i.e. at top level, the settings

5203

act as if specified in the options, so massage the options immediately.

5204

This is for backward compatibility with Perl 5.004. */

5205

5206

default:

5207

set = unset = 0;

5208

optset = &set;

5209

ptr += 2;

5210

5211

for (;; ptr++)

5212

{

5213

c = *ptr;

5214

switch (c)

5215

{

5216

case 'i':

5217

*optset |= PCRE_CASELESS;

5218

continue;

5219

5220

case 'm':

5221

*optset |= PCRE_MULTILINE;

5222

continue;

5223

5224

case 's':

5225

*optset |= PCRE_DOTALL;

5226

continue;

5227

5228

case 'x':

5229

*optset |= PCRE_EXTENDED;

5230

continue;

5231

5232

case 'X':

5233

*optset |= PCRE_EXTRA;

5234

continue;

5235

5236

case 'U':

5237

*optset |= PCRE_UNGREEDY;

5238

continue;

5239

5240

case '-':

5241

optset = &unset;

5242

continue;

5243

5244

/* A termination by ')' indicates an options-setting-only item; if

5245

this is at the very start of the pattern (indicated by item_count

5246

being zero), we use it to set the global options. This is helpful

5247

when analyzing the pattern for first characters, etc. Otherwise

5248

nothing is done here and it is handled during the compiling

5249

process.

5250

5251

[Historical note: Up to Perl 5.8, options settings at top level

5252

were always global settings, wherever they appeared in the pattern.

5253

That is, they were equivalent to an external setting. From 5.8

5254

onwards, they apply only to what follows (which is what you might

5255

expect).] */

5256

5257

case ')':

5258

if (item_count == 0)

5259

{

5260

options = (options | set) & (~unset);

5261

set = unset = 0; /* To save length */

5262

item_count--; /* To allow for several */

5263

}

5264

5265

/* Fall through */

5266

5267

/* A termination by ':' indicates the start of a nested group with

5268

the given options set. This is again handled at compile time, but

5269

we must allow for compiled space if any of the ims options are

5270

set. We also have to allow for resetting space at the end of

5271

the group, which is why 4 is added to the length and not just 2.

5272

If there are several changes of options within the same group, this

5273

will lead to an over-estimate on the length, but this shouldn't

5274

matter very much. We also have to allow for resetting options at

5275

the start of any alternations, which we do by setting

5276

branch_newextra to 2. Finally, we record whether the case-dependent

5277

flag ever changes within the regex. This is used by the "required

5278

character" code. */

5279

5280

case ':':

5281

if (((set|unset) & PCRE_IMS) != 0)

5282

{

5283

length += 4;

5284

branch_newextra = 2;

5285

if (((set|unset) & PCRE_CASELESS) != 0) options |= PCRE_ICHANGED;

5286

}

5287

goto END_OPTIONS;

5288

5289

/* Unrecognized option character */

5290

5291

default:

5292

*errorptr = ERR12;

5293

goto PCRE_ERROR_RETURN;

5294

}

5295

}

5296

5297

/* If we hit a closing bracket, that's it - this is a freestanding

5298

option-setting. We need to ensure that branch_extra is updated if

5299

necessary. The only values branch_newextra can have here are 0 or 2.

5300

If the value is 2, then branch_extra must either be 2 or 5, depending

5301

on whether this is a lookbehind group or not. */

5302

5303

END_OPTIONS:

5304

if (c == ')')

5305

{

5306

if (branch_newextra == 2 &&

5307

(branch_extra == 0 || branch_extra == 1+LINK_SIZE))

5308

branch_extra += branch_newextra;

5309

continue;

5310

}

5311

5312

/* If options were terminated by ':' control comes here. Fall through

5313

to handle the group below. */

5314

}

5315

}

5316

5317

/* Extracting brackets must be counted so we can process escapes in a

5318

Perlish way. If the number exceeds EXTRACT_BASIC_MAX we are going to

5319

need an additional 3 bytes of store per extracting bracket. However, if

5320

PCRE_NO_AUTO_CAPTURE is set, unadorned brackets become non-capturing, so we

5321

must leave the count alone (it will always be zero). */

5322

5323

else if ((options & PCRE_NO_AUTO_CAPTURE) == 0)

5324

{

5325

bracount++;

5326

if (bracount > EXTRACT_BASIC_MAX) bracket_length += 3;

5327

}

5328

5329

/* Save length for computing whole length at end if there's a repeat that

5330

requires duplication of the group. Also save the current value of

5331

branch_extra, and start the new group with the new value. If non-zero, this

5332

will either be 2 for a (?imsx: group, or 3 for a lookbehind assertion. */

5333

5334

if (brastackptr >= sizeof(brastack)/sizeof(int))

5335

{

5336

*errorptr = ERR19;

5337

goto PCRE_ERROR_RETURN;

5338

}

5339

5340

bralenstack[brastackptr] = branch_extra;

5341

branch_extra = branch_newextra;

5342

5343

brastack[brastackptr++] = length;

5344

length += bracket_length;

5345

continue;

5346

5347

/* Handle ket. Look for subsequent max/min; for certain sets of values we

5348

have to replicate this bracket up to that many times. If brastackptr is

5349

0 this is an unmatched bracket which will generate an error, but take care

5350

not to try to access brastack[-1] when computing the length and restoring

5351

the branch_extra value. */

5352

5353

case ')':

5354

length += 1 + LINK_SIZE;

5355

if (brastackptr > 0)

5356

{

5357

duplength = length - brastack[--brastackptr];

5358

branch_extra = bralenstack[brastackptr];

5359

}

5360

else duplength = 0;

5361

5362

/* The following code is also used when a recursion such as (?3) is

5363

followed by a quantifier, because in that case, it has to be wrapped inside

5364

brackets so that the quantifier works. The value of duplength must be

5365

set before arrival. */

5366

5367

HANDLE_QUANTIFIED_BRACKETS:

5368

5369

/* Leave ptr at the final char; for read_repeat_counts this happens

5370

automatically; for the others we need an increment. */

5371

5372

if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))

5373

{

5374

ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);

5375

if (*errorptr != NULL) goto PCRE_ERROR_RETURN;

5376

}

5377

else if (c == '*') { min = 0; max = -1; ptr++; }

5378

else if (c == '+') { min = 1; max = -1; ptr++; }

5379

else if (c == '?') { min = 0; max = 1; ptr++; }

5380

else { min = 1; max = 1; }

5381

5382

/* If the minimum is zero, we have to allow for an OP_BRAZERO before the

5383

group, and if the maximum is greater than zero, we have to replicate

5384

maxval-1 times; each replication acquires an OP_BRAZERO plus a nesting

5385

bracket set. */

5386

5387

if (min == 0)

5388

{

5389

length++;

5390

if (max > 0) length += (max - 1) * (duplength + 3 + 2*LINK_SIZE);

5391

}

5392

5393

/* When the minimum is greater than zero, we have to replicate up to

5394

minval-1 times, with no additions required in the copies. Then, if there

5395

is a limited maximum we have to replicate up to maxval-1 times allowing

5396

for a BRAZERO item before each optional copy and nesting brackets for all

5397

but one of the optional copies. */

5398

5399

else

5400

{

5401

length += (min - 1) * duplength;

5402

if (max > min) /* Need this test as max=-1 means no limit */

5403

length += (max - min) * (duplength + 3 + 2*LINK_SIZE)

5404

- (2 + 2*LINK_SIZE);

5405

}

5406

5407

/* Allow space for once brackets for "possessive quantifier" */

5408

5409

if (ptr[1] == '+')

5410

{

5411

ptr++;

5412

length += 2 + 2*LINK_SIZE;

5413

}

5414

continue;

5415

5416

/* Non-special character. It won't be space or # in extended mode, so it is

5417

always a genuine character. If we are in a \Q...\E sequence, check for the

5418

end; if not, we have a literal. */

5419

5420

default:

5421

NORMAL_CHAR:

5422

5423

if (inescq && c == '\\' && ptr[1] == 'E')

5424

{

5425

inescq = FALSE;

5426

ptr++;

5427

continue;

5428

}

5429

5430

length += 2; /* For a one-byte character */

5431

lastitemlength = 1; /* Default length of last item for repeats */

5432

5433

/* In UTF-8 mode, check for additional bytes. */

5434

5435

#ifdef SUPPORT_UTF8

5436

if (utf8 && (c & 0xc0) == 0xc0)

5437

{

5438

while ((ptr[1] & 0xc0) == 0x80) /* Can't flow over the end */

5439

{ /* because the end is marked */

5440

lastitemlength++; /* by a zero byte. */

5441

length++;

5442

ptr++;

5443

}

5444

}

5445

#endif

5446

5447

continue;

5448

}

5449

}

5450

5451

length += 2 + LINK_SIZE; /* For final KET and END */

5452

5453

if ((options & PCRE_AUTO_CALLOUT) != 0)

5454

length += 2 + 2*LINK_SIZE; /* For final callout */

5455

5456

if (length > MAX_PATTERN_SIZE)

5457

{

5458

*errorptr = ERR20;

5459

return NULL;

5460

}

5461

5462

/* Compute the size of data block needed and get it, either from malloc or

5463

externally provided function. */

5464

5465

size = length + sizeof(real_pcre) + name_count * (max_name_size + 3);

5466

re = (real_pcre *)(pcre_malloc)(size);

5467

5468

if (re == NULL)

5469

{

5470

*errorptr = ERR21;

5471

return NULL;

5472

}

5473

5474

/* Put in the magic number, and save the sizes, options, and character table

5475

pointer. NULL is used for the default character tables. The nullpad field is at

5476

the end; it's there to help in the case when a regex compiled on a system with

5477

4-byte pointers is run on another with 8-byte pointers. */

5478

5479

re->magic_number = MAGIC_NUMBER;

5480

re->size = size;

5481

re->options = options;

5482

re->dummy1 = re->dummy2 = 0;

5483

re->name_table_offset = sizeof(real_pcre);

5484

re->name_entry_size = max_name_size + 3;

5485

re->name_count = name_count;

5486

re->tables = (tables == pcre_default_tables)? NULL : tables;

5487

re->nullpad = NULL;

5488

5489

/* The starting points of the name/number translation table and of the code are

5490

passed around in the compile data block. */

5491

5492

compile_block.names_found = 0;

5493

compile_block.name_entry_size = max_name_size + 3;

5494

compile_block.name_table = (uschar *)re + re->name_table_offset;

5495

codestart = compile_block.name_table + re->name_entry_size * re->name_count;

5496

compile_block.start_code = codestart;

5497

compile_block.start_pattern = (const uschar *)pattern;

5498

compile_block.req_varyopt = 0;

5499

compile_block.nopartial = FALSE;

5500

5501

/* Set up a starting, non-extracting bracket, then compile the expression. On

5502

error, *errorptr will be set non-NULL, so we don't need to look at the result

5503

of the function here. */

5504

5505

ptr = (const uschar *)pattern;

5506

code = (uschar *)codestart;

5507

*code = OP_BRA;

5508

bracount = 0;

5509

(void)compile_regex(options, options & PCRE_IMS, &bracount, &code, &ptr,

5510

errorptr, FALSE, 0, &firstbyte, &reqbyte, NULL, &compile_block);

5511

re->top_bracket = bracount;

5512

re->top_backref = compile_block.top_backref;

5513

5514

if (compile_block.nopartial) re->options |= PCRE_NOPARTIAL;

5515

5516

/* If not reached end of pattern on success, there's an excess bracket. */

5517

5518

if (*errorptr == NULL && *ptr != 0) *errorptr = ERR22;

5519

5520

/* Fill in the terminating state and check for disastrous overflow, but

5521

if debugging, leave the test till after things are printed out. */

5522

5523

*code++ = OP_END;

5524

5525

#ifndef DEBUG

5526

if (code - codestart > length) *errorptr = ERR23;

5527

#endif

5528

5529

/* Give an error if there's back reference to a non-existent capturing

5530

subpattern. */

5531

5532

if (re->top_backref > re->top_bracket) *errorptr = ERR15;

5533

5534

/* Failed to compile, or error while post-processing */

5535

5536

if (*errorptr != NULL)

5537

{

5538

(pcre_free)(re);

5539

PCRE_ERROR_RETURN:

5540

*erroroffset = ptr - (const uschar *)pattern;

5541

return NULL;

5542

}

5543

5544

/* If the anchored option was not passed, set the flag if we can determine that

5545

the pattern is anchored by virtue of ^ characters or \A or anything else (such

5546

as starting with .* when DOTALL is set).

5547

5548

Otherwise, if we know what the first character has to be, save it, because that

5549

speeds up unanchored matches no end. If not, see if we can set the

5550

PCRE_STARTLINE flag. This is helpful for multiline matches when all branches

5551

start with ^. and also when all branches start with .* for non-DOTALL matches.
*/

5552

5553

5554

if ((options & PCRE_ANCHORED) == 0)

5555

{

5556

int temp_options = options;

5557

if (is_anchored(codestart, &temp_options, 0, compile_block.backref_map))

5558

re->options |= PCRE_ANCHORED;

5559

else

5560

{

5561

if (firstbyte < 0)

5562

firstbyte = find_firstassertedchar(codestart, &temp_options, FALSE);

5563

if (firstbyte >= 0) /* Remove caseless flag for non-caseable chars */

5564

{

5565

int ch = firstbyte & 255;

5566

re->first_byte = ((firstbyte & REQ_CASELESS) != 0 &&

5567

compile_block.fcc[ch] == ch)? ch : firstbyte;

5568

re->options |= PCRE_FIRSTSET;

5569

}

5570

else if (is_startline(codestart, 0, compile_block.backref_map))

5571

re->options |= PCRE_STARTLINE;

5572

}

5573

}

5574

5575

/* For an anchored pattern, we use the "required byte" only if it follows a

5576

variable length item in the regex. Remove the caseless flag for non-caseable

5577

bytes. */

5578

5579

if (reqbyte >= 0 &&

5580

((re->options & PCRE_ANCHORED) == 0 || (reqbyte & REQ_VARY) != 0))

5581

{

5582

int ch = reqbyte & 255;

5583

re->req_byte = ((reqbyte & REQ_CASELESS) != 0 &&

5584

compile_block.fcc[ch] == ch)? (reqbyte & ~REQ_CASELESS) : reqbyte;

5585

re->options |= PCRE_REQCHSET;

5586

}

5587

5588

/* Print out the compiled data for debugging */

5589

5590

#ifdef DEBUG

5591

5592

printf("Length = %d top_bracket = %d top_backref = %d\n",

5593

length, re->top_bracket, re->top_backref);

5594

5595

if (re->options != 0)

5596

{

5597

printf("%s%s%s%s%s%s%s%s%s%s\n",

5598

((re->options & PCRE_NOPARTIAL) != 0)? "nopartial " : "",

5599

((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",

5600

((re->options & PCRE_CASELESS) != 0)? "caseless " : "",

5601

((re->options & PCRE_ICHANGED) != 0)? "case state changed " : "",

5602

((re->options & PCRE_EXTENDED) != 0)? "extended " : "",

5603

((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",

5604

((re->options & PCRE_DOTALL) != 0)? "dotall " : "",

5605

((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",

5606

((re->options & PCRE_EXTRA) != 0)? "extra " : "",

5607

((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : "");

5608

}

5609

5610

if ((re->options & PCRE_FIRSTSET) != 0)

5611

{

5612

int ch = re->first_byte & 255;

5613

const char *caseless = ((re->first_byte & REQ_CASELESS) == 0)? "" : " (caseless)";

5614

if (isprint(ch)) printf("First char = %c%s\n", ch, caseless);

5615

else printf("First char = \\x%02x%s\n", ch, caseless);

5616

}

5617

5618

if ((re->options & PCRE_REQCHSET) != 0)

5619

{

5620

int ch = re->req_byte & 255;

5621

const char *caseless = ((re->req_byte & REQ_CASELESS) == 0)? "" : " (caseless)";

5622

if (isprint(ch)) printf("Req char = %c%s\n", ch, caseless);

5623

else printf("Req char = \\x%02x%s\n", ch, caseless);

5624

}

5625

5626

print_internals(re, stdout);

5627

5628

/* This check is done here in the debugging case so that the code that

5629

was compiled can be seen. */

5630

5631

if (code - codestart > length)

5632

{

5633

*errorptr = ERR23;

5634

(pcre_free)(re);

5635

*erroroffset = ptr - (uschar *)pattern;

5636

return NULL;

5637

}

5638

#endif

5639

5640

return (pcre *)re;

5641

}

5642

5643

5644

5645

/*************************************************

5646

* Match a back-reference *

5647

*************************************************/

5648

5649

/* If a back reference hasn't been set, the length that is passed is greater

5650

than the number of characters left in the string, so the match fails.

5651

5652

Arguments:

5653

offset index into the offset vector

5654

eptr points into the subject

5655

length length to be matched

5656

md points to match data block

5657

ims the ims flags

5658

5659

Returns: TRUE if matched

5660

5661

5662

static BOOL

5663

match_ref(int offset, register const uschar *eptr, int length, match_data *md,

5664

unsigned long int ims)

5665

{

5666

const uschar *p = md->start_subject + md->offset_vector[offset];

5667

5668

#ifdef DEBUG

5669

if (eptr >= md->end_subject)

5670

printf("matching subject <null>");

5671

else

5672

{

5673

printf("matching subject ");

5674

pchars(eptr, length, TRUE, md);

5675

}

5676

printf(" against backref ");

5677

pchars(p, length, FALSE, md);

5678

printf("\n");

5679

#endif

5680

5681

/* Always fail if not enough characters left */

5682

5683

if (length > md->end_subject - eptr) return FALSE;

5684

5685

/* Separate the caselesss case for speed */

5686

5687

if ((ims & PCRE_CASELESS) != 0)

5688

{

5689

while (length-- > 0)

5690

if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;

5691

}

5692

else

5693

{ while (length-- > 0) if (*p++ != *eptr++) return FALSE; }

5694

5695

return TRUE;

5696

}

5697

5698

5699

#ifdef SUPPORT_UTF8

5700

/*************************************************

5701

* Match character against an XCLASS *

5702

*************************************************/

5703

5704

/* This function is called from within the XCLASS code below, to match a

5705

character against an extended class which might match values > 255.

5706

5707

Arguments:

5708

c the character

5709

data points to the flag byte of the XCLASS data

5710

5711

Returns: TRUE if character matches, else FALSE

5712

5713

5714

static BOOL

5715

match_xclass(int c, const uschar *data)

5716

{

5717

int t;

5718

BOOL negated = (*data & XCL_NOT) != 0;

5719

5720

/* Character values < 256 are matched against a bitmap, if one is present. If

5721

not, we still carry on, because there may be ranges that start below 256 in the

5722

additional data. */

5723

5724

if (c < 256)

5725

{

5726

if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0)

5727

return !negated; /* char found */

5728

}

5729

5730

/* First skip the bit map if present. Then match against the list of Unicode

5731

properties or large chars or ranges that end with a large char. We won't ever

5732

encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */

5733

5734

if ((*data++ & XCL_MAP) != 0) data += 32;

5735

5736

while ((t = *data++) != XCL_END)

5737

{

5738

int x, y;

5739

if (t == XCL_SINGLE)

5740

{

5741

GETCHARINC(x, data);

5742

if (c == x) return !negated;

5743

}

5744

else if (t == XCL_RANGE)

5745

{

5746

GETCHARINC(x, data);

5747

GETCHARINC(y, data);

5748

if (c >= x && c <= y) return !negated;

5749

}

5750

5751

#ifdef SUPPORT_UCP

5752

else /* XCL_PROP & XCL_NOTPROP */

5753

{

5754

int chartype, othercase;

5755

int rqdtype = *data++;

5756

int category = ucp_findchar(c, &chartype, &othercase);

5757

if (rqdtype >= 128)

5758

{

5759

if ((rqdtype - 128 == category) == (t == XCL_PROP)) return !negated;

5760

}

5761

else

5762

{

5763

if ((rqdtype == chartype) == (t == XCL_PROP)) return !negated;

5764

}

5765

}

5766

#endif /* SUPPORT_UCP */

5767

}

5768

5769

return negated; /* char did not match */

5770

}

5771

#endif

5772

5773

5774

/***************************************************************************

5775

****************************************************************************

5776

RECURSION IN THE match() FUNCTION

5777

5778

The match() function is highly recursive. Some regular expressions can cause

5779

it to recurse thousands of times. I was writing for Unix, so I just let it

5780

call itself recursively. This uses the stack for saving everything that has

5781

to be saved for a recursive call. On Unix, the stack can be large, and this

5782

works fine.

5783

5784

It turns out that on non-Unix systems there are problems with programs that

5785

use a lot of stack. (This despite the fact that every last chip has oodles

5786

of memory these days, and techniques for extending the stack have been known

5787

for decades.) So....

5788

5789

There is a fudge, triggered by defining NO_RECURSE, which avoids recursive

5790

calls by keeping local variables that need to be preserved in blocks of memory

5791

obtained from malloc instead of on the stack. Macros are used to

5792

achieve this so that the actual code doesn't look very different to what it

5793

always used to.

5794

****************************************************************************

5795

***************************************************************************/

5796

5797

5798

/* These versions of the macros use the stack, as normal */

5799

5800

#ifndef NO_RECURSE

5801

/* REGISTER expands to the "register" storage class. RMATCH is a genuine
recursive call of match() whose result is assigned to rx, and RRETURN is a
plain return of its argument. */

#define REGISTER register

#define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) \
  (rx) = match((ra),(rb),(rc),(rd),(re),(rf),(rg))

#define RRETURN(ra) return (ra)

5804

#else

5805

5806

5807

/* These versions of the macros manage a private stack on the heap. Note

5808

that the rd argument of RMATCH isn't actually used. It's the md argument of

5809

match(), which never changes. */

5810

5811

#define REGISTER

/* RMATCH "calls" match() without using the C stack. It obtains a new frame,
records the resumption point in the current frame with setjmp(), fills in the
argument fields of the new frame, makes it the current frame, and jumps to
HEAP_RECURSE. When control comes back through longjmp(), the current frame is
re-read from md->thisframe and the value left in its Xresult field is assigned
to rx.

If the new frame cannot be allocated, the current incarnation returns
PCRE_ERROR_NOMEMORY via RRETURN instead of dereferencing a NULL pointer.
(PCRE_ERROR_NOMEMORY is expected to come from the public pcre.h header.) */

#define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\
  {\
  heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
  if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
  if (setjmp(frame->Xwhere) == 0)\
    {\
    newframe->Xeptr = (ra);\
    newframe->Xecode = (rb);\
    newframe->Xoffset_top = (rc);\
    newframe->Xims = (re);\
    newframe->Xeptrb = (rf);\
    newframe->Xflags = (rg);\
    newframe->Xprevframe = frame;\
    frame = newframe;\
    DPRINTF(("restarting from line %d\n", __LINE__));\
    goto HEAP_RECURSE;\
    }\
  else\
    {\
    DPRINTF(("longjumped back to line %d\n", __LINE__));\
    frame = md->thisframe;\
    rx = frame->Xresult;\
    }\
  }

5836

5837

/* RRETURN "returns" from an incarnation of match() that is running on a heap
frame. The result expression is evaluated once, before the current frame is
popped and freed, so that it cannot be read through a stale or switched frame.
If there is a previous frame, the result is stored in its Xresult field, the
frame is recorded in md->thisframe, and longjmp() resumes at the point saved
in its Xwhere field. If there is no previous frame, this is the top level and
the result is returned from match() itself. */

#define RRETURN(ra)\
  {\
  int Xrrvalue = (ra);\
  heapframe *newframe = frame;\
  frame = newframe->Xprevframe;\
  (pcre_stack_free)(newframe);\
  if (frame != NULL)\
    {\
    frame->Xresult = Xrrvalue;\
    md->thisframe = frame;\
    longjmp(frame->Xwhere, 1);\
    }\
  return Xrrvalue;\
  }

5850

5851

5852

/* Structure for remembering the local variables in a private frame */

5853

5854

typedef struct heapframe {

5855

struct heapframe *Xprevframe;

5856

5857

/* Function arguments that may change */

5858

5859

const uschar *Xeptr;

5860

const uschar *Xecode;

5861

int Xoffset_top;

5862

long int Xims;

5863

eptrblock *Xeptrb;

5864

int Xflags;

5865

5866

/* Function local variables */

5867

5868

const uschar *Xcallpat;

5869

const uschar *Xcharptr;

5870

const uschar *Xdata;

5871

const uschar *Xnext;

5872

const uschar *Xpp;

5873

const uschar *Xprev;

5874

const uschar *Xsaved_eptr;

5875

5876

recursion_info Xnew_recursive;

5877

5878

BOOL Xcur_is_word;

5879

BOOL Xcondition;

5880

BOOL Xminimize;

5881

BOOL Xprev_is_word;

5882

5883

unsigned long int Xoriginal_ims;

5884

5885

#ifdef SUPPORT_UCP

5886

int Xprop_type;

5887

int Xprop_fail_result;

5888

int Xprop_category;

5889

int Xprop_chartype;

5890

int Xprop_othercase;

5891

int Xprop_test_against;

5892

int *Xprop_test_variable;

5893

#endif

5894

5895

int Xctype;

5896

int Xfc;

5897

int Xfi;

5898

int Xlength;

5899

int Xmax;

5900

int Xmin;

5901

int Xnumber;

5902

int Xoffset;

5903

int Xop;

5904

int Xsave_capture_last;

5905

int Xsave_offset1, Xsave_offset2, Xsave_offset3;

5906

int Xstacksave[REC_STACK_SAVE_MAX];

5907

5908

eptrblock Xnewptrb;

5909

5910

/* Place to pass back result, and where to jump back to */

5911

5912

int Xresult;

5913

jmp_buf Xwhere;

5914

5915

} heapframe;

5916

5917

#endif

5918

5919

5920

/***************************************************************************

5921

***************************************************************************/

5922

5923

5924

5925

/*************************************************

5926

* Match from current position *

5927

*************************************************/

5928

5929

/* On entry ecode points to the first opcode, and eptr to the first character

5930

in the subject string, while eptrb holds the value of eptr at the start of the

5931

last bracketed group - used for breaking infinite loops matching zero-length

5932

strings. This function is called recursively in many circumstances. Whenever it

5933

returns a negative (error) response, the outer incarnation must also return the

5934

same response.

5935

5936

Performance note: It might be tempting to extract commonly used fields from the

5937

md structure (e.g. utf8, end_subject) into individual variables to improve

5938

performance. Tests using gcc on a SPARC disproved this; in the first case, it

5939

made performance worse.

5940

5941

Arguments:

5942

eptr pointer in subject

5943

ecode position in code

5944

offset_top current top pointer

5945

md pointer to "static" info for the match

5946

ims current /i, /m, and /s options

5947

eptrb pointer to chain of blocks containing eptr at start of

5948

brackets - for testing for empty matches

5949

flags can contain

5950

match_condassert - this is an assertion condition

5951

match_isgroup - this is the start of a bracketed group

5952

5953

Returns: MATCH_MATCH if matched ) these values are >= 0

5954

MATCH_NOMATCH if failed to match )

5955

a negative PCRE_ERROR_xxx value if aborted by an error condition

5956

(e.g. stopped by recursion limit)
*/

5957

5958

5959

static int

5960

match(REGISTER const uschar *eptr, REGISTER const uschar *ecode,

5961

int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,

5962

int flags)

5963

{

5964

/* These variables do not need to be preserved over recursion in this function,

5965

so they can be ordinary variables in all cases. Mark them with "register"

5966

because they are used a lot in loops. */

5967

5968

5969

5970

5971

5972

/* When recursion is not being used, all "local" variables that have to be

5973

preserved over calls to RMATCH() are part of a "frame" which is obtained from

5974

heap storage. Set up the top-level frame here; others are obtained from the

5975

heap whenever RMATCH() does a "recursion". See the macro definitions above. */

5976

5977

#ifdef NO_RECURSE

5978

heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));

5979

frame->Xprevframe = NULL; /* Marks the top level */

5980

5981

/* Copy in the original argument variables */

5982

5983

frame->Xeptr = eptr;

5984

frame->Xecode = ecode;

5985

frame->Xoffset_top = offset_top;

5986

frame->Xims = ims;

5987

frame->Xeptrb = eptrb;

5988

frame->Xflags = flags;

5989

5990

/* This is where control jumps back to to effect "recursion" */

5991

5992

HEAP_RECURSE:

5993

5994

/* Macros make the argument variables come from the current frame */

5995

5996

#define eptr frame->Xeptr

5997

#define ecode frame->Xecode

5998

#define offset_top frame->Xoffset_top

5999

#define ims frame->Xims

6000

#define eptrb frame->Xeptrb

6001

#define flags frame->Xflags

6002

6003

/* Ditto for the local variables */

6004

6005

#ifdef SUPPORT_UTF8

6006

#define charptr frame->Xcharptr

6007

#endif

6008

#define callpat frame->Xcallpat

6009

#define data frame->Xdata

6010

#define next frame->Xnext

6011

#define pp frame->Xpp

6012

#define prev frame->Xprev

6013

#define saved_eptr frame->Xsaved_eptr

6014

6015

#define new_recursive frame->Xnew_recursive

6016

6017

#define cur_is_word frame->Xcur_is_word

6018

#define condition frame->Xcondition

6019

#define minimize frame->Xminimize

6020

#define prev_is_word frame->Xprev_is_word

6021

6022

#define original_ims frame->Xoriginal_ims

6023

6024

#ifdef SUPPORT_UCP

6025

#define prop_type frame->Xprop_type

6026

#define prop_fail_result frame->Xprop_fail_result

6027

#define prop_category frame->Xprop_category

6028

#define prop_chartype frame->Xprop_chartype

6029

#define prop_othercase frame->Xprop_othercase

6030

#define prop_test_against frame->Xprop_test_against

6031

#define prop_test_variable frame->Xprop_test_variable

6032

#endif

6033

6034

#define ctype frame->Xctype

6035

#define fc frame->Xfc

6036

#define fi frame->Xfi

6037

#define length frame->Xlength

6038

#define max frame->Xmax

6039

#define min frame->Xmin

6040

#define number frame->Xnumber

6041

#define offset frame->Xoffset

6042

#define op frame->Xop

6043

#define save_capture_last frame->Xsave_capture_last

6044

#define save_offset1 frame->Xsave_offset1

6045

#define save_offset2 frame->Xsave_offset2

6046

#define save_offset3 frame->Xsave_offset3

6047

#define stacksave frame->Xstacksave

6048

6049

#define newptrb frame->Xnewptrb

6050

6051

/* When recursion is being used, local variables are allocated on the stack and

6052

get preserved during recursion in the normal way. In this environment, fi and

6053

i, and fc and c, can be the same variables. */

6054

6055

#else

6056

#define fi i

6057

#define fc c

6058

6059

6060

#ifdef SUPPORT_UTF8 /* Many of these variables are used only in */

6061

const uschar *charptr; /* small blocks of the code. My normal */

6062

#endif /* style of coding would have declared */

6063

const uschar *callpat; /* them within each of those blocks. */

6064

const uschar *data; /* However, in order to accommodate the */

6065

const uschar *next; /* version of this code that uses an */

6066

const uschar *pp; /* external "stack" implemented on the */

6067

const uschar *prev; /* heap, it is easier to declare them */

6068

const uschar *saved_eptr; /* all here, so the declarations can */

6069

/* be cut out in a block. The only */

6070

recursion_info new_recursive; /* declarations within blocks below are */

6071

/* for variables that do not have to */

6072

BOOL cur_is_word; /* be preserved over a recursive call */

6073

BOOL condition; /* to RMATCH(). */

6074

BOOL minimize;

6075

BOOL prev_is_word;

6076

6077

unsigned long int original_ims;

6078

6079

#ifdef SUPPORT_UCP

6080

int prop_type;

6081

int prop_fail_result;

6082

int prop_category;

6083

int prop_chartype;

6084

int prop_othercase;

6085

int prop_test_against;

6086

int *prop_test_variable;

6087

#endif

6088

6089

int ctype;

6090

int length;

6091

int max;

6092

int min;

6093

int number;

6094

int offset;

6095

int op;

6096

int save_capture_last;

6097

int save_offset1, save_offset2, save_offset3;

6098

int stacksave[REC_STACK_SAVE_MAX];

6099

6100

eptrblock newptrb;

6101

#endif

6102

6103

/* These statements are here to stop the compiler complaining about uninitialized

6104

variables. */

6105

6106

#ifdef SUPPORT_UCP

6107

prop_fail_result = 0;

6108

prop_test_against = 0;

6109

prop_test_variable = NULL;

6110

#endif

6111

6112

/* OK, now we can get on with the real code of the function. Recursion is

6113

specified by the macros RMATCH and RRETURN. When NO_RECURSE is *not* defined,

6114

these just turn into a recursive call to match() and a "return", respectively.

6115

However, RMATCH isn't like a function call because it's quite a complicated

6116

macro. It has to be used in one particular way. This shouldn't, however, impact

6117

performance when true recursion is being used. */

6118

6119

if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);

6120

6121

original_ims = ims; /* Save for resetting on ')' */

6122

6123

/* At the start of a bracketed group, add the current subject pointer to the

6124

stack of such pointers, to be re-instated at the end of the group when we hit

6125

the closing ket. When match() is called in other circumstances, we don't add to

6126

this stack. */

6127

6128

if ((flags & match_isgroup) != 0)

6129

{

6130

newptrb.epb_prev = eptrb;

6131

newptrb.epb_saved_eptr = eptr;

6132

eptrb = &newptrb;

6133

}

6134

6135

/* Now start processing the operations. */

6136

6137

for (;;)

6138

{

6139

op = *ecode;

6140

minimize = FALSE;

6141

6142

/* For partial matching, remember if we ever hit the end of the subject after

6143

matching at least one subject character. */

6144

6145

if (md->partial &&

6146

eptr >= md->end_subject &&

6147

eptr > md->start_match)

6148

md->hitend = TRUE;

6149

6150

/* Opening capturing bracket. If there is space in the offset vector, save

6151

the current subject position in the working slot at the top of the vector. We

6152

mustn't change the current values of the data slot, because they may be set

6153

from a previous iteration of this group, and be referred to by a reference

6154

inside the group.

6155

6156

If the bracket fails to match, we need to restore this value and also the

6157

values of the final offsets, in case they were set by a previous iteration of

6158

the same bracket.

6159

6160

If there isn't enough space in the offset vector, treat this as if it were a

6161

non-capturing bracket. Don't worry about setting the flag for the error case

6162

here; that is handled in the code for KET. */

6163

6164

if (op > OP_BRA)

6165

{

6166

number = op - OP_BRA;

6167

6168

/* For extended extraction brackets (large number), we have to fish out the

6169

number from a dummy opcode at the start. */

6170

6171

if (number > EXTRACT_BASIC_MAX)

6172

number = GET2(ecode, 2+LINK_SIZE);

6173

offset = number << 1;

6174

6175

#ifdef DEBUG

6176

printf("start bracket %d subject=", number);

6177

pchars(eptr, 16, TRUE, md);

6178

printf("\n");

6179

#endif

6180

6181

if (offset < md->offset_max)

6182

{

6183

save_offset1 = md->offset_vector[offset];

6184

save_offset2 = md->offset_vector[offset+1];

6185

save_offset3 = md->offset_vector[md->offset_end - number];

6186

save_capture_last = md->capture_last;

6187

6188

DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));

6189

md->offset_vector[md->offset_end - number] = eptr - md->start_subject;

6190

6191

6192

{

6193

RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,

6194

match_isgroup);

6195

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6196

md->capture_last = save_capture_last;

6197

ecode += GET(ecode, 1);

6198

}

6199

while (*ecode == OP_ALT);

6200

6201

DPRINTF(("bracket %d failed\n", number));

6202

6203

md->offset_vector[offset] = save_offset1;

6204

md->offset_vector[offset+1] = save_offset2;

6205

md->offset_vector[md->offset_end - number] = save_offset3;

6206

6207

RRETURN(MATCH_NOMATCH);

6208

}

6209

6210

/* Insufficient room for saving captured contents */

6211

6212

else op = OP_BRA;

6213

}

6214

6215

/* Other types of node can be handled by a switch */

6216

6217

switch(op)

6218

{

6219

case OP_BRA: /* Non-capturing bracket: optimized */

6220

DPRINTF(("start bracket 0\n"));

6221

6222

{

6223

RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,

6224

match_isgroup);

6225

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6226

ecode += GET(ecode, 1);

6227

}

6228

while (*ecode == OP_ALT);

6229

DPRINTF(("bracket 0 failed\n"));

6230

RRETURN(MATCH_NOMATCH);

6231

6232

/* Conditional group: compilation checked that there are no more than

6233

two branches. If the condition is false, skipping the first branch takes us

6234

past the end if there is only one branch, but that's OK because that is

6235

exactly what going to the ket would do. */

6236

6237

case OP_COND:

6238

if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */

6239

{

6240

offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */

6241

condition = (offset == CREF_RECURSE * 2)?

6242

(md->recursive != NULL) :

6243

(offset < offset_top && md->offset_vector[offset] >= 0);

6244

RMATCH(rrc, eptr, ecode + (condition?

6245

(LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))),

6246

offset_top, md, ims, eptrb, match_isgroup);

6247

RRETURN(rrc);

6248

}

6249

6250

/* The condition is an assertion. Call match() to evaluate it - setting

6251

the final argument TRUE causes it to stop at the end of an assertion. */

6252

6253

else

6254

{

6255

RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,

6256

match_condassert | match_isgroup);

6257

if (rrc == MATCH_MATCH)

6258

{

6259

ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);

6260

while (*ecode == OP_ALT) ecode += GET(ecode, 1);

6261

}

6262

else if (rrc != MATCH_NOMATCH)

6263

{

6264

RRETURN(rrc); /* Need braces because of following else */

6265

}

6266

else ecode += GET(ecode, 1);

6267

RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,

6268

match_isgroup);

6269

RRETURN(rrc);

6270

}

6271

/* Control never reaches here */

6272

6273

/* Skip over conditional reference or large extraction number data if

6274

encountered. */

6275

6276

case OP_CREF:

6277

case OP_BRANUMBER:

6278

ecode += 3;

6279

break;

6280

6281

/* End of the pattern. If we are in a recursion, we should restore the

6282

offsets appropriately and continue from after the call. */

6283

6284

case OP_END:

6285

if (md->recursive != NULL && md->recursive->group_num == 0)

6286

{

6287

recursion_info *rec = md->recursive;

6288

DPRINTF(("Hit the end in a (?0) recursion\n"));

6289

md->recursive = rec->prevrec;

6290

memmove(md->offset_vector, rec->offset_save,

6291

rec->saved_max * sizeof(int));

6292

md->start_match = rec->save_start;

6293

ims = original_ims;

6294

ecode = rec->after_call;

6295

break;

6296

}

6297

6298

/* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty

6299

string - backtracking will then try other alternatives, if any. */

6300

6301

if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);

6302

md->end_match_ptr = eptr; /* Record where we ended */

6303

md->end_offset_top = offset_top; /* and how many extracts were taken */

6304

RRETURN(MATCH_MATCH);

6305

6306

/* Change option settings */

6307

6308

case OP_OPT:

6309

ims = ecode[1];

6310

ecode += 2;

6311

DPRINTF(("ims set to %02lx\n", ims));

6312

break;

6313

6314

/* Assertion brackets. Check the alternative branches in turn - the

6315

matching won't pass the KET for an assertion. If any one branch matches,

6316

the assertion is true. Lookbehind assertions have an OP_REVERSE item at the

6317

start of each branch to move the current point backwards, so the code at

6318

this level is identical to the lookahead case. */

6319

6320

case OP_ASSERT:

6321

case OP_ASSERTBACK:

6322

6323

{

6324

RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,

6325

match_isgroup);

6326

if (rrc == MATCH_MATCH) break;

6327

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6328

ecode += GET(ecode, 1);

6329

}

6330

while (*ecode == OP_ALT);

6331

if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);

6332

6333

/* If checking an assertion for a condition, return MATCH_MATCH. */

6334

6335

if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);

6336

6337

/* Continue from after the assertion, updating the offsets high water

6338

mark, since extracts may have been taken during the assertion. */

6339

6340

do ecode += GET(ecode,1); while (*ecode == OP_ALT);

6341

ecode += 1 + LINK_SIZE;

6342

offset_top = md->end_offset_top;

6343

continue;

6344

6345

/* Negative assertion: all branches must fail to match */

6346

6347

case OP_ASSERT_NOT:

6348

case OP_ASSERTBACK_NOT:

6349

6350

{

6351

RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,

6352

match_isgroup);

6353

if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);

6354

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6355

ecode += GET(ecode,1);

6356

}

6357

while (*ecode == OP_ALT);

6358

6359

if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);

6360

6361

ecode += 1 + LINK_SIZE;

6362

continue;

6363

6364

/* Move the subject pointer back. This occurs only at the start of

6365

each branch of a lookbehind assertion. If we are too close to the start to

6366

move back, this match function fails. When working with UTF-8 we move

6367

back a number of characters, not bytes. */

6368

6369

case OP_REVERSE:

6370

#ifdef SUPPORT_UTF8

6371

if (md->utf8)

6372

{

6373

c = GET(ecode,1);

6374

for (i = 0; i < c; i++)

6375

{

6376

eptr--;

6377

if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);

6378

BACKCHAR(eptr)

6379

}

6380

}

6381

else

6382

#endif

6383

6384

/* No UTF-8 support, or not in UTF-8 mode: count is byte count */

6385

6386

{

6387

eptr -= GET(ecode,1);

6388

if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);

6389

}

6390

6391

/* Skip to next op code */

6392

6393

ecode += 1 + LINK_SIZE;

6394

break;

6395

6396

/* The callout item calls an external function, if one is provided, passing

6397

details of the match so far. This is mainly for debugging, though the

6398

function is able to force a failure. */

6399

6400

case OP_CALLOUT:

6401

if (pcre_callout != NULL)

6402

{

6403

pcre_callout_block cb;

6404

cb.version = 1; /* Version 1 of the callout block */

6405

cb.callout_number = ecode[1];

6406

cb.offset_vector = md->offset_vector;

6407

cb.subject = (const char *)md->start_subject;

6408

cb.subject_length = md->end_subject - md->start_subject;

6409

cb.start_match = md->start_match - md->start_subject;

6410

cb.current_position = eptr - md->start_subject;

6411

cb.pattern_position = GET(ecode, 2);

6412

cb.next_item_length = GET(ecode, 2 + LINK_SIZE);

6413

cb.capture_top = offset_top/2;

6414

cb.capture_last = md->capture_last;

6415

cb.callout_data = md->callout_data;

6416

if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);

6417

if (rrc < 0) RRETURN(rrc);

6418

}

6419

ecode += 2 + 2*LINK_SIZE;

6420

break;

6421

6422

/* Recursion either matches the current regex, or some subexpression. The

6423

offset data is the offset to the starting bracket from the start of the

6424

whole pattern. (This is so that it works from duplicated subpatterns.)

6425

6426

If there are any capturing brackets started but not finished, we have to

6427

save their starting points and reinstate them after the recursion. However,

6428

we don't know how many such there are (offset_top records the completed

6429

total) so we just have to save all the potential data. There may be up to

6430

65535 such values, which is too large to put on the stack, but using malloc

6431

for small numbers seems expensive. As a compromise, the stack is used when

6432

there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc

6433

is used. A problem is what to do if the malloc fails ... there is no way of

6434

returning to the top level with an error. Save the top REC_STACK_SAVE_MAX

6435

values on the stack, and accept that the rest may be wrong.

6436

6437

There are also other values that have to be saved. We use a chained

6438

sequence of blocks that actually live on the stack. Thanks to Robin Houston

6439

for the original version of this logic. */

6440

6441

case OP_RECURSE:

6442

{

6443

callpat = md->start_code + GET(ecode, 1);

6444

new_recursive.group_num = *callpat - OP_BRA;

6445

6446

/* For extended extraction brackets (large number), we have to fish out

6447

the number from a dummy opcode at the start. */

6448

6449

if (new_recursive.group_num > EXTRACT_BASIC_MAX)

6450

new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);

6451

6452

/* Add to "recursing stack" */

6453

6454

new_recursive.prevrec = md->recursive;

6455

md->recursive = &new_recursive;

6456

6457

/* Find where to continue from afterwards */

6458

6459

ecode += 1 + LINK_SIZE;

6460

new_recursive.after_call = ecode;

6461

6462

/* Now save the offset data. */

6463

6464

new_recursive.saved_max = md->offset_end;

6465

if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)

6466

new_recursive.offset_save = stacksave;

6467

else

6468

{

6469

new_recursive.offset_save =

6470

(int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));

6471

if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);

6472

}

6473

6474

memcpy(new_recursive.offset_save, md->offset_vector,

6475

new_recursive.saved_max * sizeof(int));

6476

new_recursive.save_start = md->start_match;

6477

md->start_match = eptr;

6478

6479

/* OK, now we can do the recursion. For each top-level alternative we

6480

restore the offset and recursion data. */

6481

6482

DPRINTF(("Recursing into group %d\n", new_recursive.group_num));

6483

6484

{

6485

RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,

6486

eptrb, match_isgroup);

6487

if (rrc == MATCH_MATCH)

6488

{

6489

md->recursive = new_recursive.prevrec;

6490

if (new_recursive.offset_save != stacksave)

6491

(pcre_free)(new_recursive.offset_save);

6492

RRETURN(MATCH_MATCH);

6493

}

6494

else if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6495

6496

md->recursive = &new_recursive;

6497

memcpy(md->offset_vector, new_recursive.offset_save,

6498

new_recursive.saved_max * sizeof(int));

6499

callpat += GET(callpat, 1);

6500

}

6501

while (*callpat == OP_ALT);

6502

6503

DPRINTF(("Recursion didn't match\n"));

6504

md->recursive = new_recursive.prevrec;

6505

if (new_recursive.offset_save != stacksave)

6506

(pcre_free)(new_recursive.offset_save);

6507

RRETURN(MATCH_NOMATCH);

6508

}

6509

/* Control never reaches here */

6510

6511

/* "Once" brackets are like assertion brackets except that after a match,

6512

the point in the subject string is not moved back. Thus there can never be

6513

a move back into the brackets. Friedl calls these "atomic" subpatterns.

6514

Check the alternative branches in turn - the matching won't pass the KET

6515

for this kind of subpattern. If any one branch matches, we carry on as at

6516

the end of a normal bracket, leaving the subject pointer. */

6517

6518

case OP_ONCE:

6519

{

6520

prev = ecode;

6521

saved_eptr = eptr;

6522

6523

6524

{

6525

RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,

6526

eptrb, match_isgroup);

6527

if (rrc == MATCH_MATCH) break;

6528

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6529

ecode += GET(ecode,1);

6530

}

6531

while (*ecode == OP_ALT);

6532

6533

/* If hit the end of the group (which could be repeated), fail */

6534

6535

if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);

6536

6537

/* Continue as from after the assertion, updating the offsets high water

6538

mark, since extracts may have been taken. */

6539

6540

do ecode += GET(ecode,1); while (*ecode == OP_ALT);

6541

6542

offset_top = md->end_offset_top;

6543

eptr = md->end_match_ptr;

6544

6545

/* For a non-repeating ket, just continue at this level. This also

6546

happens for a repeating ket if no characters were matched in the group.

6547

This is the forcible breaking of infinite loops as implemented in Perl

6548

5.005. If there is an options reset, it will get obeyed in the normal

6549

course of events. */

6550

6551

if (*ecode == OP_KET || eptr == saved_eptr)

6552

{

6553

ecode += 1+LINK_SIZE;

6554

break;

6555

}

6556

6557

/* The repeating kets try the rest of the pattern or restart from the

6558

preceding bracket, in the appropriate order. We need to reset any options

6559

that changed within the bracket before re-running it, so check the next

6560

opcode. */

6561

6562

if (ecode[1+LINK_SIZE] == OP_OPT)

6563

{

6564

ims = (ims & ~PCRE_IMS) | ecode[4];

6565

DPRINTF(("ims set to %02lx at group repeat\n", ims));

6566

}

6567

6568

if (*ecode == OP_KETRMIN)

6569

{

6570

RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);

6571

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6572

RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);

6573

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6574

}

6575

else /* OP_KETRMAX */

6576

{

6577

RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);

6578

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6579

RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);

6580

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6581

}

6582

}

6583

RRETURN(MATCH_NOMATCH);

6584

6585

/* An alternation is the end of a branch; scan along to find the end of the

6586

bracketed group and go to there. */

6587

6588

case OP_ALT:

6589

do ecode += GET(ecode,1); while (*ecode == OP_ALT);

6590

break;

6591

6592

/* BRAZERO and BRAMINZERO occur just before a bracket group, indicating

6593

that it may occur zero times. It may repeat infinitely, or not at all -

6594

i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper

6595

repeat limits are compiled as a number of copies, with the optional ones

6596

preceded by BRAZERO or BRAMINZERO. */

6597

6598

case OP_BRAZERO:

6599

{

6600

next = ecode+1;

6601

RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);

6602

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6603

do next += GET(next,1); while (*next == OP_ALT);

6604

ecode = next + 1+LINK_SIZE;

6605

}

6606

break;

6607

6608

case OP_BRAMINZERO:

6609

{

6610

next = ecode+1;

6611

do next += GET(next,1); while (*next == OP_ALT);

6612

RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,

6613

match_isgroup);

6614

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6615

ecode++;

6616

}

6617

break;

6618

6619

/* End of a group, repeated or non-repeating. If we are at the end of

6620

an assertion "group", stop matching and return MATCH_MATCH, but record the

6621

current high water mark for use by positive assertions. Do this also

6622

for the "once" (no-backtrack) groups. */

6623

6624

case OP_KET:

6625

case OP_KETRMIN:

6626

case OP_KETRMAX:

6627

{

6628

prev = ecode - GET(ecode, 1);

6629

saved_eptr = eptrb->epb_saved_eptr;

6630

6631

/* Back up the stack of bracket start pointers. */

6632

6633

eptrb = eptrb->epb_prev;

6634

6635

if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||

6636

*prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||

6637

*prev == OP_ONCE)

6638

{

6639

md->end_match_ptr = eptr; /* For ONCE */

6640

md->end_offset_top = offset_top;

6641

RRETURN(MATCH_MATCH);

6642

}

6643

6644

/* In all other cases except a conditional group we have to check the

6645

group number back at the start and if necessary complete handling an

6646

extraction by setting the offsets and bumping the high water mark. */

6647

6648

if (*prev != OP_COND)

6649

{

6650

number = *prev - OP_BRA;

6651

6652

/* For extended extraction brackets (large number), we have to fish out

6653

the number from a dummy opcode at the start. */

6654

6655

if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);

6656

offset = number << 1;

6657

6658

#ifdef DEBUG

6659

printf("end bracket %d", number);

6660

printf("\n");

6661

#endif

6662

6663

/* Test for a numbered group. This includes groups called as a result

6664

of recursion. Note that whole-pattern recursion is coded as a recurse

6665

into group 0, so it won't be picked up here. Instead, we catch it when

6666

the OP_END is reached. */

6667

6668

if (number > 0)

6669

{

6670

md->capture_last = number;

6671

if (offset >= md->offset_max) md->offset_overflow = TRUE; else

6672

{

6673

md->offset_vector[offset] =

6674

md->offset_vector[md->offset_end - number];

6675

md->offset_vector[offset+1] = eptr - md->start_subject;

6676

if (offset_top <= offset) offset_top = offset + 2;

6677

}

6678

6679

/* Handle a recursively called group. Restore the offsets

6680

appropriately and continue from after the call. */

6681

6682

if (md->recursive != NULL && md->recursive->group_num == number)

6683

{

6684

recursion_info *rec = md->recursive;

6685

DPRINTF(("Recursion (%d) succeeded - continuing\n", number));

6686

md->recursive = rec->prevrec;

6687

md->start_match = rec->save_start;

6688

memcpy(md->offset_vector, rec->offset_save,

6689

rec->saved_max * sizeof(int));

6690

ecode = rec->after_call;

6691

ims = original_ims;

6692

break;

6693

}

6694

}

6695

}

6696

6697

/* Reset the value of the ims flags, in case they got changed during

6698

the group. */

6699

6700

ims = original_ims;

6701

DPRINTF(("ims reset to %02lx\n", ims));

6702

6703

/* For a non-repeating ket, just continue at this level. This also

6704

happens for a repeating ket if no characters were matched in the group.

6705

This is the forcible breaking of infinite loops as implemented in Perl

6706

5.005. If there is an options reset, it will get obeyed in the normal

6707

course of events. */

6708

6709

if (*ecode == OP_KET || eptr == saved_eptr)

6710

{

6711

ecode += 1 + LINK_SIZE;

6712

break;

6713

}

6714

6715

/* The repeating kets try the rest of the pattern or restart from the

6716

preceding bracket, in the appropriate order. */

6717

6718

if (*ecode == OP_KETRMIN)

6719

{

6720

RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);

6721

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6722

RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);

6723

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6724

}

6725

else /* OP_KETRMAX */

6726

{

6727

RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);

6728

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6729

RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);

6730

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6731

}

6732

}

6733

6734

RRETURN(MATCH_NOMATCH);

6735

6736

/* Start of subject unless notbol, or after internal newline if multiline */

6737

6738

case OP_CIRC:

6739

if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);

6740

if ((ims & PCRE_MULTILINE) != 0)

6741

{

6742

if (eptr != md->start_subject && eptr[-1] != NEWLINE)

6743

RRETURN(MATCH_NOMATCH);

6744

ecode++;

6745

break;

6746

}

6747

/* ... else fall through */

6748

6749

/* Start of subject assertion */

6750

6751

case OP_SOD:

6752

if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);

6753

ecode++;

6754

break;

6755

6756

/* Start of match assertion */

6757

6758

case OP_SOM:

6759

if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);

6760

ecode++;

6761

break;

6762

6763

/* Assert before internal newline if multiline, or before a terminating

6764

newline unless endonly is set, else end of subject unless noteol is set. */

6765

6766

case OP_DOLL:

6767

if ((ims & PCRE_MULTILINE) != 0)

6768

{

6769

if (eptr < md->end_subject)

6770

{ if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); }

6771

else

6772

{ if (md->noteol) RRETURN(MATCH_NOMATCH); }

6773

ecode++;

6774

break;

6775

}

6776

else

6777

{

6778

if (md->noteol) RRETURN(MATCH_NOMATCH);

6779

if (!md->endonly)

6780

{

6781

if (eptr < md->end_subject - 1 ||

6782

(eptr == md->end_subject - 1 && *eptr != NEWLINE))

6783

RRETURN(MATCH_NOMATCH);

6784

ecode++;

6785

break;

6786

}

6787

}

6788

/* ... else fall through */

6789

6790

/* End of subject assertion (\z) */

6791

6792

case OP_EOD:

6793

if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);

6794

ecode++;

6795

break;

6796

6797

/* End of subject or ending \n assertion (\Z) */

6798

6799

case OP_EODN:

6800

if (eptr < md->end_subject - 1 ||

6801

(eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH);

6802

ecode++;

6803

break;

6804

6805

/* Word boundary assertions */

6806

6807

case OP_NOT_WORD_BOUNDARY:

6808

case OP_WORD_BOUNDARY:

6809

{

6810

6811

/* Find out if the previous and current characters are "word" characters.

6812

It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to

6813

be "non-word" characters. */

6814

6815

#ifdef SUPPORT_UTF8

6816

if (md->utf8)

6817

{

6818

if (eptr == md->start_subject) prev_is_word = FALSE; else

6819

{

6820

const uschar *lastptr = eptr - 1;

6821

while((*lastptr & 0xc0) == 0x80) lastptr--;

6822

GETCHAR(c, lastptr);

6823

prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;

6824

}

6825

if (eptr >= md->end_subject) cur_is_word = FALSE; else

6826

{

6827

GETCHAR(c, eptr);

6828

cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;

6829

}

6830

}

6831

else

6832

#endif

6833

6834

/* More streamlined when not in UTF-8 mode */

6835

6836

{

6837

prev_is_word = (eptr != md->start_subject) &&

6838

((md->ctypes[eptr[-1]] & ctype_word) != 0);

6839

cur_is_word = (eptr < md->end_subject) &&

6840

((md->ctypes[*eptr] & ctype_word) != 0);

6841

}

6842

6843

/* Now see if the situation is what we want */

6844

6845

if ((*ecode++ == OP_WORD_BOUNDARY)?

6846

cur_is_word == prev_is_word : cur_is_word != prev_is_word)

6847

RRETURN(MATCH_NOMATCH);

6848

}

6849

break;

6850

6851

/* Match a single character type; inline for speed */

6852

6853

case OP_ANY:

6854

if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)

6855

RRETURN(MATCH_NOMATCH);

6856

if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);

6857

#ifdef SUPPORT_UTF8

6858

if (md->utf8)

6859

while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;

6860

#endif

6861

ecode++;

6862

break;

6863

6864

/* Match a single byte, even in UTF-8 mode. This opcode really does match

6865

any byte, even newline, independent of the setting of PCRE_DOTALL. */

6866

6867

case OP_ANYBYTE:

6868

if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);

6869

ecode++;

6870

break;

6871

6872

case OP_NOT_DIGIT:

6873

if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

6874

GETCHARINCTEST(c, eptr);

6875

if (

6876

#ifdef SUPPORT_UTF8

6877

c < 256 &&

6878

#endif

6879

(md->ctypes[c] & ctype_digit) != 0

6880

)

6881

RRETURN(MATCH_NOMATCH);

6882

ecode++;

6883

break;

6884

6885

case OP_DIGIT:

6886

if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

6887

GETCHARINCTEST(c, eptr);

6888

if (

6889

#ifdef SUPPORT_UTF8

6890

c >= 256 ||

6891

#endif

6892

(md->ctypes[c] & ctype_digit) == 0

6893

)

6894

RRETURN(MATCH_NOMATCH);

6895

ecode++;

6896

break;

6897

6898

case OP_NOT_WHITESPACE:

6899

if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

6900

GETCHARINCTEST(c, eptr);

6901

if (

6902

#ifdef SUPPORT_UTF8

6903

c < 256 &&

6904

#endif

6905

(md->ctypes[c] & ctype_space) != 0

6906

)

6907

RRETURN(MATCH_NOMATCH);

6908

ecode++;

6909

break;

6910

6911

case OP_WHITESPACE:

6912

if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

6913

GETCHARINCTEST(c, eptr);

6914

if (

6915

#ifdef SUPPORT_UTF8

6916

c >= 256 ||

6917

#endif

6918

(md->ctypes[c] & ctype_space) == 0

6919

)

6920

RRETURN(MATCH_NOMATCH);

6921

ecode++;

6922

break;

6923

6924

case OP_NOT_WORDCHAR:

6925

if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

6926

GETCHARINCTEST(c, eptr);

6927

if (

6928

#ifdef SUPPORT_UTF8

6929

c < 256 &&

6930

#endif

6931

(md->ctypes[c] & ctype_word) != 0

6932

)

6933

RRETURN(MATCH_NOMATCH);

6934

ecode++;

6935

break;

6936

6937

case OP_WORDCHAR:

6938

if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

6939

GETCHARINCTEST(c, eptr);

6940

if (

6941

#ifdef SUPPORT_UTF8

6942

c >= 256 ||

6943

#endif

6944

(md->ctypes[c] & ctype_word) == 0

6945

)

6946

RRETURN(MATCH_NOMATCH);

6947

ecode++;

6948

break;

6949

6950

#ifdef SUPPORT_UCP

6951

/* Check the next character by Unicode property. We will get here only

6952

if the support is in the binary; otherwise a compile-time error occurs. */

6953

6954

case OP_PROP:

6955

case OP_NOTPROP:

6956

if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

6957

GETCHARINCTEST(c, eptr);

6958

{

6959

int chartype, rqdtype;

6960

int othercase;

6961

int category = ucp_findchar(c, &chartype, &othercase);

6962

6963

rqdtype = *(++ecode);

6964

ecode++;

6965

6966

if (rqdtype >= 128)

6967

{

6968

if ((rqdtype - 128 != category) == (op == OP_PROP))

6969

RRETURN(MATCH_NOMATCH);

6970

}

6971

else

6972

{

6973

if ((rqdtype != chartype) == (op == OP_PROP))

6974

RRETURN(MATCH_NOMATCH);

6975

}

6976

}

6977

break;

6978

6979

/* Match an extended Unicode sequence. We will get here only if the support

6980

is in the binary; otherwise a compile-time error occurs. */

6981

6982

case OP_EXTUNI:

6983

if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

6984

GETCHARINCTEST(c, eptr);

6985

{

6986

int chartype;

6987

int othercase;

6988

int category = ucp_findchar(c, &chartype, &othercase);

6989

if (category == ucp_M) RRETURN(MATCH_NOMATCH);

6990

while (eptr < md->end_subject)

6991

{

6992

int len = 1;

6993

if (!md->utf8) c = *eptr; else

6994

{

6995

GETCHARLEN(c, eptr, len);

6996

}

6997

category = ucp_findchar(c, &chartype, &othercase);

6998

if (category != ucp_M) break;

6999

eptr += len;

7000

}

7001

}

7002

ecode++;

7003

break;

7004

#endif

7005

7006

7007

/* Match a back reference, possibly repeatedly. Look past the end of the

7008

item to see if there is repeat information following. The code is similar

7009

to that for character classes, but repeated for efficiency. Then obey

7010

similar code to character type repeats - written out again for speed.

7011

However, if the referenced string is the empty string, always treat

7012

it as matched, any number of times (otherwise there could be infinite

7013

loops). */

7014

7015

case OP_REF:

7016

{

7017

offset = GET2(ecode, 1) << 1; /* Doubled ref number */

7018

ecode += 3; /* Advance past item */

7019

7020

/* If the reference is unset, set the length to be longer than the amount

7021

of subject left; this ensures that every attempt at a match fails. We

7022

can't just fail here, because of the possibility of quantifiers with zero

7023

minima. */

7024

7025

length = (offset >= offset_top || md->offset_vector[offset] < 0)?

7026

md->end_subject - eptr + 1 :

7027

md->offset_vector[offset+1] - md->offset_vector[offset];

7028

7029

/* Set up for repetition, or handle the non-repeated case */

7030

7031

switch (*ecode)

7032

{

7033

case OP_CRSTAR:

7034

case OP_CRMINSTAR:

7035

case OP_CRPLUS:

7036

case OP_CRMINPLUS:

7037

case OP_CRQUERY:

7038

case OP_CRMINQUERY:

7039

c = *ecode++ - OP_CRSTAR;

7040

minimize = (c & 1) != 0;

7041

min = rep_min[c]; /* Pick up values from tables; */

7042

max = rep_max[c]; /* zero for max => infinity */

7043

if (max == 0) max = INT_MAX;

7044

break;

7045

7046

case OP_CRRANGE:

7047

case OP_CRMINRANGE:

7048

minimize = (*ecode == OP_CRMINRANGE);

7049

min = GET2(ecode, 1);

7050

max = GET2(ecode, 3);

7051

if (max == 0) max = INT_MAX;

7052

ecode += 5;

7053

break;

7054

7055

default: /* No repeat follows */

7056

if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);

7057

eptr += length;

7058

continue; /* With the main loop */

7059

}

7060

7061

/* If the length of the reference is zero, just continue with the

7062

main loop. */

7063

7064

if (length == 0) continue;

7065

7066

/* First, ensure the minimum number of matches are present. We get back

7067

the length of the reference string explicitly rather than passing the

7068

address of eptr, so that eptr can be a register variable. */

7069

7070

for (i = 1; i <= min; i++)

7071

{

7072

if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);

7073

eptr += length;

7074

}

7075

7076

/* If min = max, continue at the same level without recursion.

7077

They are not both allowed to be zero. */

7078

7079

if (min == max) continue;

7080

7081

/* If minimizing, keep trying and advancing the pointer */

7082

7083

if (minimize)

7084

{

7085

for (fi = min;; fi++)

7086

{

7087

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7088

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7089

if (fi >= max || !match_ref(offset, eptr, length, md, ims))

7090

RRETURN(MATCH_NOMATCH);

7091

eptr += length;

7092

}

7093

/* Control never gets here */

7094

}

7095

7096

/* If maximizing, find the longest string and work backwards */

7097

7098

else

7099

{

7100

pp = eptr;

7101

for (i = min; i < max; i++)

7102

{

7103

if (!match_ref(offset, eptr, length, md, ims)) break;

7104

eptr += length;

7105

}

7106

while (eptr >= pp)

7107

{

7108

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7109

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7110

eptr -= length;

7111

}

7112

RRETURN(MATCH_NOMATCH);

7113

}

7114

}

7115

/* Control never gets here */

7116

7117

7118

7119

/* Match a bit-mapped character class, possibly repeatedly. This op code is

7120

used when all the characters in the class have values in the range 0-255,

7121

and either the matching is caseful, or the characters are in the range

7122

0-127 when UTF-8 processing is enabled. The only difference between

7123

OP_CLASS and OP_NCLASS occurs when a data character outside the range is

7124

encountered.

7125

7126

First, look past the end of the item to see if there is repeat information

7127

following. Then obey similar code to character type repeats - written out

7128

again for speed. */

7129

7130

case OP_NCLASS:

7131

case OP_CLASS:

7132

{

7133

data = ecode + 1; /* Save for matching */

7134

ecode += 33; /* Advance past the item */

7135

7136

switch (*ecode)

7137

{

7138

case OP_CRSTAR:

7139

case OP_CRMINSTAR:

7140

case OP_CRPLUS:

7141

case OP_CRMINPLUS:

7142

case OP_CRQUERY:

7143

case OP_CRMINQUERY:

7144

c = *ecode++ - OP_CRSTAR;

7145

minimize = (c & 1) != 0;

7146

min = rep_min[c]; /* Pick up values from tables; */

7147

max = rep_max[c]; /* zero for max => infinity */

7148

if (max == 0) max = INT_MAX;

7149

break;

7150

7151

case OP_CRRANGE:

7152

case OP_CRMINRANGE:

7153

minimize = (*ecode == OP_CRMINRANGE);

7154

min = GET2(ecode, 1);

7155

max = GET2(ecode, 3);

7156

if (max == 0) max = INT_MAX;

7157

ecode += 5;

7158

break;

7159

7160

default: /* No repeat follows */

7161

min = max = 1;

7162

break;

7163

}

7164

7165

/* First, ensure the minimum number of matches are present. */

7166

7167

#ifdef SUPPORT_UTF8

7168

/* UTF-8 mode */

7169

if (md->utf8)

7170

{

7171

for (i = 1; i <= min; i++)

7172

{

7173

if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

7174

GETCHARINC(c, eptr);

7175

if (c > 255)

7176

{

7177

if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);

7178

}

7179

else

7180

{

7181

if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);

7182

}

7183

}

7184

}

7185

else

7186

#endif

7187

/* Not UTF-8 mode */

7188

{

7189

for (i = 1; i <= min; i++)

7190

{

7191

if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

7192

c = *eptr++;

7193

if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);

7194

}

7195

}

7196

7197

/* If max == min we can continue with the main loop without the

7198

need to recurse. */

7199

7200

if (min == max) continue;

7201

7202

/* If minimizing, keep testing the rest of the expression and advancing

7203

the pointer while it matches the class. */

7204

7205

if (minimize)

7206

{

7207

#ifdef SUPPORT_UTF8

7208

/* UTF-8 mode */

7209

if (md->utf8)

7210

{

7211

for (fi = min;; fi++)

7212

{

7213

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7214

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7215

if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

7216

GETCHARINC(c, eptr);

7217

if (c > 255)

7218

{

7219

if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);

7220

}

7221

else

7222

{

7223

if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);

7224

}

7225

}

7226

}

7227

else

7228

#endif

7229

/* Not UTF-8 mode */

7230

{

7231

for (fi = min;; fi++)

7232

{

7233

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7234

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7235

if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

7236

c = *eptr++;

7237

if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);

7238

}

7239

}

7240

/* Control never gets here */

7241

}

7242

7243

/* If maximizing, find the longest possible run, then work backwards. */

7244

7245

else

7246

{

7247

pp = eptr;

7248

7249

#ifdef SUPPORT_UTF8

7250

/* UTF-8 mode */

7251

if (md->utf8)

7252

{

7253

for (i = min; i < max; i++)

7254

{

7255

int len = 1;

7256

if (eptr >= md->end_subject) break;

7257

GETCHARLEN(c, eptr, len);

7258

if (c > 255)

7259

{

7260

if (op == OP_CLASS) break;

7261

}

7262

else

7263

{

7264

if ((data[c/8] & (1 << (c&7))) == 0) break;

7265

}

7266

eptr += len;

7267

}

7268

for (;;)

7269

{

7270

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7271

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7272

if (eptr-- == pp) break; /* Stop if tried at original pos */

7273

BACKCHAR(eptr);

7274

}

7275

}

7276

else

7277

#endif

7278

/* Not UTF-8 mode */

7279

{

7280

for (i = min; i < max; i++)

7281

{

7282

if (eptr >= md->end_subject) break;

7283

c = *eptr;

7284

if ((data[c/8] & (1 << (c&7))) == 0) break;

7285

eptr++;

7286

}

7287

while (eptr >= pp)

7288

{

7289

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7290

eptr--;

7291

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7292

}

7293

}

7294

7295

RRETURN(MATCH_NOMATCH);

7296

}

7297

}

7298

/* Control never gets here */

7299

7300

7301

/* Match an extended character class. This opcode is encountered only

7302

in UTF-8 mode, because that's the only time it is compiled. */

7303

7304

#ifdef SUPPORT_UTF8

7305

case OP_XCLASS:

7306

{

7307

data = ecode + 1 + LINK_SIZE; /* Save for matching */

7308

ecode += GET(ecode, 1); /* Advance past the item */

7309

7310

switch (*ecode)

7311

{

7312

case OP_CRSTAR:

7313

case OP_CRMINSTAR:

7314

case OP_CRPLUS:

7315

case OP_CRMINPLUS:

7316

case OP_CRQUERY:

7317

case OP_CRMINQUERY:

7318

c = *ecode++ - OP_CRSTAR;

7319

minimize = (c & 1) != 0;

7320

min = rep_min[c]; /* Pick up values from tables; */

7321

max = rep_max[c]; /* zero for max => infinity */

7322

if (max == 0) max = INT_MAX;

7323

break;

7324

7325

case OP_CRRANGE:

7326

case OP_CRMINRANGE:

7327

minimize = (*ecode == OP_CRMINRANGE);

7328

min = GET2(ecode, 1);

7329

max = GET2(ecode, 3);

7330

if (max == 0) max = INT_MAX;

7331

ecode += 5;

7332

break;

7333

7334

default: /* No repeat follows */

7335

min = max = 1;

7336

break;

7337

}

7338

7339

/* First, ensure the minimum number of matches are present. */

7340

7341

for (i = 1; i <= min; i++)

7342

{

7343

if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

7344

GETCHARINC(c, eptr);

7345

if (!match_xclass(c, data)) RRETURN(MATCH_NOMATCH);

7346

}

7347

7348

/* If max == min we can continue with the main loop without the

7349

need to recurse. */

7350

7351

if (min == max) continue;

7352

7353

/* If minimizing, keep testing the rest of the expression and advancing

7354

the pointer while it matches the class. */

7355

7356

if (minimize)

7357

{

7358

for (fi = min;; fi++)

7359

{

7360

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7361

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7362

if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

7363

GETCHARINC(c, eptr);

7364

if (!match_xclass(c, data)) RRETURN(MATCH_NOMATCH);

7365

}

7366

/* Control never gets here */

7367

}

7368

7369

/* If maximizing, find the longest possible run, then work backwards. */

7370

7371

else

7372

{

7373

pp = eptr;

7374

for (i = min; i < max; i++)

7375

{

7376

int len = 1;

7377

if (eptr >= md->end_subject) break;

7378

GETCHARLEN(c, eptr, len);

7379

if (!match_xclass(c, data)) break;

7380

eptr += len;

7381

}

7382

for(;;)

7383

{

7384

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7385

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7386

if (eptr-- == pp) break; /* Stop if tried at original pos */

7387

BACKCHAR(eptr)

7388

}

7389

RRETURN(MATCH_NOMATCH);

7390

}

7391

7392

/* Control never gets here */

7393

}

7394

#endif /* End of XCLASS */

7395

7396

/* Match a single character, casefully */

7397

7398

case OP_CHAR:

7399

#ifdef SUPPORT_UTF8

7400

if (md->utf8)

7401

{

7402

length = 1;

7403

ecode++;

7404

GETCHARLEN(fc, ecode, length);

7405

if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);

7406

while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);

7407

}

7408

else

7409

#endif

7410

7411

/* Non-UTF-8 mode */

7412

{

7413

if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);

7414

if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);

7415

ecode += 2;

7416

}

7417

break;

7418

7419

/* Match a single character, caselessly */

7420

7421

case OP_CHARNC:

7422

#ifdef SUPPORT_UTF8

7423

if (md->utf8)

7424

{

7425

length = 1;

7426

ecode++;

7427

GETCHARLEN(fc, ecode, length);

7428

7429

if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);

7430

7431

/* If the pattern character's value is < 128, we have only one byte, and

7432

can use the fast lookup table. */

7433

7434

if (fc < 128)

7435

{

7436

if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);

7437

}

7438

7439

/* Otherwise we must pick up the subject character */

7440

7441

else

7442

{

7443

int dc;

7444

GETCHARINC(dc, eptr);

7445

ecode += length;

7446

7447

/* If we have Unicode property support, we can use it to test the other

7448

case of the character, if there is one. The result of ucp_findchar() is

7449

< 0 if the char isn't found, and othercase is returned as zero if there

7450

isn't one. */

7451

7452

if (fc != dc)

7453

{

7454

#ifdef SUPPORT_UCP

7455

int chartype;

7456

int othercase;

7457

if (ucp_findchar(fc, &chartype, &othercase) < 0 || dc != othercase)

7458

#endif

7459

RRETURN(MATCH_NOMATCH);

7460

}

7461

}

7462

}

7463

else

7464

#endif /* SUPPORT_UTF8 */

7465

7466

/* Non-UTF-8 mode */

7467

{

7468

if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);

7469

if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);

7470

ecode += 2;

7471

}

7472

break;

7473

7474

/* Match a single character repeatedly; different opcodes share code. */

7475

7476

case OP_EXACT:

7477

min = max = GET2(ecode, 1);

7478

ecode += 3;

7479

goto REPEATCHAR;

7480

7481

case OP_UPTO:

7482

case OP_MINUPTO:

7483

min = 0;

7484

max = GET2(ecode, 1);

7485

minimize = *ecode == OP_MINUPTO;

7486

ecode += 3;

7487

goto REPEATCHAR;

7488

7489

case OP_STAR:

7490

case OP_MINSTAR:

7491

case OP_PLUS:

7492

case OP_MINPLUS:

7493

case OP_QUERY:

7494

case OP_MINQUERY:

7495

c = *ecode++ - OP_STAR;

7496

minimize = (c & 1) != 0;

7497

min = rep_min[c]; /* Pick up values from tables; */

7498

max = rep_max[c]; /* zero for max => infinity */

7499

if (max == 0) max = INT_MAX;

7500

7501

/* Common code for all repeated single-character matches. We can give

7502

up quickly if there are fewer than the minimum number of characters left in

7503

the subject. */

7504

7505

REPEATCHAR:

7506

#ifdef SUPPORT_UTF8

7507

if (md->utf8)

7508

{

7509

length = 1;

7510

charptr = ecode;

7511

GETCHARLEN(fc, ecode, length);

7512

if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);

7513

ecode += length;

7514

7515

/* Handle multibyte character matching specially here. There is

7516

support for caseless matching if UCP support is present. */

7517

7518

if (length > 1)

7519

{

7520

int oclength = 0;

7521

uschar occhars[8];

7522

7523

#ifdef SUPPORT_UCP

7524

int othercase;

7525

int chartype;

7526

if ((ims & PCRE_CASELESS) != 0 &&

7527

ucp_findchar(fc, &chartype, &othercase) >= 0 &&

7528

othercase > 0)

7529

oclength = ord2utf8(othercase, occhars);

7530

#endif /* SUPPORT_UCP */

7531

7532

for (i = 1; i <= min; i++)

7533

{

7534

if (memcmp(eptr, charptr, length) == 0) eptr += length;

7535

/* Need braces because of following else */

7536

else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }

7537

else

7538

{

7539

if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);

7540

eptr += oclength;

7541

}

7542

}

7543

7544

if (min == max) continue;

7545

7546

if (minimize)

7547

{

7548

for (fi = min;; fi++)

7549

{

7550

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7551

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7552

if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

7553

if (memcmp(eptr, charptr, length) == 0) eptr += length;

7554

/* Need braces because of following else */

7555

else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }

7556

else

7557

{

7558

if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);

7559

eptr += oclength;

7560

}

7561

}

7562

/* Control never gets here */

7563

}

7564

else

7565

{

7566

pp = eptr;

7567

for (i = min; i < max; i++)

7568

{

7569

if (eptr > md->end_subject - length) break;

7570

if (memcmp(eptr, charptr, length) == 0) eptr += length;

7571

else if (oclength == 0) break;

7572

else

7573

{

7574

if (memcmp(eptr, occhars, oclength) != 0) break;

7575

eptr += oclength;

7576

}

7577

}

7578

while (eptr >= pp)

7579

{

7580

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7581

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7582

eptr -= length;

7583

}

7584

RRETURN(MATCH_NOMATCH);

7585

}

7586

/* Control never gets here */

7587

}

7588

7589

/* If the length of a UTF-8 character is 1, we fall through here, and

7590

obey the code as for non-UTF-8 characters below, though in this case the

7591

value of fc will always be < 128. */

7592

}

7593

else

7594

#endif /* SUPPORT_UTF8 */

7595

7596

/* When not in UTF-8 mode, load a single-byte character. */

7597

{

7598

if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);

7599

fc = *ecode++;

7600

}

7601

7602

/* The value of fc at this point is always less than 256, though we may or

7603

may not be in UTF-8 mode. The code is duplicated for the caseless and

7604

caseful cases, for speed, since matching characters is likely to be quite

7605

common. First, ensure the minimum number of matches are present. If min =

7606

max, continue at the same level without recursing. Otherwise, if

7607

minimizing, keep trying the rest of the expression and advancing one

7608

matching character if failing, up to the maximum. Alternatively, if

7609

maximizing, find the maximum number of characters and work backwards. */

7610

7611

DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,

7612

max, eptr));

7613

7614

if ((ims & PCRE_CASELESS) != 0)

7615

{

7616

fc = md->lcc[fc];

7617

for (i = 1; i <= min; i++)

7618

if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);

7619

if (min == max) continue;

7620

if (minimize)

7621

{

7622

for (fi = min;; fi++)

7623

{

7624

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7625

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7626

if (fi >= max || eptr >= md->end_subject ||

7627

fc != md->lcc[*eptr++])

7628

RRETURN(MATCH_NOMATCH);

7629

}

7630

/* Control never gets here */

7631

}

7632

else

7633

{

7634

pp = eptr;

7635

for (i = min; i < max; i++)

7636

{

7637

if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;

7638

eptr++;

7639

}

7640

while (eptr >= pp)

7641

{

7642

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7643

eptr--;

7644

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7645

}

7646

RRETURN(MATCH_NOMATCH);

7647

}

7648

/* Control never gets here */

7649

}

7650

7651

/* Caseful comparisons (includes all multi-byte characters) */

7652

7653

else

7654

{

7655

for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);

7656

if (min == max) continue;

7657

if (minimize)

7658

{

7659

for (fi = min;; fi++)

7660

{

7661

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7662

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7663

if (fi >= max || eptr >= md->end_subject || fc != *eptr++)

7664

RRETURN(MATCH_NOMATCH);

7665

}

7666

/* Control never gets here */

7667

}

7668

else

7669

{

7670

pp = eptr;

7671

for (i = min; i < max; i++)

7672

{

7673

if (eptr >= md->end_subject || fc != *eptr) break;

7674

eptr++;

7675

}

7676

while (eptr >= pp)

7677

{

7678

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7679

eptr--;

7680

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7681

}

7682

RRETURN(MATCH_NOMATCH);

7683

}

7684

}

7685

/* Control never gets here */

7686

7687

/* Match a negated single one-byte character. The character we are

7688

checking can be multibyte. */

7689

7690

case OP_NOT:

7691

if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

7692

ecode++;

7693

GETCHARINCTEST(c, eptr);

7694

if ((ims & PCRE_CASELESS) != 0)

7695

{

7696

#ifdef SUPPORT_UTF8

7697

if (c < 256)

7698

#endif

7699

c = md->lcc[c];

7700

if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);

7701

}

7702

else

7703

{

7704

if (*ecode++ == c) RRETURN(MATCH_NOMATCH);

7705

}

7706

break;

7707

7708

/* Match a negated single one-byte character repeatedly. This is almost a

7709

repeat of the code for a repeated single character, but I haven't found a

7710

nice way of commoning these up that doesn't require a test of the

7711

positive/negative option for each character match. Maybe that wouldn't add

7712

very much to the time taken, but character matching *is* what this is all

7713

about... */

7714

7715

case OP_NOTEXACT:

7716

min = max = GET2(ecode, 1);

7717

ecode += 3;

7718

goto REPEATNOTCHAR;

7719

7720

case OP_NOTUPTO:

7721

case OP_NOTMINUPTO:

7722

min = 0;

7723

max = GET2(ecode, 1);

7724

minimize = *ecode == OP_NOTMINUPTO;

7725

ecode += 3;

7726

goto REPEATNOTCHAR;

7727

7728

case OP_NOTSTAR:

7729

case OP_NOTMINSTAR:

7730

case OP_NOTPLUS:

7731

case OP_NOTMINPLUS:

7732

case OP_NOTQUERY:

7733

case OP_NOTMINQUERY:

7734

c = *ecode++ - OP_NOTSTAR;

7735

minimize = (c & 1) != 0;

7736

min = rep_min[c]; /* Pick up values from tables; */

7737

max = rep_max[c]; /* zero for max => infinity */

7738

if (max == 0) max = INT_MAX;

7739

7740

/* Common code for all repeated single-byte matches. We can give up quickly

7741

if there are fewer than the minimum number of bytes left in the

7742

subject. */

7743

7744

REPEATNOTCHAR:

7745

if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);

7746

fc = *ecode++;

7747

7748

/* The code is duplicated for the caseless and caseful cases, for speed,

7749

since matching characters is likely to be quite common. First, ensure the

7750

minimum number of matches are present. If min = max, continue at the same

7751

level without recursing. Otherwise, if minimizing, keep trying the rest of

7752

the expression and advancing one matching character if failing, up to the

7753

maximum. Alternatively, if maximizing, find the maximum number of

7754

characters and work backwards. */

7755

7756

DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,

7757

max, eptr));

7758

7759

if ((ims & PCRE_CASELESS) != 0)

7760

{

7761

fc = md->lcc[fc];

7762

7763

#ifdef SUPPORT_UTF8

7764

/* UTF-8 mode */

7765

if (md->utf8)

7766

{

7767

7768

for (i = 1; i <= min; i++)

7769

{

7770

GETCHARINC(d, eptr);

7771

if (d < 256) d = md->lcc[d];

7772

if (fc == d) RRETURN(MATCH_NOMATCH);

7773

}

7774

}

7775

else

7776

#endif

7777

7778

/* Not UTF-8 mode */

7779

{

7780

for (i = 1; i <= min; i++)

7781

if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);

7782

}

7783

7784

if (min == max) continue;

7785

7786

if (minimize)

7787

{

7788

#ifdef SUPPORT_UTF8

7789

/* UTF-8 mode */

7790

if (md->utf8)

7791

{

7792

7793

for (fi = min;; fi++)

7794

{

7795

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7796

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7797

GETCHARINC(d, eptr);

7798

if (d < 256) d = md->lcc[d];

7799

if (fi >= max || eptr >= md->end_subject || fc == d)

7800

RRETURN(MATCH_NOMATCH);

7801

}

7802

}

7803

else

7804

#endif

7805

/* Not UTF-8 mode */

7806

{

7807

for (fi = min;; fi++)

7808

{

7809

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7810

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7811

if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])

7812

RRETURN(MATCH_NOMATCH);

7813

}

7814

}

7815

/* Control never gets here */

7816

}

7817

7818

/* Maximize case */

7819

7820

else

7821

{

7822

pp = eptr;

7823

7824

#ifdef SUPPORT_UTF8

7825

/* UTF-8 mode */

7826

if (md->utf8)

7827

{

7828

7829

for (i = min; i < max; i++)

7830

{

7831

int len = 1;

7832

if (eptr >= md->end_subject) break;

7833

GETCHARLEN(d, eptr, len);

7834

if (d < 256) d = md->lcc[d];

7835

if (fc == d) break;

7836

eptr += len;

7837

}

7838

for(;;)

7839

{

7840

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7841

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7842

if (eptr-- == pp) break; /* Stop if tried at original pos */

7843

BACKCHAR(eptr);

7844

}

7845

}

7846

else

7847

#endif

7848

/* Not UTF-8 mode */

7849

{

7850

for (i = min; i < max; i++)

7851

{

7852

if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;

7853

eptr++;

7854

}

7855

while (eptr >= pp)

7856

{

7857

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7858

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7859

eptr--;

7860

}

7861

}

7862

7863

RRETURN(MATCH_NOMATCH);

7864

}

7865

/* Control never gets here */

7866

}

7867

7868

/* Caseful comparisons */

7869

7870

else

7871

{

7872

#ifdef SUPPORT_UTF8

7873

/* UTF-8 mode */

7874

if (md->utf8)

7875

{

7876

7877

for (i = 1; i <= min; i++)

7878

{

7879

GETCHARINC(d, eptr);

7880

if (fc == d) RRETURN(MATCH_NOMATCH);

7881

}

7882

}

7883

else

7884

#endif

7885

/* Not UTF-8 mode */

7886

{

7887

for (i = 1; i <= min; i++)

7888

if (fc == *eptr++) RRETURN(MATCH_NOMATCH);

7889

}

7890

7891

if (min == max) continue;

7892

7893

if (minimize)

7894

{

7895

#ifdef SUPPORT_UTF8

7896

/* UTF-8 mode */

7897

if (md->utf8)

7898

{

7899

7900

for (fi = min;; fi++)

7901

{

7902

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7903

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7904

GETCHARINC(d, eptr);

7905

if (fi >= max || eptr >= md->end_subject || fc == d)

7906

RRETURN(MATCH_NOMATCH);

7907

}

7908

}

7909

else

7910

#endif

7911

/* Not UTF-8 mode */

7912

{

7913

for (fi = min;; fi++)

7914

{

7915

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7916

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7917

if (fi >= max || eptr >= md->end_subject || fc == *eptr++)

7918

RRETURN(MATCH_NOMATCH);

7919

}

7920

}

7921

/* Control never gets here */

7922

}

7923

7924

/* Maximize case */

7925

7926

else

7927

{

7928

pp = eptr;

7929

7930

#ifdef SUPPORT_UTF8

7931

/* UTF-8 mode */

7932

if (md->utf8)

7933

{

7934

7935

for (i = min; i < max; i++)

7936

{

7937

int len = 1;

7938

if (eptr >= md->end_subject) break;

7939

GETCHARLEN(d, eptr, len);

7940

if (fc == d) break;

7941

eptr += len;

7942

}

7943

for(;;)

7944

{

7945

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7946

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7947

if (eptr-- == pp) break; /* Stop if tried at original pos */

7948

BACKCHAR(eptr);

7949

}

7950

}

7951

else

7952

#endif

7953

/* Not UTF-8 mode */

7954

{

7955

for (i = min; i < max; i++)

7956

{

7957

if (eptr >= md->end_subject || fc == *eptr) break;

7958

eptr++;

7959

}

7960

while (eptr >= pp)

7961

{

7962

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7963

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7964

eptr--;

7965

}

7966

}

7967

7968

RRETURN(MATCH_NOMATCH);

7969

}

7970

}

7971

/* Control never gets here */

7972

7973

/* Match a single character type repeatedly; several different opcodes

7974

share code. This is very similar to the code for single characters, but we

7975

repeat it in the interests of efficiency. */

7976

7977

case OP_TYPEEXACT:

7978

min = max = GET2(ecode, 1);

7979

minimize = TRUE;

7980

ecode += 3;

7981

goto REPEATTYPE;

7982

7983

case OP_TYPEUPTO:

7984

case OP_TYPEMINUPTO:

7985

min = 0;

7986

max = GET2(ecode, 1);

7987

minimize = *ecode == OP_TYPEMINUPTO;

7988

ecode += 3;

7989

goto REPEATTYPE;

7990

7991

case OP_TYPESTAR:

7992

case OP_TYPEMINSTAR:

7993

case OP_TYPEPLUS:

7994

case OP_TYPEMINPLUS:

7995

case OP_TYPEQUERY:

7996

case OP_TYPEMINQUERY:

7997

c = *ecode++ - OP_TYPESTAR;

7998

minimize = (c & 1) != 0;

7999

min = rep_min[c]; /* Pick up values from tables; */

8000

max = rep_max[c]; /* zero for max => infinity */

8001

if (max == 0) max = INT_MAX;

8002

8003

/* Common code for all repeated single character type matches. Note that

8004

in UTF-8 mode, '.' matches a character of any length, but for the other

8005

character types, the valid characters are all one-byte long. */

8006

8007

REPEATTYPE:

8008

ctype = *ecode++; /* Code for the character type */

8009

8010

#ifdef SUPPORT_UCP

8011

if (ctype == OP_PROP || ctype == OP_NOTPROP)

8012

{

8013

prop_fail_result = ctype == OP_NOTPROP;

8014

prop_type = *ecode++;

8015

if (prop_type >= 128)

8016

{

8017

prop_test_against = prop_type - 128;

8018

prop_test_variable = &prop_category;

8019

}

8020

else

8021

{

8022

prop_test_against = prop_type;

8023

prop_test_variable = &prop_chartype;

8024

}

8025

}

8026

else prop_type = -1;

8027

#endif

8028

8029

/* First, ensure the minimum number of matches are present. Use inline

8030

code for maximizing the speed, and do the type test once at the start

8031

(i.e. keep it out of the loop). Also we can test that there are at least

8032

the minimum number of bytes before we start. This isn't as effective in

8033

UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that

8034

is tidier. Also separate the UCP code, which can be the same for both UTF-8

8035

and single-bytes. */

8036

8037

if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);

8038

if (min > 0)

8039

{

8040

#ifdef SUPPORT_UCP

8041

if (prop_type > 0)

8042

{

8043

for (i = 1; i <= min; i++)

8044

{

8045

GETCHARINC(c, eptr);

8046

prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);

8047

if ((*prop_test_variable == prop_test_against) == prop_fail_result)

8048

RRETURN(MATCH_NOMATCH);

8049

}

8050

}

8051

8052

/* Match extended Unicode sequences. We will get here only if the

8053

support is in the binary; otherwise a compile-time error occurs. */

8054

8055

else if (ctype == OP_EXTUNI)

8056

{

8057

for (i = 1; i <= min; i++)

8058

{

8059

GETCHARINCTEST(c, eptr);

8060

prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);

8061

if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);

8062

while (eptr < md->end_subject)

8063

{

8064

int len = 1;

8065

if (!md->utf8) c = *eptr; else

8066

{

8067

GETCHARLEN(c, eptr, len);

8068

}

8069

prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);

8070

if (prop_category != ucp_M) break;

8071

eptr += len;

8072

}

8073

}

8074

}

8075

8076

else

8077

#endif /* SUPPORT_UCP */

8078

8079

/* Handle all other cases when the coding is UTF-8 */

8080

8081

#ifdef SUPPORT_UTF8

8082

if (md->utf8) switch(ctype)

8083

{

8084

case OP_ANY:

8085

for (i = 1; i <= min; i++)

8086

{

8087

if (eptr >= md->end_subject ||

8088

(*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0))

8089

RRETURN(MATCH_NOMATCH);

8090

while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;

8091

}

8092

break;

8093

8094

case OP_ANYBYTE:

8095

eptr += min;

8096

break;

8097

8098

case OP_NOT_DIGIT:

8099

for (i = 1; i <= min; i++)

8100

{

8101

if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

8102

GETCHARINC(c, eptr);

8103

if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)

8104

RRETURN(MATCH_NOMATCH);

8105

}

8106

break;

8107

8108

case OP_DIGIT:

8109

for (i = 1; i <= min; i++)

8110

{

8111

if (eptr >= md->end_subject ||

8112

*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)

8113

RRETURN(MATCH_NOMATCH);

8114

/* No need to skip more bytes - we know it's a 1-byte character */

8115

}

8116

break;

8117

8118

case OP_NOT_WHITESPACE:

8119

for (i = 1; i <= min; i++)

8120

{

8121

if (eptr >= md->end_subject ||

8122

(*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))

8123

RRETURN(MATCH_NOMATCH);

8124

while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;

8125

}

8126

break;

8127

8128

case OP_WHITESPACE:

8129

for (i = 1; i <= min; i++)

8130

{

8131

if (eptr >= md->end_subject ||

8132

*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)

8133

RRETURN(MATCH_NOMATCH);

8134

/* No need to skip more bytes - we know it's a 1-byte character */

8135

}

8136

break;

8137

8138

case OP_NOT_WORDCHAR:

8139

for (i = 1; i <= min; i++)

8140

{

8141

if (eptr >= md->end_subject ||

8142

(*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))

8143

RRETURN(MATCH_NOMATCH);

8144

while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;

8145

}

8146

break;

8147

8148

case OP_WORDCHAR:

8149

for (i = 1; i <= min; i++)

8150

{

8151

if (eptr >= md->end_subject ||

8152

*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)

8153

RRETURN(MATCH_NOMATCH);

8154

/* No need to skip more bytes - we know it's a 1-byte character */

8155

}

8156

break;

8157

8158

default:

8159

RRETURN(PCRE_ERROR_INTERNAL);

8160

} /* End switch(ctype) */

8161

8162

else

8163

#endif /* SUPPORT_UTF8 */

8164

8165

/* Code for the non-UTF-8 case for minimum matching of operators other

8166

than OP_PROP and OP_NOTPROP. */

8167

8168

switch(ctype)

8169

{

8170

case OP_ANY:

8171

if ((ims & PCRE_DOTALL) == 0)

8172

{

8173

for (i = 1; i <= min; i++)

8174

if (*eptr++ == NEWLINE) RRETURN(MATCH_NOMATCH);

8175

}

8176

else eptr += min;

8177

break;

8178

8179

case OP_ANYBYTE:

8180

eptr += min;

8181

break;

8182

8183

case OP_NOT_DIGIT:

8184

for (i = 1; i <= min; i++)

8185

if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);

8186

break;

8187

8188

case OP_DIGIT:

8189

for (i = 1; i <= min; i++)

8190

if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);

8191

break;

8192

8193

case OP_NOT_WHITESPACE:

8194

for (i = 1; i <= min; i++)

8195

if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);

8196

break;

8197

8198

case OP_WHITESPACE:

8199

for (i = 1; i <= min; i++)

8200

if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);

8201

break;

8202

8203

case OP_NOT_WORDCHAR:

8204

for (i = 1; i <= min; i++)

8205

if ((md->ctypes[*eptr++] & ctype_word) != 0)

8206

RRETURN(MATCH_NOMATCH);

8207

break;

8208

8209

case OP_WORDCHAR:

8210

for (i = 1; i <= min; i++)

8211

if ((md->ctypes[*eptr++] & ctype_word) == 0)

8212

RRETURN(MATCH_NOMATCH);

8213

break;

8214

8215

default:

8216

RRETURN(PCRE_ERROR_INTERNAL);

8217

}

8218

}

8219

8220

/* If min = max, continue at the same level without recursing */

8221

8222

if (min == max) continue;

8223

8224

/* If minimizing, we have to test the rest of the pattern before each

8225

subsequent match. Again, separate the UTF-8 case for speed, and also

8226

separate the UCP cases. */

8227

8228

if (minimize)

8229

{

8230

#ifdef SUPPORT_UCP

8231

if (prop_type > 0)

8232

{

8233

for (fi = min;; fi++)

8234

{

8235

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

8236

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

8237

if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

8238

GETCHARINC(c, eptr);

8239

prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);

8240

if ((*prop_test_variable == prop_test_against) == prop_fail_result)

8241

RRETURN(MATCH_NOMATCH);

8242

}

8243

}

8244

8245

/* Match extended Unicode sequences. We will get here only if the

8246

support is in the binary; otherwise a compile-time error occurs. */

8247

8248

else if (ctype == OP_EXTUNI)

8249

{

8250

for (fi = min;; fi++)

8251

{

8252

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

8253

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

8254

if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

8255

GETCHARINCTEST(c, eptr);

8256

prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);

8257

if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);

8258

while (eptr < md->end_subject)

8259

{

8260

int len = 1;

8261

if (!md->utf8) c = *eptr; else

8262

{

8263

GETCHARLEN(c, eptr, len);

8264

}

8265

prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);

8266

if (prop_category != ucp_M) break;

8267

eptr += len;

8268

}

8269

}

8270

}

8271

8272

else

8273

#endif /* SUPPORT_UCP */

8274

8275

#ifdef SUPPORT_UTF8

8276

/* UTF-8 mode */

8277

if (md->utf8)

8278

{

8279

for (fi = min;; fi++)

8280

{

8281

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

8282

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

8283

if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

8284

8285

GETCHARINC(c, eptr);

8286

switch(ctype)

8287

{

8288

case OP_ANY:

8289

if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);

8290

break;

8291

8292

case OP_ANYBYTE:

8293

break;

8294

8295

case OP_NOT_DIGIT:

8296

if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)

8297

RRETURN(MATCH_NOMATCH);

8298

break;

8299

8300

case OP_DIGIT:

8301

if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)

8302

RRETURN(MATCH_NOMATCH);

8303

break;

8304

8305

case OP_NOT_WHITESPACE:

8306

if (c < 256 && (md->ctypes[c] & ctype_space) != 0)

8307

RRETURN(MATCH_NOMATCH);

8308

break;

8309

8310

case OP_WHITESPACE:

8311

if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)

8312

RRETURN(MATCH_NOMATCH);

8313

break;

8314

8315

case OP_NOT_WORDCHAR:

8316

if (c < 256 && (md->ctypes[c] & ctype_word) != 0)

8317

RRETURN(MATCH_NOMATCH);

8318

break;

8319

8320

case OP_WORDCHAR:

8321

if (c >= 256 && (md->ctypes[c] & ctype_word) == 0)

8322

RRETURN(MATCH_NOMATCH);

8323

break;

8324

8325

default:

8326

RRETURN(PCRE_ERROR_INTERNAL);

8327

}

8328

}

8329

}

8330

else

8331

#endif

8332

/* Not UTF-8 mode */

8333

{

8334

for (fi = min;; fi++)

8335

{

8336

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

8337

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

8338

if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

8339

c = *eptr++;

8340

switch(ctype)

8341

{

8342

case OP_ANY:

8343

if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);

8344

break;

8345

8346

case OP_ANYBYTE:

8347

break;

8348

8349

case OP_NOT_DIGIT:

8350

if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);

8351

break;

8352

8353

case OP_DIGIT:

8354

if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);

8355

break;

8356

8357

case OP_NOT_WHITESPACE:

8358

if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);

8359

break;

8360

8361

case OP_WHITESPACE:

8362

if ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);

8363

break;

8364

8365

case OP_NOT_WORDCHAR:

8366

if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);

8367

break;

8368

8369

case OP_WORDCHAR:

8370

if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);

8371

break;

8372

8373

default:

8374

RRETURN(PCRE_ERROR_INTERNAL);

8375

}

8376

}

8377

}

8378

/* Control never gets here */

8379

}

8380

8381

/* If maximizing it is worth using inline code for speed, doing the type

8382

test once at the start (i.e. keep it out of the loop). Again, keep the

8383

UTF-8 and UCP stuff separate. */

8384

8385

else

8386

{

8387

pp = eptr; /* Remember where we started */

8388

8389

#ifdef SUPPORT_UCP

8390

if (prop_type > 0)

8391

{

8392

for (i = min; i < max; i++)

8393

{

8394

int len = 1;

8395

if (eptr >= md->end_subject) break;

8396

GETCHARLEN(c, eptr, len);

8397

prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);

8398

if ((*prop_test_variable == prop_test_against) == prop_fail_result)

8399

break;

8400

eptr+= len;

8401

}

8402

8403

/* eptr is now past the end of the maximum run */

8404

8405

for(;;)

8406

{

8407

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

8408

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

8409

if (eptr-- == pp) break; /* Stop if tried at original pos */

8410

BACKCHAR(eptr);

8411

}

8412

}

8413

8414

/* Match extended Unicode sequences. We will get here only if the

8415

support is in the binary; otherwise a compile-time error occurs. */

8416

8417

else if (ctype == OP_EXTUNI)

8418

{

8419

for (i = min; i < max; i++)

8420

{

8421

if (eptr >= md->end_subject) break;

8422

GETCHARINCTEST(c, eptr);

8423

prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);

8424

if (prop_category == ucp_M) break;

8425

while (eptr < md->end_subject)

8426

{

8427

int len = 1;

8428

if (!md->utf8) c = *eptr; else

8429

{

8430

GETCHARLEN(c, eptr, len);

8431

}

8432

prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);

8433

if (prop_category != ucp_M) break;

8434

eptr += len;

8435

}

8436

}

8437

8438

/* eptr is now past the end of the maximum run */

8439

8440

for(;;)

8441

{

8442

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

8443

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

8444

if (eptr-- == pp) break; /* Stop if tried at original pos */

8445

for (;;) /* Move back over one extended */

8446

{

8447

int len = 1;

8448

BACKCHAR(eptr);

8449

if (!md->utf8) c = *eptr; else

8450

{

8451

GETCHARLEN(c, eptr, len);

8452

}

8453

prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);

8454

if (prop_category != ucp_M) break;

8455

eptr--;

8456

}

8457

}

8458

}

8459

8460

else

8461

#endif /* SUPPORT_UCP */

8462

8463

#ifdef SUPPORT_UTF8

8464

/* UTF-8 mode */

8465

8466

if (md->utf8)

8467

{

8468

switch(ctype)

8469

{

8470

case OP_ANY:

8471

8472

/* Special code is required for UTF8, but when the maximum is unlimited

8473

we don't need it, so we repeat the non-UTF8 code. This is probably

8474

worth it, because .* is quite a common idiom. */

8475

8476

if (max < INT_MAX)

8477

{

8478

if ((ims & PCRE_DOTALL) == 0)

8479

{

8480

for (i = min; i < max; i++)

8481

{

8482

if (eptr >= md->end_subject || *eptr == NEWLINE) break;

8483

eptr++;

8484

while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;

8485

}

8486

}

8487

else

8488

{

8489

for (i = min; i < max; i++)

8490

{

8491

eptr++;

8492

while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;

8493

}

8494

}

8495

}

8496

8497

/* Handle unlimited UTF-8 repeat */

8498

8499

else

8500

{

8501

if ((ims & PCRE_DOTALL) == 0)

8502

{

8503

for (i = min; i < max; i++)

8504

{

8505

if (eptr >= md->end_subject || *eptr == NEWLINE) break;

8506

eptr++;

8507

}

8508

break;

8509

}

8510

else

8511

{

8512

c = max - min;

8513

if (c > md->end_subject - eptr) c = md->end_subject - eptr;

8514

eptr += c;

8515

}

8516

}

8517

break;

8518

8519

/* The byte case is the same as non-UTF8 */

8520

8521

case OP_ANYBYTE:

8522

c = max - min;

8523

if (c > md->end_subject - eptr) c = md->end_subject - eptr;

8524

eptr += c;

8525

break;

8526

8527

case OP_NOT_DIGIT:

8528

for (i = min; i < max; i++)

8529

{

8530

int len = 1;

8531

if (eptr >= md->end_subject) break;

8532

GETCHARLEN(c, eptr, len);

8533

if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;

8534

eptr+= len;

8535

}

8536

break;

8537

8538

case OP_DIGIT:

8539

for (i = min; i < max; i++)

8540

{

8541

int len = 1;

8542

if (eptr >= md->end_subject) break;

8543

GETCHARLEN(c, eptr, len);

8544

if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;

8545

eptr+= len;

8546

}

8547

break;

8548

8549

case OP_NOT_WHITESPACE:

8550

for (i = min; i < max; i++)

8551

{

8552

int len = 1;

8553

if (eptr >= md->end_subject) break;

8554

GETCHARLEN(c, eptr, len);

8555

if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;

8556

eptr+= len;

8557

}

8558

break;

8559

8560

case OP_WHITESPACE:

8561

for (i = min; i < max; i++)

8562

{

8563

int len = 1;

8564

if (eptr >= md->end_subject) break;

8565

GETCHARLEN(c, eptr, len);

8566

if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;

8567

eptr+= len;

8568

}

8569

break;

8570

8571

case OP_NOT_WORDCHAR:

8572

for (i = min; i < max; i++)

8573

{

8574

int len = 1;

8575

if (eptr >= md->end_subject) break;

8576

GETCHARLEN(c, eptr, len);

8577

if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;

8578

eptr+= len;

8579

}

8580

break;

8581

8582

case OP_WORDCHAR:

8583

for (i = min; i < max; i++)

8584

{

8585

int len = 1;

8586

if (eptr >= md->end_subject) break;

8587

GETCHARLEN(c, eptr, len);

8588

if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;

8589

eptr+= len;

8590

}

8591

break;

8592

8593

default:

8594

RRETURN(PCRE_ERROR_INTERNAL);

8595

}

8596

8597

/* eptr is now past the end of the maximum run */

8598

8599

for(;;)

8600

{

8601

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

8602

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

8603

if (eptr-- == pp) break; /* Stop if tried at original pos */

8604

BACKCHAR(eptr);

8605

}

8606

}

8607

else

8608

#endif

8609

8610

/* Not UTF-8 mode */

8611

{

8612

switch(ctype)

8613

{

8614

case OP_ANY:

8615

if ((ims & PCRE_DOTALL) == 0)

8616

{

8617

for (i = min; i < max; i++)

8618

{

8619

if (eptr >= md->end_subject || *eptr == NEWLINE) break;

8620

eptr++;

8621

}

8622

break;

8623

}

8624

/* For DOTALL case, fall through and treat as \C */

8625

8626

case OP_ANYBYTE:

8627

c = max - min;

8628

if (c > md->end_subject - eptr) c = md->end_subject - eptr;

8629

eptr += c;

8630

break;

8631

8632

case OP_NOT_DIGIT:

8633

for (i = min; i < max; i++)

8634

{

8635

if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)

8636

break;

8637

eptr++;

8638

}

8639

break;

8640

8641

case OP_DIGIT:

8642

for (i = min; i < max; i++)

8643

{

8644

if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)

8645

break;

8646

eptr++;

8647

}

8648

break;

8649

8650

case OP_NOT_WHITESPACE:

8651

for (i = min; i < max; i++)

8652

{

8653

if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)

8654

break;

8655

eptr++;

8656

}

8657

break;

8658

8659

case OP_WHITESPACE:

8660

for (i = min; i < max; i++)

8661

{

8662

if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)

8663

break;

8664

eptr++;

8665

}

8666

break;

8667

8668

case OP_NOT_WORDCHAR:

8669

for (i = min; i < max; i++)

8670

{

8671

if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)

8672

break;

8673

eptr++;

8674

}

8675

break;

8676

8677

case OP_WORDCHAR:

8678

for (i = min; i < max; i++)

8679

{

8680

if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)

8681

break;

8682

eptr++;

8683

}

8684

break;

8685

8686

default:

8687

RRETURN(PCRE_ERROR_INTERNAL);

8688

}

8689

8690

/* eptr is now past the end of the maximum run */

8691

8692

while (eptr >= pp)

8693

{

8694

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

8695

eptr--;

8696

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

8697

}

8698

}

8699

8700

/* Get here if we can't make it match with any permitted repetitions */

8701

8702

RRETURN(MATCH_NOMATCH);

8703

}

8704

/* Control never gets here */

8705

8706

/* There's been some horrible disaster. Since all codes > OP_BRA are

8707

for capturing brackets, and there shouldn't be any gaps between 0 and

8708

OP_BRA, arrival here can only mean there is something seriously wrong

8709

in the code above or the OP_xxx definitions. */

8710

8711

default:

8712

DPRINTF(("Unknown opcode %d\n", *ecode));

8713

RRETURN(PCRE_ERROR_UNKNOWN_NODE);

8714

}

8715

8716

/* Do not stick any code in here without much thought; it is assumed

8717

that "continue" in the code above comes out to here to repeat the main

8718

loop. */

8719

8720

} /* End of main loop */

8721

/* Control never reaches here */

8722

}

8723

8724

8725

/***************************************************************************

8726

****************************************************************************

8727

RECURSION IN THE match() FUNCTION

8728

8729

Undefine all the macros that were defined above to handle this. */

8730

8731

#ifdef NO_RECURSE
/* Only when NO_RECURSE is defined: remove the definition of each macro name
listed below, so that none of these names stays defined as a macro for the
code that follows this point in the file. */

8732

#undef eptr

8733

#undef ecode

8734

#undef offset_top

8735

#undef ims

8736

#undef eptrb

8737

#undef flags

8738

8739

#undef callpat

8740

#undef charptr

8741

#undef data

8742

#undef next

8743

#undef pp

8744

#undef prev

8745

#undef saved_eptr

8746

8747

#undef new_recursive

8748

8749

#undef cur_is_word

8750

#undef condition

8751

#undef minimize

8752

#undef prev_is_word

8753

8754

#undef original_ims

8755

8756

#undef ctype

8757

#undef length

8758

#undef max

8759

#undef min

8760

#undef number

8761

#undef offset

8762

#undef op

8763

#undef save_capture_last

8764

#undef save_offset1

8765

#undef save_offset2

8766

#undef save_offset3

8767

#undef stacksave

8768

8769

#undef newptrb

8770

8771

#endif /* NO_RECURSE */

8772

8773

/* These two are defined as macros in both cases, which is why they are
undefined unconditionally here, outside the NO_RECURSE conditional. */

8774

8775

#undef fc

8776

#undef fi

8777

8778

/***************************************************************************

8779

***************************************************************************/

8780

8781

8782

8783

/*************************************************

8784

* Execute a Regular Expression *

8785

*************************************************/

8786

8787

/* This function applies a compiled re to a subject string and picks out

8788

portions of the string if it matches. Two elements in the vector are set for

8789

each substring: the offsets to the start and end of the substring.

8790

8791

Arguments:

8792

argument_re points to the compiled expression

8793

extra_data points to extra data or is NULL

8794

subject points to the subject string

8795

length length of subject string (may contain binary zeros)

8796

start_offset where to start in the subject string

8797

options option bits

8798

offsets points to a vector of ints to be filled in with offsets

8799

offsetcount the number of elements in the vector

8800

8801

Returns: > 0 => success; value is the number of elements filled in

8802

= 0 => success, but offsets is not big enough

8803

-1 => failed to match

8804

< -1 => some kind of unexpected problem
*/

8805

8806

8807

EXPORT int

8808

pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,

8809

const char *subject, int length, int start_offset, int options, int *offsets,

8810

int offsetcount)

8811

{

8812

int rc, resetcount, ocount;

8813

int first_byte = -1;

8814

int req_byte = -1;

8815

int req_byte2 = -1;

8816

unsigned long int ims = 0;

8817

BOOL using_temporary_offsets = FALSE;

8818

BOOL anchored;

8819

BOOL startline;

8820

BOOL first_byte_caseless = FALSE;

8821

BOOL req_byte_caseless = FALSE;

8822

match_data match_block;

8823

const uschar *tables;

8824

const uschar *start_bits = NULL;

8825

const uschar *start_match = (const uschar *)subject + start_offset;

8826

const uschar *end_subject;

8827

const uschar *req_byte_ptr = start_match - 1;

8828

8829

pcre_study_data internal_study;

8830

const pcre_study_data *study;

8831

8832

real_pcre internal_re;

8833

const real_pcre *external_re = (const real_pcre *)argument_re;

8834

const real_pcre *re = external_re;

8835

8836

/* Plausibility checks */

8837

8838

if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;

8839

if (re == NULL || subject == NULL ||

8840

(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;

8841

if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;

8842

8843

/* Fish out the optional data from the extra_data structure, first setting

8844

the default values. */

8845

8846

study = NULL;

8847

match_block.match_limit = MATCH_LIMIT;

8848

match_block.callout_data = NULL;

8849

8850

/* The table pointer is always in native byte order. */

8851

8852

tables = external_re->tables;

8853

8854

if (extra_data != NULL)

8855

{

8856

8857

if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)

8858

study = (const pcre_study_data *)extra_data->study_data;

8859

if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)

8860

match_block.match_limit = extra_data->match_limit;

8861

if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)

8862

match_block.callout_data = extra_data->callout_data;

8863

if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;

8864

}

8865

8866

/* If the exec call supplied NULL for tables, use the inbuilt ones. This

8867

is a feature that makes it possible to save compiled regex and re-use them

8868

in other programs later. */

8869

8870

if (tables == NULL) tables = pcre_default_tables;

8871

8872

/* Check that the first field in the block is the magic number. If it is not,

8873

test for a regex that was compiled on a host of opposite endianness. If this is

8874

the case, flipped values are put in internal_re and internal_study if there was

8875

study data too. */

8876

8877

if (re->magic_number != MAGIC_NUMBER)

8878

{

8879

re = try_flipped(re, &internal_re, study, &internal_study);

8880

if (re == NULL) return PCRE_ERROR_BADMAGIC;

8881

if (study != NULL) study = &internal_study;

8882

}

8883

8884

/* Set up other data */

8885

8886

anchored = ((re->options | options) & PCRE_ANCHORED) != 0;

8887

startline = (re->options & PCRE_STARTLINE) != 0;

8888

8889

/* The code starts after the real_pcre block and the capture name table. */

8890

8891

match_block.start_code = (const uschar *)external_re + re->name_table_offset +

8892

re->name_count * re->name_entry_size;

8893

8894

match_block.start_subject = (const uschar *)subject;

8895

match_block.start_offset = start_offset;

8896

match_block.end_subject = match_block.start_subject + length;

8897

end_subject = match_block.end_subject;

8898

8899

match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;

8900

match_block.utf8 = (re->options & PCRE_UTF8) != 0;

8901

8902

match_block.notbol = (options & PCRE_NOTBOL) != 0;

8903

match_block.noteol = (options & PCRE_NOTEOL) != 0;

8904

match_block.notempty = (options & PCRE_NOTEMPTY) != 0;

8905

match_block.partial = (options & PCRE_PARTIAL) != 0;

8906

match_block.hitend = FALSE;

8907

8908

match_block.recursive = NULL; /* No recursion at top level */

8909

8910

match_block.lcc = tables + lcc_offset;

8911

match_block.ctypes = tables + ctypes_offset;

8912

8913

/* Partial matching is supported only for a restricted set of regexes at the

8914

moment. */

8915

8916

if (match_block.partial && (re->options & PCRE_NOPARTIAL) != 0)

8917

return PCRE_ERROR_BADPARTIAL;

8918

8919

/* Check a UTF-8 string if required. Unfortunately there's no way of passing

8920

back the character offset. */

8921

8922

#ifdef SUPPORT_UTF8

8923

if (match_block.utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)

8924

{

8925

if (valid_utf8((uschar *)subject, length) >= 0)

8926

return PCRE_ERROR_BADUTF8;

8927

if (start_offset > 0 && start_offset < length)

8928

{

8929

int tb = ((uschar *)subject)[start_offset];

8930

if (tb > 127)

8931

{

8932

tb &= 0xc0;

8933

if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;

8934

}

8935

}

8936

}

8937

#endif

8938

8939

/* The ims options can vary during the matching as a result of the presence

8940

of (?ims) items in the pattern. They are kept in a local variable so that

8941

restoring at the exit of a group is easy. */

8942

8943

ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);

8944

8945

/* If the expression has got more back references than the offsets supplied can

8946

hold, we get a temporary chunk of working store to use during the matching.

8947

Otherwise, we can use the vector supplied, rounding down its size to a multiple

8948

of 3. */

8949

8950

ocount = offsetcount - (offsetcount % 3);

8951

8952

if (re->top_backref > 0 && re->top_backref >= ocount/3)

8953

{

8954

ocount = re->top_backref * 3 + 3;

8955

match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));

8956

if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;

8957

using_temporary_offsets = TRUE;

8958

DPRINTF(("Got memory to hold back references\n"));

8959

}

8960

else match_block.offset_vector = offsets;

8961

8962

match_block.offset_end = ocount;

8963

match_block.offset_max = (2*ocount)/3;

8964

match_block.offset_overflow = FALSE;

8965

match_block.capture_last = -1;

8966

8967

/* Compute the minimum number of offsets that we need to reset each time. Doing

8968

this makes a huge difference to execution time when there aren't many brackets

8969

in the pattern. */

8970

8971

resetcount = 2 + re->top_bracket * 2;

8972

if (resetcount > offsetcount) resetcount = ocount;

8973

8974

/* Reset the working variable associated with each extraction. These should

8975

never be used unless previously set, but they get saved and restored, and so we

8976

initialize them to avoid reading uninitialized locations. */

8977

8978

if (match_block.offset_vector != NULL)

8979

{

8980

8981

8982

while (--iptr >= iend) *iptr = -1;

8983

}

8984

8985

/* Set up the first character to match, if available. The first_byte value is

8986

never set for an anchored regular expression, but the anchoring may be forced

8987

at run time, so we have to test for anchoring. The first char may be unset for

8988

an unanchored pattern, of course. If there's no first char and the pattern was

8989

studied, there may be a bitmap of possible first characters. */

8990

8991

if (!anchored)

8992

{

8993

if ((re->options & PCRE_FIRSTSET) != 0)

8994

{

8995

first_byte = re->first_byte & 255;

8996

if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)

8997

first_byte = match_block.lcc[first_byte];

8998

}

8999

else

9000

if (!startline && study != NULL &&

9001

(study->options & PCRE_STUDY_MAPPED) != 0)

9002

start_bits = study->start_bits;

9003

}

9004

9005

/* For anchored or unanchored matches, there may be a "last known required

9006

character" set. */

9007

9008

if ((re->options & PCRE_REQCHSET) != 0)

9009

{

9010

req_byte = re->req_byte & 255;

9011

req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;

9012

req_byte2 = (tables + fcc_offset)[req_byte]; /* case flipped */

9013

}

9014

9015

/* Loop for handling unanchored repeated matching attempts; for anchored regexs

9016

the loop runs just once. */

9017

9018

9019

{

9020

/* Reset the maximum number of extractions we might see. */

9021

9022

if (match_block.offset_vector != NULL)

9023

{

9024

9025

9026

while (iptr < iend) *iptr++ = -1;

9027

}

9028

9029

/* Advance to a unique first char if possible */

9030

9031

if (first_byte >= 0)

9032

{

9033

if (first_byte_caseless)

9034

while (start_match < end_subject &&

9035

match_block.lcc[*start_match] != first_byte)

9036

start_match++;

9037

else

9038

while (start_match < end_subject && *start_match != first_byte)

9039

start_match++;

9040

}

9041

9042

/* Or to just after \n for a multiline match if possible */

9043

9044

else if (startline)

9045

{

9046

if (start_match > match_block.start_subject + start_offset)

9047

{

9048

while (start_match < end_subject && start_match[-1] != NEWLINE)

9049

start_match++;

9050

}

9051

}

9052

9053

/* Or to a non-unique first char after study */

9054

9055

else if (start_bits != NULL)

9056

{

9057

while (start_match < end_subject)

9058

{

9059

9060

if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;

9061

}

9062

}

9063

9064

#ifdef DEBUG /* Sigh. Some compilers never learn. */

9065

printf(">>>> Match against: ");

9066

pchars(start_match, end_subject - start_match, TRUE, &match_block);

9067

printf("\n");

9068

#endif

9069

9070

/* If req_byte is set, we know that that character must appear in the subject

9071

for the match to succeed. If the first character is set, req_byte must be

9072

later in the subject; otherwise the test starts at the match point. This

9073

optimization can save a huge amount of backtracking in patterns with nested

9074

unlimited repeats that aren't going to match. Writing separate code for

9075

cased/caseless versions makes it go faster, as does using an autoincrement

9076

and backing off on a match.

9077

9078

HOWEVER: when the subject string is very, very long, searching to its end can

9079

take a long time, and give bad performance on quite ordinary patterns. This

9080

showed up when somebody was matching /^C/ on a 32-megabyte string... so we

9081

don't do this when the string is sufficiently long.

9082

9083

ALSO: this processing is disabled when partial matching is requested. */

9084

9085

9086

if (req_byte >= 0 &&

9087

end_subject - start_match < REQ_BYTE_MAX &&

9088

!match_block.partial)

9089

{

9090

9091

9092

/* We don't need to repeat the search if we haven't yet reached the

9093

place we found it at last time. */

9094

9095

if (p > req_byte_ptr)

9096

{

9097

if (req_byte_caseless)

9098

{

9099

while (p < end_subject)

9100

{

9101

9102

if (pp == req_byte || pp == req_byte2) { p--; break; }

9103

}

9104

}

9105

else

9106

{

9107

while (p < end_subject)

9108

{

9109

if (*p++ == req_byte) { p--; break; }

9110

}

9111

}

9112

9113

/* If we can't find the required character, break the matching loop */

9114

9115

if (p >= end_subject) break;

9116

9117

/* If we have found the required character, save the point where we

9118

found it, so that we don't search again next time round the loop if

9119

the start hasn't passed this character yet. */

9120

9121

req_byte_ptr = p;

9122

}

9123

}

9124

9125

/* When a match occurs, substrings will be set for all internal extractions;

9126

we just need to set up the whole thing as substring 0 before returning. If

9127

there were too many extractions, set the return code to zero. In the case

9128

where we had to get some local store to hold offsets for backreferences, copy

9129

those back references that we can. In this case there need not be overflow

9130

if certain parts of the pattern were not used. */

9131

9132

match_block.start_match = start_match;

9133

match_block.match_call_count = 0;

9134

9135

rc = match(start_match, match_block.start_code, 2, &match_block, ims, NULL,

9136

match_isgroup);

9137

9138

if (rc == MATCH_NOMATCH)

9139

{

9140

start_match++;

9141

#ifdef SUPPORT_UTF8

9142

if (match_block.utf8)

9143

while(start_match < end_subject && (*start_match & 0xc0) == 0x80)

9144

start_match++;

9145

#endif

9146

continue;

9147

}

9148

9149

if (rc != MATCH_MATCH)

9150

{

9151

DPRINTF((">>>> error: returning %d\n", rc));

9152

return rc;

9153

}

9154

9155

/* We have a match! Copy the offset information from temporary store if

9156

necessary */

9157

9158

if (using_temporary_offsets)

9159

{

9160

if (offsetcount >= 4)

9161

{

9162

memcpy(offsets + 2, match_block.offset_vector + 2,

9163

(offsetcount - 2) * sizeof(int));

9164

DPRINTF(("Copied offsets from temporary memory\n"));

9165

}

9166

if (match_block.end_offset_top > offsetcount)

9167

match_block.offset_overflow = TRUE;

9168

9169

DPRINTF(("Freeing temporary memory\n"));

9170

(pcre_free)(match_block.offset_vector);

9171

}

9172

9173

rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;

9174

9175

if (offsetcount < 2) rc = 0; else

9176

{

9177

offsets[0] = start_match - match_block.start_subject;

9178

offsets[1] = match_block.end_match_ptr - match_block.start_subject;

9179

}

9180

9181

DPRINTF((">>>> returning %d\n", rc));

9182

return rc;

9183

}

9184

9185

/* This "while" is the end of the "do" above */

9186

9187

while (!anchored && start_match <= end_subject);

9188

9189

if (using_temporary_offsets)

9190

{

9191

DPRINTF(("Freeing temporary memory\n"));

9192

(pcre_free)(match_block.offset_vector);

9193

}

9194

9195

if (match_block.partial && match_block.hitend)

9196

{

9197

DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));

9198

return PCRE_ERROR_PARTIAL;

9199

}

9200

else

9201

{

9202

DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));

9203

return PCRE_ERROR_NOMATCH;

9204

}

9205

}

9206

9207

/* End of pcre.c */

Older »