~ubuntu-branches/ubuntu/gutsy/poco/gutsy

« back to all changes in this revision

Viewing changes to Foundation/src/pcre.c

Committer: Bazaar Package Importer
Author(s): Krzysztof Burghardt
Date: 2007-04-27 18:33:48 UTC
Revision ID: james.westby@ubuntu.com-20070427183348-xgnpct0qd6a2ip34

Tags: upstream-1.2.9

Import upstream version 1.2.9

files added:

CHANGELOG

CONTRIBUTORS

CppUnit

CppUnit/CppUnit_vs71.sln

CppUnit/CppUnit_vs71.vcproj

CppUnit/CppUnit_vs80.sln

CppUnit/CppUnit_vs80.vcproj

CppUnit/Makefile

CppUnit/WinTestRunner

CppUnit/WinTestRunner/WinTestRunner_vs71.vcproj

CppUnit/WinTestRunner/WinTestRunner_vs80.vcproj

CppUnit/WinTestRunner/include

CppUnit/WinTestRunner/include/WinTestRunner

CppUnit/WinTestRunner/include/WinTestRunner/WinTestRunner.h

CppUnit/WinTestRunner/res

CppUnit/WinTestRunner/res/Resource.h

CppUnit/WinTestRunner/res/WinTestRunner.rc

CppUnit/WinTestRunner/src

CppUnit/WinTestRunner/src/ActiveTest.cpp

CppUnit/WinTestRunner/src/ActiveTest.h

CppUnit/WinTestRunner/src/DLLMain.cpp

CppUnit/WinTestRunner/src/GUITestResult.cpp

CppUnit/WinTestRunner/src/GUITestResult.h

CppUnit/WinTestRunner/src/ProgressBar.cpp

CppUnit/WinTestRunner/src/ProgressBar.h

CppUnit/WinTestRunner/src/SynchronizedTestResult.h

CppUnit/WinTestRunner/src/TestResultDecorator.h

CppUnit/WinTestRunner/src/TestRunnerDlg.cpp

CppUnit/WinTestRunner/src/TestRunnerDlg.h

CppUnit/WinTestRunner/src/WinTestRunner.cpp

CppUnit/cppunit.vmsbuild

CppUnit/doc

CppUnit/doc/README.html

CppUnit/doc/cookbook.htm

CppUnit/doc/license.htm

CppUnit/doc/test.gif

CppUnit/include

CppUnit/include/CppUnit

CppUnit/include/CppUnit/CppUnit.h

CppUnit/include/CppUnit/CppUnitException.h

CppUnit/include/CppUnit/Guards.h

CppUnit/include/CppUnit/Orthodox.h

CppUnit/include/CppUnit/RepeatedTest.h

CppUnit/include/CppUnit/Test.h

CppUnit/include/CppUnit/TestCaller.h

CppUnit/include/CppUnit/TestCase.h

CppUnit/include/CppUnit/TestDecorator.h

CppUnit/include/CppUnit/TestFailure.h

CppUnit/include/CppUnit/TestResult.h

CppUnit/include/CppUnit/TestRunner.h

CppUnit/include/CppUnit/TestSetup.h

CppUnit/include/CppUnit/TestSuite.h

CppUnit/include/CppUnit/TextTestResult.h

CppUnit/include/CppUnit/estring.h

CppUnit/src

CppUnit/src/CppUnitException.cpp

CppUnit/src/TestCase.cpp

CppUnit/src/TestDecorator.cpp

CppUnit/src/TestFailure.cpp

CppUnit/src/TestResult.cpp

CppUnit/src/TestRunner.cpp

CppUnit/src/TestSuite.cpp

CppUnit/src/TextTestResult.cpp

Foundation

Foundation/Foundation_vs71.sln

Foundation/Foundation_vs71.vcproj

Foundation/Foundation_vs80.sln

Foundation/Foundation_vs80.vcproj

Foundation/Makefile

Foundation/foundation.vmsbuild

Foundation/include

Foundation/include/Poco

Foundation/include/Poco/ASCIIEncoding.h

Foundation/include/Poco/AbstractCache.h

Foundation/include/Poco/AbstractDelegate.h

Foundation/include/Poco/AbstractEvent.h

Foundation/include/Poco/AbstractObserver.h

Foundation/include/Poco/AbstractPriorityDelegate.h

Foundation/include/Poco/AbstractStrategy.h

Foundation/include/Poco/ActiveDispatcher.h

Foundation/include/Poco/ActiveMethod.h

Foundation/include/Poco/ActiveResult.h

Foundation/include/Poco/ActiveRunnable.h

Foundation/include/Poco/ActiveStarter.h

Foundation/include/Poco/Activity.h

Foundation/include/Poco/Any.h

Foundation/include/Poco/ArchiveStrategy.h

Foundation/include/Poco/AsyncChannel.h

Foundation/include/Poco/AutoPtr.h

Foundation/include/Poco/AutoReleasePool.h

Foundation/include/Poco/Base64Decoder.h

Foundation/include/Poco/Base64Encoder.h

Foundation/include/Poco/BasicEvent.h

Foundation/include/Poco/BinaryReader.h

Foundation/include/Poco/BinaryWriter.h

Foundation/include/Poco/Buffer.h

Foundation/include/Poco/BufferAllocator.h

Foundation/include/Poco/BufferedBidirectionalStreamBuf.h

Foundation/include/Poco/BufferedStreamBuf.h

Foundation/include/Poco/Bugcheck.h

Foundation/include/Poco/ByteOrder.h

Foundation/include/Poco/Channel.h

Foundation/include/Poco/ClassLibrary.h

Foundation/include/Poco/ClassLoader.h

Foundation/include/Poco/CompareFunctions.h

Foundation/include/Poco/Config.h

Foundation/include/Poco/Configurable.h

Foundation/include/Poco/ConsoleChannel.h

Foundation/include/Poco/CountingStream.h

Foundation/include/Poco/DateTime.h

Foundation/include/Poco/DateTimeFormat.h

Foundation/include/Poco/DateTimeFormatter.h

Foundation/include/Poco/DateTimeParser.h

Foundation/include/Poco/Debugger.h

Foundation/include/Poco/DefaultStrategy.h

Foundation/include/Poco/DeflatingStream.h

Foundation/include/Poco/Delegate.h

Foundation/include/Poco/DigestEngine.h

Foundation/include/Poco/DigestStream.h

Foundation/include/Poco/DirectoryIterator.h

Foundation/include/Poco/DirectoryIterator_UNIX.h

Foundation/include/Poco/DirectoryIterator_VMS.h

Foundation/include/Poco/DirectoryIterator_WIN32.h

Foundation/include/Poco/DirectoryIterator_WIN32U.h

Foundation/include/Poco/DynamicFactory.h

Foundation/include/Poco/Environment.h

Foundation/include/Poco/Environment_UNIX.h

Foundation/include/Poco/Environment_VMS.h

Foundation/include/Poco/Environment_WIN32.h

Foundation/include/Poco/Environment_WIN32U.h

Foundation/include/Poco/ErrorHandler.h

Foundation/include/Poco/Event.h

Foundation/include/Poco/EventArgs.h

Foundation/include/Poco/EventLogChannel.h

Foundation/include/Poco/Event_POSIX.h

Foundation/include/Poco/Event_WIN32.h

Foundation/include/Poco/Exception.h

Foundation/include/Poco/ExpirationDecorator.h

Foundation/include/Poco/Expire.h

Foundation/include/Poco/ExpireCache.h

Foundation/include/Poco/ExpireLRUCache.h

Foundation/include/Poco/ExpireStrategy.h

Foundation/include/Poco/FIFOEvent.h

Foundation/include/Poco/FIFOStrategy.h

Foundation/include/Poco/FPEnvironment.h

Foundation/include/Poco/FPEnvironment_C99.h

Foundation/include/Poco/FPEnvironment_DEC.h

Foundation/include/Poco/FPEnvironment_DUMMY.h

Foundation/include/Poco/FPEnvironment_SUN.h

Foundation/include/Poco/FPEnvironment_WIN32.h

Foundation/include/Poco/File.h

Foundation/include/Poco/FileChannel.h

Foundation/include/Poco/FileStreamFactory.h

Foundation/include/Poco/File_UNIX.h

Foundation/include/Poco/File_VMS.h

Foundation/include/Poco/File_WIN32.h

Foundation/include/Poco/File_WIN32U.h

Foundation/include/Poco/Format.h

Foundation/include/Poco/Formatter.h

Foundation/include/Poco/FormattingChannel.h

Foundation/include/Poco/Foundation.h

Foundation/include/Poco/Glob.h

Foundation/include/Poco/HMACEngine.h

Foundation/include/Poco/HashFunction.h

Foundation/include/Poco/HashStatistic.h

Foundation/include/Poco/HashTable.h

Foundation/include/Poco/HexBinaryDecoder.h

Foundation/include/Poco/HexBinaryEncoder.h

Foundation/include/Poco/InflatingStream.h

Foundation/include/Poco/Instantiator.h

Foundation/include/Poco/KeyValueArgs.h

Foundation/include/Poco/LRUCache.h

Foundation/include/Poco/LRUStrategy.h

Foundation/include/Poco/Latin1Encoding.h

Foundation/include/Poco/Latin9Encoding.h

Foundation/include/Poco/LineEndingConverter.h

Foundation/include/Poco/LocalDateTime.h

Foundation/include/Poco/LogFile.h

Foundation/include/Poco/LogFile_STD.h

Foundation/include/Poco/LogFile_VMS.h

Foundation/include/Poco/LogFile_WIN32.h

Foundation/include/Poco/LogFile_WIN32U.h

Foundation/include/Poco/LogStream.h

Foundation/include/Poco/Logger.h

Foundation/include/Poco/LoggingFactory.h

Foundation/include/Poco/LoggingRegistry.h

Foundation/include/Poco/MD2Engine.h

Foundation/include/Poco/MD4Engine.h

Foundation/include/Poco/MD5Engine.h

Foundation/include/Poco/Manifest.h

Foundation/include/Poco/MemoryPool.h

Foundation/include/Poco/Message.h

Foundation/include/Poco/MetaObject.h

Foundation/include/Poco/Mutex.h

Foundation/include/Poco/Mutex_POSIX.h

Foundation/include/Poco/Mutex_WIN32.h

Foundation/include/Poco/NObserver.h

Foundation/include/Poco/NamedEvent.h

Foundation/include/Poco/NamedEvent_UNIX.h

Foundation/include/Poco/NamedEvent_VMS.h

Foundation/include/Poco/NamedEvent_WIN32.h

Foundation/include/Poco/NamedEvent_WIN32U.h

Foundation/include/Poco/NamedMutex.h

Foundation/include/Poco/NamedMutex_UNIX.h

Foundation/include/Poco/NamedMutex_VMS.h

Foundation/include/Poco/NamedMutex_WIN32.h

Foundation/include/Poco/NamedMutex_WIN32U.h

Foundation/include/Poco/NestedDiagnosticContext.h

Foundation/include/Poco/Notification.h

Foundation/include/Poco/NotificationCenter.h

Foundation/include/Poco/NotificationQueue.h

Foundation/include/Poco/NotificationStrategy.h

Foundation/include/Poco/NullChannel.h

Foundation/include/Poco/NullStream.h

Foundation/include/Poco/NumberFormatter.h

Foundation/include/Poco/NumberParser.h

Foundation/include/Poco/Observer.h

Foundation/include/Poco/OpcomChannel.h

Foundation/include/Poco/Path.h

Foundation/include/Poco/Path_UNIX.h

Foundation/include/Poco/Path_VMS.h

Foundation/include/Poco/Path_WIN32.h

Foundation/include/Poco/Path_WIN32U.h

Foundation/include/Poco/PatternFormatter.h

Foundation/include/Poco/Pipe.h

Foundation/include/Poco/PipeImpl.h

Foundation/include/Poco/PipeImpl_DUMMY.h

Foundation/include/Poco/PipeImpl_POSIX.h

Foundation/include/Poco/PipeImpl_WIN32.h

Foundation/include/Poco/PipeStream.h

Foundation/include/Poco/Platform.h

Foundation/include/Poco/Platform_POSIX.h

Foundation/include/Poco/Platform_VMS.h

Foundation/include/Poco/Platform_WIN32.h

Foundation/include/Poco/Poco.h

Foundation/include/Poco/PriorityDelegate.h

Foundation/include/Poco/PriorityEvent.h

Foundation/include/Poco/PriorityExpire.h

Foundation/include/Poco/Process.h

Foundation/include/Poco/Process_UNIX.h

Foundation/include/Poco/Process_VMS.h

Foundation/include/Poco/Process_WIN32.h

Foundation/include/Poco/Process_WIN32U.h

Foundation/include/Poco/PurgeStrategy.h

Foundation/include/Poco/RWLock.h

Foundation/include/Poco/RWLock_POSIX.h

Foundation/include/Poco/RWLock_WIN32.h

Foundation/include/Poco/Random.h

Foundation/include/Poco/RandomStream.h

Foundation/include/Poco/RefCountedObject.h

Foundation/include/Poco/RegularExpression.h

Foundation/include/Poco/RotateStrategy.h

Foundation/include/Poco/Runnable.h

Foundation/include/Poco/RunnableAdapter.h

Foundation/include/Poco/SHA1Engine.h

Foundation/include/Poco/ScopedLock.h

Foundation/include/Poco/Semaphore.h

Foundation/include/Poco/Semaphore_POSIX.h

Foundation/include/Poco/Semaphore_WIN32.h

Foundation/include/Poco/SharedLibrary.h

Foundation/include/Poco/SharedLibrary_HPUX.h

Foundation/include/Poco/SharedLibrary_UNIX.h

Foundation/include/Poco/SharedLibrary_VMS.h

Foundation/include/Poco/SharedLibrary_WIN32.h

Foundation/include/Poco/SharedLibrary_WIN32U.h

Foundation/include/Poco/SharedPtr.h

Foundation/include/Poco/SignalHandler.h

Foundation/include/Poco/SimpleFileChannel.h

Foundation/include/Poco/SimpleHashTable.h

Foundation/include/Poco/SingletonHolder.h

Foundation/include/Poco/SplitterChannel.h

Foundation/include/Poco/Stopwatch.h

Foundation/include/Poco/StrategyCollection.h

Foundation/include/Poco/StreamChannel.h

Foundation/include/Poco/StreamConverter.h

Foundation/include/Poco/StreamCopier.h

Foundation/include/Poco/StreamTokenizer.h

Foundation/include/Poco/StreamUtil.h

Foundation/include/Poco/String.h

Foundation/include/Poco/StringTokenizer.h

Foundation/include/Poco/SynchronizedObject.h

Foundation/include/Poco/SyslogChannel.h

Foundation/include/Poco/Task.h

Foundation/include/Poco/TaskManager.h

Foundation/include/Poco/TaskNotification.h

Foundation/include/Poco/TeeStream.h

Foundation/include/Poco/TemporaryFile.h

Foundation/include/Poco/TextConverter.h

Foundation/include/Poco/TextEncoding.h

Foundation/include/Poco/TextIterator.h

Foundation/include/Poco/Thread.h

Foundation/include/Poco/ThreadLocal.h

Foundation/include/Poco/ThreadPool.h

Foundation/include/Poco/Thread_POSIX.h

Foundation/include/Poco/Thread_WIN32.h

Foundation/include/Poco/Timer.h

Foundation/include/Poco/Timespan.h

Foundation/include/Poco/Timestamp.h

Foundation/include/Poco/Timezone.h

Foundation/include/Poco/Token.h

Foundation/include/Poco/Types.h

Foundation/include/Poco/URI.h

Foundation/include/Poco/URIStreamFactory.h

Foundation/include/Poco/URIStreamOpener.h

Foundation/include/Poco/UTF16Encoding.h

Foundation/include/Poco/UTF8Encoding.h

Foundation/include/Poco/UUID.h

Foundation/include/Poco/UUIDGenerator.h

Foundation/include/Poco/UnbufferedStreamBuf.h

Foundation/include/Poco/UnicodeConverter.h

Foundation/include/Poco/UniqueExpireCache.h

Foundation/include/Poco/UniqueExpireLRUCache.h

Foundation/include/Poco/UniqueExpireStrategy.h

Foundation/include/Poco/ValidArgs.h

Foundation/include/Poco/Void.h

Foundation/include/Poco/Windows1252Encoding.h

Foundation/include/Poco/zconf.h

Foundation/include/Poco/zlib.h

Foundation/samples

Foundation/samples/ActiveMethod

Foundation/samples/ActiveMethod/ActiveMethod.vmsbuild

Foundation/samples/ActiveMethod/ActiveMethod_vs71.vcproj

Foundation/samples/ActiveMethod/ActiveMethod_vs80.vcproj

Foundation/samples/ActiveMethod/Makefile

Foundation/samples/ActiveMethod/bin

Foundation/samples/ActiveMethod/src

Foundation/samples/ActiveMethod/src/ActiveMethod.cpp

Foundation/samples/Activity

Foundation/samples/Activity/Activity.vmsbuild

Foundation/samples/Activity/Activity_vs71.vcproj

Foundation/samples/Activity/Activity_vs80.vcproj

Foundation/samples/Activity/Makefile

Foundation/samples/Activity/bin

Foundation/samples/Activity/src

Foundation/samples/Activity/src/Activity.cpp

Foundation/samples/BinaryReaderWriter

Foundation/samples/BinaryReaderWriter/BinaryReaderWriter.vmsbuild

Foundation/samples/BinaryReaderWriter/BinaryReaderWriter_vs71.vcproj

Foundation/samples/BinaryReaderWriter/BinaryReaderWriter_vs80.vcproj

Foundation/samples/BinaryReaderWriter/Makefile

Foundation/samples/BinaryReaderWriter/bin

Foundation/samples/BinaryReaderWriter/src

Foundation/samples/BinaryReaderWriter/src/BinaryReaderWriter.cpp

Foundation/samples/DateTime

Foundation/samples/DateTime/DateTime.vmsbuild

Foundation/samples/DateTime/DateTime_vs71.vcproj

Foundation/samples/DateTime/DateTime_vs80.vcproj

Foundation/samples/DateTime/Makefile

Foundation/samples/DateTime/bin

Foundation/samples/DateTime/src

Foundation/samples/DateTime/src/DateTime.cpp

Foundation/samples/Logger

Foundation/samples/Logger/Logger.vmsbuild

Foundation/samples/Logger/Logger_vs71.vcproj

Foundation/samples/Logger/Logger_vs80.vcproj

Foundation/samples/Logger/Makefile

Foundation/samples/Logger/bin

Foundation/samples/Logger/src

Foundation/samples/Logger/src/Logger.cpp

Foundation/samples/Makefile

Foundation/samples/NotificationQueue

Foundation/samples/NotificationQueue/Makefile

Foundation/samples/NotificationQueue/NotificationQueue.vmsbuild

Foundation/samples/NotificationQueue/NotificationQueue_vs71.vcproj

Foundation/samples/NotificationQueue/NotificationQueue_vs80.vcproj

Foundation/samples/NotificationQueue/bin

Foundation/samples/NotificationQueue/src

Foundation/samples/NotificationQueue/src/NotificationQueue.cpp

Foundation/samples/StringTokenizer

Foundation/samples/StringTokenizer/Makefile

Foundation/samples/StringTokenizer/StringTokenizer.vmsbuild

Foundation/samples/StringTokenizer/StringTokenizer_vs71.vcproj

Foundation/samples/StringTokenizer/StringTokenizer_vs80.vcproj

Foundation/samples/StringTokenizer/bin

Foundation/samples/StringTokenizer/src

Foundation/samples/StringTokenizer/src/StringTokenizer.cpp

Foundation/samples/Timer

Foundation/samples/Timer/Makefile

Foundation/samples/Timer/Timer.vmsbuild

Foundation/samples/Timer/Timer_vs71.vcproj

Foundation/samples/Timer/Timer_vs80.vcproj

Foundation/samples/Timer/bin

Foundation/samples/Timer/src

Foundation/samples/Timer/src/Timer.cpp

Foundation/samples/URI

Foundation/samples/URI/Makefile

Foundation/samples/URI/URI.vmsbuild

Foundation/samples/URI/URI_vs71.vcproj

Foundation/samples/URI/URI_vs80.vcproj

Foundation/samples/URI/bin

Foundation/samples/URI/src

Foundation/samples/URI/src/URI.cpp

Foundation/samples/base64decode

Foundation/samples/base64decode/Makefile

Foundation/samples/base64decode/base64decode.vmsbuild

Foundation/samples/base64decode/base64decode_vs71.vcproj

Foundation/samples/base64decode/base64decode_vs80.vcproj

Foundation/samples/base64decode/bin

Foundation/samples/base64decode/src

Foundation/samples/base64decode/src/base64decode.cpp

Foundation/samples/base64encode

Foundation/samples/base64encode/Makefile

Foundation/samples/base64encode/base64encode.vmsbuild

Foundation/samples/base64encode/base64encode_vs71.vcproj

Foundation/samples/base64encode/base64encode_vs80.vcproj

Foundation/samples/base64encode/bin

Foundation/samples/base64encode/src

Foundation/samples/base64encode/src/base64encode.cpp

Foundation/samples/deflate

Foundation/samples/deflate/Makefile

Foundation/samples/deflate/bin

Foundation/samples/deflate/deflate.vmsbuild

Foundation/samples/deflate/deflate_vs71.vcproj

Foundation/samples/deflate/deflate_vs80.vcproj

Foundation/samples/deflate/src

Foundation/samples/deflate/src/deflate.cpp

Foundation/samples/dir

Foundation/samples/dir/Makefile

Foundation/samples/dir/bin

Foundation/samples/dir/dir.vmsbuild

Foundation/samples/dir/dir_vs71.vcproj

Foundation/samples/dir/dir_vs80.vcproj

Foundation/samples/dir/src

Foundation/samples/dir/src/dir.cpp

Foundation/samples/grep

Foundation/samples/grep/Makefile

Foundation/samples/grep/bin

Foundation/samples/grep/grep.vmsbuild

Foundation/samples/grep/grep_vs71.vcproj

Foundation/samples/grep/grep_vs80.vcproj

Foundation/samples/grep/src

Foundation/samples/grep/src/grep.cpp

Foundation/samples/hmacmd5

Foundation/samples/hmacmd5/Makefile

Foundation/samples/hmacmd5/bin

Foundation/samples/hmacmd5/hmacmd5.vmsbuild

Foundation/samples/hmacmd5/hmacmd5_vs71.vcproj

Foundation/samples/hmacmd5/hmacmd5_vs80.vcproj

Foundation/samples/hmacmd5/src

Foundation/samples/hmacmd5/src/hmacmd5.cpp

Foundation/samples/inflate

Foundation/samples/inflate/Makefile

Foundation/samples/inflate/bin

Foundation/samples/inflate/inflate.vmsbuild

Foundation/samples/inflate/inflate_vs71.vcproj

Foundation/samples/inflate/inflate_vs80.vcproj

Foundation/samples/inflate/src

Foundation/samples/inflate/src/inflate.cpp

Foundation/samples/md5

Foundation/samples/md5/Makefile

Foundation/samples/md5/bin

Foundation/samples/md5/md5.vmsbuild

Foundation/samples/md5/md5_vs71.vcproj

Foundation/samples/md5/md5_vs80.vcproj

Foundation/samples/md5/src

Foundation/samples/md5/src/md5.cpp

Foundation/samples/samples_vs71.sln

Foundation/samples/samples_vs80.sln

Foundation/samples/uuidgen

Foundation/samples/uuidgen/Makefile

Foundation/samples/uuidgen/bin

Foundation/samples/uuidgen/src

Foundation/samples/uuidgen/src/uuidgen.cpp

Foundation/samples/uuidgen/uuidgen.vmsbuild

Foundation/samples/uuidgen/uuidgen_vs71.vcproj

Foundation/samples/uuidgen/uuidgen_vs80.vcproj

Foundation/src

Foundation/src/ASCIIEncoding.cpp

Foundation/src/AbstractObserver.cpp

Foundation/src/ActiveDispatcher.cpp

Foundation/src/ArchiveStrategy.cpp

Foundation/src/AsyncChannel.cpp

Foundation/src/Base64Decoder.cpp

Foundation/src/Base64Encoder.cpp

Foundation/src/BinaryReader.cpp

Foundation/src/BinaryWriter.cpp

Foundation/src/Bugcheck.cpp

Foundation/src/ByteOrder.cpp

Foundation/src/Channel.cpp

Foundation/src/Configurable.cpp

Foundation/src/ConsoleChannel.cpp

Foundation/src/CountingStream.cpp

Foundation/src/DateTime.cpp

Foundation/src/DateTimeFormat.cpp

Foundation/src/DateTimeFormatter.cpp

Foundation/src/DateTimeParser.cpp

Foundation/src/Debugger.cpp

Foundation/src/DeflatingStream.cpp

Foundation/src/DigestEngine.cpp

Foundation/src/DigestStream.cpp

Foundation/src/DirectoryIterator.cpp

Foundation/src/DirectoryIterator_UNIX.cpp

Foundation/src/DirectoryIterator_VMS.cpp

Foundation/src/DirectoryIterator_WIN32.cpp

Foundation/src/DirectoryIterator_WIN32U.cpp

Foundation/src/Environment.cpp

Foundation/src/Environment_UNIX.cpp

Foundation/src/Environment_VMS.cpp

Foundation/src/Environment_WIN32.cpp

Foundation/src/Environment_WIN32U.cpp

Foundation/src/ErrorHandler.cpp

Foundation/src/Event.cpp

Foundation/src/EventArgs.cpp

Foundation/src/EventLogChannel.cpp

Foundation/src/Event_POSIX.cpp

Foundation/src/Event_WIN32.cpp

Foundation/src/Exception.cpp

Foundation/src/FPEnvironment.cpp

Foundation/src/FPEnvironment_C99.cpp

Foundation/src/FPEnvironment_DEC.cpp

Foundation/src/FPEnvironment_DUMMY.cpp

Foundation/src/FPEnvironment_SUN.cpp

Foundation/src/FPEnvironment_WIN32.cpp

Foundation/src/File.cpp

Foundation/src/FileChannel.cpp

Foundation/src/FileStreamFactory.cpp

Foundation/src/File_UNIX.cpp

Foundation/src/File_VMS.cpp

Foundation/src/File_WIN32.cpp

Foundation/src/File_WIN32U.cpp

Foundation/src/Format.cpp

Foundation/src/Formatter.cpp

Foundation/src/FormattingChannel.cpp

Foundation/src/Glob.cpp

Foundation/src/HashStatistic.cpp

Foundation/src/HexBinaryDecoder.cpp

Foundation/src/HexBinaryEncoder.cpp

Foundation/src/InflatingStream.cpp

Foundation/src/Latin1Encoding.cpp

Foundation/src/Latin9Encoding.cpp

Foundation/src/LineEndingConverter.cpp

Foundation/src/LocalDateTime.cpp

Foundation/src/LogFile.cpp

Foundation/src/LogFile_STD.cpp

Foundation/src/LogFile_VMS.cpp

Foundation/src/LogFile_WIN32.cpp

Foundation/src/LogFile_WIN32U.cpp

Foundation/src/LogStream.cpp

Foundation/src/Logger.cpp

Foundation/src/LoggingFactory.cpp

Foundation/src/LoggingRegistry.cpp

Foundation/src/MD2Engine.cpp

Foundation/src/MD4Engine.cpp

Foundation/src/MD5Engine.cpp

Foundation/src/MSG00001.bin

Foundation/src/Manifest.cpp

Foundation/src/MemoryPool.cpp

Foundation/src/Message.cpp

Foundation/src/Mutex.cpp

Foundation/src/Mutex_POSIX.cpp

Foundation/src/Mutex_WIN32.cpp

Foundation/src/NamedEvent.cpp

Foundation/src/NamedEvent_UNIX.cpp

Foundation/src/NamedEvent_VMS.cpp

Foundation/src/NamedEvent_WIN32.cpp

Foundation/src/NamedEvent_WIN32U.cpp

Foundation/src/NamedMutex.cpp

Foundation/src/NamedMutex_UNIX.cpp

Foundation/src/NamedMutex_VMS.cpp

Foundation/src/NamedMutex_WIN32.cpp

Foundation/src/NamedMutex_WIN32U.cpp

Foundation/src/NestedDiagnosticContext.cpp

Foundation/src/Notification.cpp

Foundation/src/NotificationCenter.cpp

Foundation/src/NotificationQueue.cpp

Foundation/src/NullChannel.cpp

Foundation/src/NullStream.cpp

Foundation/src/NumberFormatter.cpp

Foundation/src/NumberParser.cpp

Foundation/src/OpcomChannel.cpp

Foundation/src/Path.cpp

Foundation/src/Path_UNIX.cpp

Foundation/src/Path_VMS.cpp

Foundation/src/Path_WIN32.cpp

Foundation/src/Path_WIN32U.cpp

Foundation/src/PatternFormatter.cpp

Foundation/src/Pipe.cpp

Foundation/src/PipeImpl.cpp

Foundation/src/PipeImpl_DUMMY.cpp

Foundation/src/PipeImpl_POSIX.cpp

Foundation/src/PipeImpl_WIN32.cpp

Foundation/src/PipeStream.cpp

Foundation/src/Process.cpp

Foundation/src/Process_UNIX.cpp

Foundation/src/Process_VMS.cpp

Foundation/src/Process_WIN32.cpp

Foundation/src/Process_WIN32U.cpp

Foundation/src/PurgeStrategy.cpp

Foundation/src/RWLock.cpp

Foundation/src/RWLock_POSIX.cpp

Foundation/src/RWLock_WIN32.cpp

Foundation/src/Random.cpp

Foundation/src/RandomStream.cpp

Foundation/src/RefCountedObject.cpp

Foundation/src/RegularExpression.cpp

Foundation/src/RotateStrategy.cpp

Foundation/src/Runnable.cpp

Foundation/src/SHA1Engine.cpp

Foundation/src/Semaphore.cpp

Foundation/src/Semaphore_POSIX.cpp

Foundation/src/Semaphore_WIN32.cpp

Foundation/src/SharedLibrary.cpp

Foundation/src/SharedLibrary_HPUX.cpp

Foundation/src/SharedLibrary_UNIX.cpp

Foundation/src/SharedLibrary_VMS.cpp

Foundation/src/SharedLibrary_WIN32.cpp

Foundation/src/SharedLibrary_WIN32U.cpp

Foundation/src/SignalHandler.cpp

Foundation/src/SimpleFileChannel.cpp

Foundation/src/SplitterChannel.cpp

Foundation/src/Stopwatch.cpp

Foundation/src/StreamChannel.cpp

Foundation/src/StreamConverter.cpp

Foundation/src/StreamCopier.cpp

Foundation/src/StreamTokenizer.cpp

Foundation/src/String.cpp

Foundation/src/StringTokenizer.cpp

Foundation/src/SynchronizedObject.cpp

Foundation/src/SyslogChannel.cpp

Foundation/src/Task.cpp

Foundation/src/TaskManager.cpp

Foundation/src/TaskNotification.cpp

Foundation/src/TeeStream.cpp

Foundation/src/TemporaryFile.cpp

Foundation/src/TextConverter.cpp

Foundation/src/TextEncoding.cpp

Foundation/src/TextIterator.cpp

Foundation/src/Thread.cpp

Foundation/src/ThreadLocal.cpp

Foundation/src/ThreadPool.cpp

Foundation/src/Thread_POSIX.cpp

Foundation/src/Thread_WIN32.cpp

Foundation/src/Timer.cpp

Foundation/src/Timespan.cpp

Foundation/src/Timestamp.cpp

Foundation/src/Timezone.cpp

Foundation/src/Timezone_UNIX.cpp

Foundation/src/Timezone_WIN32.cpp

Foundation/src/Token.cpp

Foundation/src/URI.cpp

Foundation/src/URIStreamFactory.cpp

Foundation/src/URIStreamOpener.cpp

Foundation/src/UTF16Encoding.cpp

Foundation/src/UTF8Encoding.cpp

Foundation/src/UUID.cpp

Foundation/src/UUIDGenerator.cpp

Foundation/src/UnicodeConverter.cpp

Foundation/src/Void.cpp

Foundation/src/Windows1252Encoding.cpp

Foundation/src/adler32.c

Foundation/src/chartables.c

Foundation/src/compress.c

Foundation/src/crc32.c

Foundation/src/crc32.h

Foundation/src/deflate.c

Foundation/src/deflate.h

Foundation/src/get.c

Foundation/src/gzio.c

Foundation/src/infback.c

Foundation/src/inffast.c

Foundation/src/inffast.h

Foundation/src/inffixed.h

Foundation/src/inflate.c

Foundation/src/inflate.h

Foundation/src/inftrees.c

Foundation/src/inftrees.h

Foundation/src/maketables.c

Foundation/src/pcre.c

Foundation/src/pcre.h

Foundation/src/pcreconfig.h

Foundation/src/pcreinternal.h

Foundation/src/pocomsg.h

Foundation/src/pocomsg.mc

Foundation/src/pocomsg.rc

Foundation/src/study.c

Foundation/src/trees.c

Foundation/src/trees.h

Foundation/src/zconf.h

Foundation/src/zlib.h

Foundation/src/zutil.c

Foundation/src/zutil.h

Foundation/testsuite

Foundation/testsuite/Makefile

Foundation/testsuite/Makefile-Driver

Foundation/testsuite/Makefile-TestApp

Foundation/testsuite/Makefile-TestLibrary

Foundation/testsuite/TestApp_vs71.vcproj

Foundation/testsuite/TestApp_vs80.vcproj

Foundation/testsuite/TestLibrary_vs71.vcproj

Foundation/testsuite/TestLibrary_vs80.vcproj

Foundation/testsuite/TestSuite_vs71.vcproj

Foundation/testsuite/TestSuite_vs80.vcproj

Foundation/testsuite/src

Foundation/testsuite/src/ActiveDispatcherTest.cpp

Foundation/testsuite/src/ActiveDispatcherTest.h

Foundation/testsuite/src/ActiveMethodTest.cpp

Foundation/testsuite/src/ActiveMethodTest.h

Foundation/testsuite/src/ActivityTest.cpp

Foundation/testsuite/src/ActivityTest.h

Foundation/testsuite/src/AnyTest.cpp

Foundation/testsuite/src/AnyTest.h

Foundation/testsuite/src/AutoPtrTest.cpp

Foundation/testsuite/src/AutoPtrTest.h

Foundation/testsuite/src/AutoReleasePoolTest.cpp

Foundation/testsuite/src/AutoReleasePoolTest.h

Foundation/testsuite/src/Base64Test.cpp

Foundation/testsuite/src/Base64Test.h

Foundation/testsuite/src/BasicEventTest.cpp

Foundation/testsuite/src/BasicEventTest.h

Foundation/testsuite/src/BinaryReaderWriterTest.cpp

Foundation/testsuite/src/BinaryReaderWriterTest.h

Foundation/testsuite/src/ByteOrderTest.cpp

Foundation/testsuite/src/ByteOrderTest.h

Foundation/testsuite/src/CacheTestSuite.cpp

Foundation/testsuite/src/CacheTestSuite.h

Foundation/testsuite/src/ChannelTest.cpp

Foundation/testsuite/src/ChannelTest.h

Foundation/testsuite/src/ClassLoaderTest.cpp

Foundation/testsuite/src/ClassLoaderTest.h

Foundation/testsuite/src/CoreTest.cpp

Foundation/testsuite/src/CoreTest.h

Foundation/testsuite/src/CoreTestSuite.cpp

Foundation/testsuite/src/CoreTestSuite.h

Foundation/testsuite/src/CountingStreamTest.cpp

Foundation/testsuite/src/CountingStreamTest.h

Foundation/testsuite/src/CryptTestSuite.cpp

Foundation/testsuite/src/CryptTestSuite.h

Foundation/testsuite/src/DateTimeFormatterTest.cpp

Foundation/testsuite/src/DateTimeFormatterTest.h

Foundation/testsuite/src/DateTimeParserTest.cpp

Foundation/testsuite/src/DateTimeParserTest.h

Foundation/testsuite/src/DateTimeTest.cpp

Foundation/testsuite/src/DateTimeTest.h

Foundation/testsuite/src/DateTimeTestSuite.cpp

Foundation/testsuite/src/DateTimeTestSuite.h

Foundation/testsuite/src/DigestStreamTest.cpp

Foundation/testsuite/src/DigestStreamTest.h

Foundation/testsuite/src/Driver.cpp

Foundation/testsuite/src/DummyDelegate.cpp

Foundation/testsuite/src/DummyDelegate.h

Foundation/testsuite/src/DynamicFactoryTest.cpp

Foundation/testsuite/src/DynamicFactoryTest.h

Foundation/testsuite/src/EventTestSuite.cpp

Foundation/testsuite/src/EventTestSuite.h

Foundation/testsuite/src/ExpireCacheTest.cpp

Foundation/testsuite/src/ExpireCacheTest.h

Foundation/testsuite/src/ExpireLRUCacheTest.cpp

Foundation/testsuite/src/ExpireLRUCacheTest.h

Foundation/testsuite/src/FIFOEventTest.cpp

Foundation/testsuite/src/FIFOEventTest.h

Foundation/testsuite/src/FPETest.cpp

Foundation/testsuite/src/FPETest.h

Foundation/testsuite/src/FileChannelTest.cpp

Foundation/testsuite/src/FileChannelTest.h

Foundation/testsuite/src/FileTest.cpp

Foundation/testsuite/src/FileTest.h

Foundation/testsuite/src/FilesystemTestSuite.cpp

Foundation/testsuite/src/FilesystemTestSuite.h

Foundation/testsuite/src/FormatTest.cpp

Foundation/testsuite/src/FormatTest.h

Foundation/testsuite/src/FoundationTestSuite.cpp

Foundation/testsuite/src/FoundationTestSuite.h

Foundation/testsuite/src/GlobTest.cpp

Foundation/testsuite/src/GlobTest.h

Foundation/testsuite/src/HMACEngineTest.cpp

Foundation/testsuite/src/HMACEngineTest.h

Foundation/testsuite/src/HashTest.cpp

Foundation/testsuite/src/HashTest.h

Foundation/testsuite/src/HexBinaryTest.cpp

Foundation/testsuite/src/HexBinaryTest.h

Foundation/testsuite/src/LRUCacheTest.cpp

Foundation/testsuite/src/LRUCacheTest.h

Foundation/testsuite/src/LineEndingConverterTest.cpp

Foundation/testsuite/src/LineEndingConverterTest.h

Foundation/testsuite/src/LocalDateTimeTest.cpp

Foundation/testsuite/src/LocalDateTimeTest.h

Foundation/testsuite/src/LogStreamTest.cpp

Foundation/testsuite/src/LogStreamTest.h

Foundation/testsuite/src/LoggerTest.cpp

Foundation/testsuite/src/LoggerTest.h

Foundation/testsuite/src/LoggingFactoryTest.cpp

Foundation/testsuite/src/LoggingFactoryTest.h

Foundation/testsuite/src/LoggingRegistryTest.cpp

Foundation/testsuite/src/LoggingRegistryTest.h

Foundation/testsuite/src/LoggingTestSuite.cpp

Foundation/testsuite/src/LoggingTestSuite.h

Foundation/testsuite/src/MD2EngineTest.cpp

Foundation/testsuite/src/MD2EngineTest.h

Foundation/testsuite/src/MD4EngineTest.cpp

Foundation/testsuite/src/MD4EngineTest.h

Foundation/testsuite/src/MD5EngineTest.cpp

Foundation/testsuite/src/MD5EngineTest.h

Foundation/testsuite/src/ManifestTest.cpp

Foundation/testsuite/src/ManifestTest.h

Foundation/testsuite/src/MemoryPoolTest.cpp

Foundation/testsuite/src/MemoryPoolTest.h

Foundation/testsuite/src/NDCTest.cpp

Foundation/testsuite/src/NDCTest.h

Foundation/testsuite/src/NamedEventTest.cpp

Foundation/testsuite/src/NamedEventTest.h

Foundation/testsuite/src/NamedMutexTest.cpp

Foundation/testsuite/src/NamedMutexTest.h

Foundation/testsuite/src/NotificationCenterTest.cpp

Foundation/testsuite/src/NotificationCenterTest.h

Foundation/testsuite/src/NotificationQueueTest.cpp

Foundation/testsuite/src/NotificationQueueTest.h

Foundation/testsuite/src/NotificationsTestSuite.cpp

Foundation/testsuite/src/NotificationsTestSuite.h

Foundation/testsuite/src/NullStreamTest.cpp

Foundation/testsuite/src/NullStreamTest.h

Foundation/testsuite/src/NumberFormatterTest.cpp

Foundation/testsuite/src/NumberFormatterTest.h

Foundation/testsuite/src/NumberParserTest.cpp

Foundation/testsuite/src/NumberParserTest.h

Foundation/testsuite/src/PathTest.cpp

Foundation/testsuite/src/PathTest.h

Foundation/testsuite/src/PatternFormatterTest.cpp

Foundation/testsuite/src/PatternFormatterTest.h

Foundation/testsuite/src/PriorityEventTest.cpp

Foundation/testsuite/src/PriorityEventTest.h

Foundation/testsuite/src/ProcessTest.cpp

Foundation/testsuite/src/ProcessTest.h

Foundation/testsuite/src/ProcessesTestSuite.cpp

Foundation/testsuite/src/ProcessesTestSuite.h

Foundation/testsuite/src/RWLockTest.cpp

Foundation/testsuite/src/RWLockTest.h

Foundation/testsuite/src/RandomStreamTest.cpp

Foundation/testsuite/src/RandomStreamTest.h

Foundation/testsuite/src/RandomTest.cpp

Foundation/testsuite/src/RandomTest.h

Foundation/testsuite/src/RegularExpressionTest.cpp

Foundation/testsuite/src/RegularExpressionTest.h

Foundation/testsuite/src/SHA1EngineTest.cpp

Foundation/testsuite/src/SHA1EngineTest.h

Foundation/testsuite/src/SemaphoreTest.cpp

Foundation/testsuite/src/SemaphoreTest.h

Foundation/testsuite/src/SharedLibraryTest.cpp

Foundation/testsuite/src/SharedLibraryTest.h

Foundation/testsuite/src/SharedLibraryTestSuite.cpp

Foundation/testsuite/src/SharedLibraryTestSuite.h

Foundation/testsuite/src/SharedPtrTest.cpp

Foundation/testsuite/src/SharedPtrTest.h

Foundation/testsuite/src/SimpleFileChannelTest.cpp

Foundation/testsuite/src/SimpleFileChannelTest.h

Foundation/testsuite/src/StopwatchTest.cpp

Foundation/testsuite/src/StopwatchTest.h

Foundation/testsuite/src/StreamConverterTest.cpp

Foundation/testsuite/src/StreamConverterTest.h

Foundation/testsuite/src/StreamCopierTest.cpp

Foundation/testsuite/src/StreamCopierTest.h

Foundation/testsuite/src/StreamTokenizerTest.cpp

Foundation/testsuite/src/StreamTokenizerTest.h

Foundation/testsuite/src/StreamsTestSuite.cpp

Foundation/testsuite/src/StreamsTestSuite.h

Foundation/testsuite/src/StringTest.cpp

Foundation/testsuite/src/StringTest.h

Foundation/testsuite/src/StringTokenizerTest.cpp

Foundation/testsuite/src/StringTokenizerTest.h

Foundation/testsuite/src/TaskManagerTest.cpp

Foundation/testsuite/src/TaskManagerTest.h

Foundation/testsuite/src/TaskTest.cpp

Foundation/testsuite/src/TaskTest.h

Foundation/testsuite/src/TaskTestSuite.cpp

Foundation/testsuite/src/TaskTestSuite.h

Foundation/testsuite/src/TeeStreamTest.cpp

Foundation/testsuite/src/TeeStreamTest.h

Foundation/testsuite/src/TestApp.cpp

Foundation/testsuite/src/TestChannel.cpp

Foundation/testsuite/src/TestChannel.h

Foundation/testsuite/src/TestLibrary.cpp

Foundation/testsuite/src/TestPlugin.cpp

Foundation/testsuite/src/TestPlugin.h

Foundation/testsuite/src/TextConverterTest.cpp

Foundation/testsuite/src/TextConverterTest.h

Foundation/testsuite/src/TextIteratorTest.cpp

Foundation/testsuite/src/TextIteratorTest.h

Foundation/testsuite/src/TextTestSuite.cpp

Foundation/testsuite/src/TextTestSuite.h

Foundation/testsuite/src/ThreadLocalTest.cpp

Foundation/testsuite/src/ThreadLocalTest.h

Foundation/testsuite/src/ThreadPoolTest.cpp

Foundation/testsuite/src/ThreadPoolTest.h

Foundation/testsuite/src/ThreadTest.cpp

Foundation/testsuite/src/ThreadTest.h

Foundation/testsuite/src/ThreadingTestSuite.cpp

Foundation/testsuite/src/ThreadingTestSuite.h

Foundation/testsuite/src/TimerTest.cpp

Foundation/testsuite/src/TimerTest.h

Foundation/testsuite/src/TimespanTest.cpp

Foundation/testsuite/src/TimespanTest.h

Foundation/testsuite/src/TimestampTest.cpp

Foundation/testsuite/src/TimestampTest.h

Foundation/testsuite/src/TimezoneTest.cpp

Foundation/testsuite/src/TimezoneTest.h

Foundation/testsuite/src/URIStreamOpenerTest.cpp

Foundation/testsuite/src/URIStreamOpenerTest.h

Foundation/testsuite/src/URITest.cpp

Foundation/testsuite/src/URITest.h

Foundation/testsuite/src/URITestSuite.cpp

Foundation/testsuite/src/URITestSuite.h

Foundation/testsuite/src/UUIDGeneratorTest.cpp

Foundation/testsuite/src/UUIDGeneratorTest.h

Foundation/testsuite/src/UUIDTest.cpp

Foundation/testsuite/src/UUIDTest.h

Foundation/testsuite/src/UUIDTestSuite.cpp

Foundation/testsuite/src/UUIDTestSuite.h

Foundation/testsuite/src/UniqueExpireCacheTest.cpp

Foundation/testsuite/src/UniqueExpireCacheTest.h

Foundation/testsuite/src/UniqueExpireLRUCacheTest.cpp

Foundation/testsuite/src/UniqueExpireLRUCacheTest.h

Foundation/testsuite/src/WinDriver.cpp

Foundation/testsuite/src/ZLibTest.cpp

Foundation/testsuite/src/ZLibTest.h

Foundation/testsuite/testapp.vmsbuild

Foundation/testsuite/testlibrary.opt

Foundation/testsuite/testlibrary.vmsbuild

Foundation/testsuite/testsuite.vmsbuild

LICENSE

MANIFEST

Makefile

NEWS

Net/Makefile

Net/Net.vmsbuild

Net/Net_vs71.sln

Net/Net_vs71.vcproj

Net/Net_vs80.sln

Net/Net_vs80.vcproj

Net/include

Net/include/Poco

Net/include/Poco/Net

Net/include/Poco/Net/DNS.h

Net/include/Poco/Net/DatagramSocket.h

Net/include/Poco/Net/DatagramSocketImpl.h

Net/include/Poco/Net/DialogSocket.h

Net/include/Poco/Net/FTPClientSession.h

Net/include/Poco/Net/FTPStreamFactory.h

Net/include/Poco/Net/FilePartSource.h

Net/include/Poco/Net/HTMLForm.h

Net/include/Poco/Net/HTTPBasicCredentials.h

Net/include/Poco/Net/HTTPBasicStreamBuf.h

Net/include/Poco/Net/HTTPBufferAllocator.h

Net/include/Poco/Net/HTTPChunkedStream.h

Net/include/Poco/Net/HTTPClientSession.h

Net/include/Poco/Net/HTTPCookie.h

Net/include/Poco/Net/HTTPFixedLengthStream.h

Net/include/Poco/Net/HTTPHeaderStream.h

Net/include/Poco/Net/HTTPIOStream.h

Net/include/Poco/Net/HTTPMessage.h

Net/include/Poco/Net/HTTPRequest.h

Net/include/Poco/Net/HTTPRequestHandler.h

Net/include/Poco/Net/HTTPRequestHandlerFactory.h

Net/include/Poco/Net/HTTPResponse.h

Net/include/Poco/Net/HTTPServer.h

Net/include/Poco/Net/HTTPServerConnection.h

Net/include/Poco/Net/HTTPServerConnectionFactory.h

Net/include/Poco/Net/HTTPServerParams.h

Net/include/Poco/Net/HTTPServerRequest.h

Net/include/Poco/Net/HTTPServerResponse.h

Net/include/Poco/Net/HTTPServerSession.h

Net/include/Poco/Net/HTTPSession.h

Net/include/Poco/Net/HTTPSessionFactory.h

Net/include/Poco/Net/HTTPSessionInstantiator.h

Net/include/Poco/Net/HTTPStream.h

Net/include/Poco/Net/HTTPStreamFactory.h

Net/include/Poco/Net/HostEntry.h

Net/include/Poco/Net/ICMPClient.h

Net/include/Poco/Net/ICMPEventArgs.h

Net/include/Poco/Net/ICMPPacket.h

Net/include/Poco/Net/ICMPPacketImpl.h

Net/include/Poco/Net/ICMPSocket.h

Net/include/Poco/Net/ICMPSocketImpl.h

Net/include/Poco/Net/ICMPv4PacketImpl.h

Net/include/Poco/Net/IPAddress.h

Net/include/Poco/Net/MailMessage.h

Net/include/Poco/Net/MailRecipient.h

Net/include/Poco/Net/MailStream.h

Net/include/Poco/Net/MediaType.h

Net/include/Poco/Net/MessageHeader.h

Net/include/Poco/Net/MulticastSocket.h

Net/include/Poco/Net/MultipartReader.h

Net/include/Poco/Net/MultipartWriter.h

Net/include/Poco/Net/NameValueCollection.h

Net/include/Poco/Net/Net.h

Net/include/Poco/Net/NetException.h

Net/include/Poco/Net/NetworkInterface.h

Net/include/Poco/Net/NullPartHandler.h

Net/include/Poco/Net/POP3ClientSession.h

Net/include/Poco/Net/PartHandler.h

Net/include/Poco/Net/PartSource.h

Net/include/Poco/Net/QuotedPrintableDecoder.h

Net/include/Poco/Net/QuotedPrintableEncoder.h

Net/include/Poco/Net/RawSocket.h

Net/include/Poco/Net/RawSocketImpl.h

Net/include/Poco/Net/SMTPClientSession.h

Net/include/Poco/Net/ServerSocket.h

Net/include/Poco/Net/ServerSocketImpl.h

Net/include/Poco/Net/Socket.h

Net/include/Poco/Net/SocketAcceptor.h

Net/include/Poco/Net/SocketAddress.h

Net/include/Poco/Net/SocketConnector.h

Net/include/Poco/Net/SocketDefs.h

Net/include/Poco/Net/SocketImpl.h

Net/include/Poco/Net/SocketNotification.h

Net/include/Poco/Net/SocketNotifier.h

Net/include/Poco/Net/SocketReactor.h

Net/include/Poco/Net/SocketStream.h

Net/include/Poco/Net/StreamSocket.h

Net/include/Poco/Net/StreamSocketImpl.h

Net/include/Poco/Net/StringPartSource.h

Net/include/Poco/Net/TCPServer.h

Net/include/Poco/Net/TCPServerConnection.h

Net/include/Poco/Net/TCPServerConnectionFactory.h

Net/include/Poco/Net/TCPServerDispatcher.h

Net/include/Poco/Net/TCPServerParams.h

Net/samples

Net/samples/EchoServer

Net/samples/EchoServer/EchoServer.properties

Net/samples/EchoServer/EchoServer.vmsbuild

Net/samples/EchoServer/EchoServer_vs71.vcproj

Net/samples/EchoServer/EchoServer_vs80.vcproj

Net/samples/EchoServer/Makefile

Net/samples/EchoServer/bin

Net/samples/EchoServer/src

Net/samples/EchoServer/src/EchoServer.cpp

Net/samples/HTTPFormServer

Net/samples/HTTPFormServer/HTTPFormServer.properties

Net/samples/HTTPFormServer/HTTPFormServer.vmsbuild

Net/samples/HTTPFormServer/HTTPFormServer_vs71.vcproj

Net/samples/HTTPFormServer/HTTPFormServer_vs80.vcproj

Net/samples/HTTPFormServer/Makefile

Net/samples/HTTPFormServer/bin

Net/samples/HTTPFormServer/src

Net/samples/HTTPFormServer/src/HTTPFormServer.cpp

Net/samples/HTTPLoadTest

Net/samples/HTTPLoadTest/HTTPLoadTest.vmsbuild

Net/samples/HTTPLoadTest/HTTPLoadTest_vs71.vcproj

Net/samples/HTTPLoadTest/HTTPLoadTest_vs80.vcproj

Net/samples/HTTPLoadTest/Makefile

Net/samples/HTTPLoadTest/bin

Net/samples/HTTPLoadTest/src

Net/samples/HTTPLoadTest/src/HTTPLoadTest.cpp

Net/samples/HTTPTimeServer

Net/samples/HTTPTimeServer/HTTPTimeServer.properties

Net/samples/HTTPTimeServer/HTTPTimeServer.vmsbuild

Net/samples/HTTPTimeServer/HTTPTimeServer_vs71.vcproj

Net/samples/HTTPTimeServer/HTTPTimeServer_vs80.vcproj

Net/samples/HTTPTimeServer/Makefile

Net/samples/HTTPTimeServer/bin

Net/samples/HTTPTimeServer/src

Net/samples/HTTPTimeServer/src/HTTPTimeServer.cpp

Net/samples/Mail

Net/samples/Mail/Mail.vmsbuild

Net/samples/Mail/Mail_vs71.vcproj

Net/samples/Mail/Mail_vs80.vcproj

Net/samples/Mail/Makefile

Net/samples/Mail/bin

Net/samples/Mail/src

Net/samples/Mail/src/Mail.cpp

Net/samples/Mail/src/PocoLogo.hpp

Net/samples/Makefile

Net/samples/Ping

Net/samples/Ping/Makefile

Net/samples/Ping/Ping.vmsbuild

Net/samples/Ping/Ping_vs71.vcproj

Net/samples/Ping/Ping_vs80.vcproj

Net/samples/Ping/bin

Net/samples/Ping/ping.properties

Net/samples/Ping/src

Net/samples/Ping/src/Ping.cpp

Net/samples/TimeServer

Net/samples/TimeServer/Makefile

Net/samples/TimeServer/TimeServer.properties

Net/samples/TimeServer/TimeServer.vmsbuild

Net/samples/TimeServer/TimeServer_vs71.vcproj

Net/samples/TimeServer/TimeServer_vs80.vcproj

Net/samples/TimeServer/bin

Net/samples/TimeServer/src

Net/samples/TimeServer/src/TimeServer.cpp

Net/samples/dict

Net/samples/dict/Makefile

Net/samples/dict/bin

Net/samples/dict/dict.vmsbuild

Net/samples/dict/dict_vs71.vcproj

Net/samples/dict/dict_vs80.vcproj

Net/samples/dict/src

Net/samples/dict/src/dict.cpp

Net/samples/download

Net/samples/download/Makefile

Net/samples/download/bin

Net/samples/download/download.vmsbuild

Net/samples/download/download_vs71.vcproj

Net/samples/download/download_vs80.vcproj

Net/samples/download/src

Net/samples/download/src/download.cpp

Net/samples/httpget

Net/samples/httpget/Makefile

Net/samples/httpget/bin

Net/samples/httpget/httpget.vmsbuild

Net/samples/httpget/httpget_vs71.vcproj

Net/samples/httpget/httpget_vs80.vcproj

Net/samples/httpget/src

Net/samples/httpget/src/httpget.cpp

Net/samples/samples_vs71.sln

Net/samples/samples_vs80.sln

Net/src

Net/src/DNS.cpp

Net/src/DatagramSocket.cpp

Net/src/DatagramSocketImpl.cpp

Net/src/DialogSocket.cpp

Net/src/FTPClientSession.cpp

Net/src/FTPStreamFactory.cpp

Net/src/FilePartSource.cpp

Net/src/HTMLForm.cpp

Net/src/HTTPBasicCredentials.cpp

Net/src/HTTPBufferAllocator.cpp

Net/src/HTTPChunkedStream.cpp

Net/src/HTTPClientSession.cpp

Net/src/HTTPCookie.cpp

Net/src/HTTPFixedLengthStream.cpp

Net/src/HTTPHeaderStream.cpp

Net/src/HTTPIOStream.cpp

Net/src/HTTPMessage.cpp

Net/src/HTTPRequest.cpp

Net/src/HTTPRequestHandler.cpp

Net/src/HTTPRequestHandlerFactory.cpp

Net/src/HTTPResponse.cpp

Net/src/HTTPServer.cpp

Net/src/HTTPServerConnection.cpp

Net/src/HTTPServerConnectionFactory.cpp

Net/src/HTTPServerParams.cpp

Net/src/HTTPServerRequest.cpp

Net/src/HTTPServerResponse.cpp

Net/src/HTTPServerSession.cpp

Net/src/HTTPSession.cpp

Net/src/HTTPSessionFactory.cpp

Net/src/HTTPSessionInstantiator.cpp

Net/src/HTTPStream.cpp

Net/src/HTTPStreamFactory.cpp

Net/src/HostEntry.cpp

Net/src/ICMPClient.cpp

Net/src/ICMPEventArgs.cpp

Net/src/ICMPPacket.cpp

Net/src/ICMPPacketImpl.cpp

Net/src/ICMPSocket.cpp

Net/src/ICMPSocketImpl.cpp

Net/src/ICMPv4PacketImpl.cpp

Net/src/IPAddress.cpp

Net/src/MailMessage.cpp

Net/src/MailRecipient.cpp

Net/src/MailStream.cpp

Net/src/MediaType.cpp

Net/src/MessageHeader.cpp

Net/src/MulticastSocket.cpp

Net/src/MultipartReader.cpp

Net/src/MultipartWriter.cpp

Net/src/NameValueCollection.cpp

Net/src/NetException.cpp

Net/src/NetworkInterface.cpp

Net/src/NullPartHandler.cpp

Net/src/POP3ClientSession.cpp

Net/src/PartHandler.cpp

Net/src/PartSource.cpp

Net/src/QuotedPrintableDecoder.cpp

Net/src/QuotedPrintableEncoder.cpp

Net/src/RawSocket.cpp

Net/src/RawSocketImpl.cpp

Net/src/SMTPClientSession.cpp

Net/src/ServerSocket.cpp

Net/src/ServerSocketImpl.cpp

Net/src/Socket.cpp

Net/src/SocketAddress.cpp

Net/src/SocketImpl.cpp

Net/src/SocketNotification.cpp

Net/src/SocketNotifier.cpp

Net/src/SocketReactor.cpp

Net/src/SocketStream.cpp

Net/src/StreamSocket.cpp

Net/src/StreamSocketImpl.cpp

Net/src/StringPartSource.cpp

Net/src/TCPServer.cpp

Net/src/TCPServerConnection.cpp

Net/src/TCPServerConnectionFactory.cpp

Net/src/TCPServerDispatcher.cpp

Net/src/TCPServerParams.cpp

Net/testsuite

Net/testsuite/Makefile

Net/testsuite/TestSuite.vmsbuild

Net/testsuite/TestSuite_vs71.vcproj

Net/testsuite/TestSuite_vs80.vcproj

Net/testsuite/src

Net/testsuite/src/DNSTest.cpp

Net/testsuite/src/DNSTest.h

Net/testsuite/src/DatagramSocketTest.cpp

Net/testsuite/src/DatagramSocketTest.h

Net/testsuite/src/DialogServer.cpp

Net/testsuite/src/DialogServer.h

Net/testsuite/src/DialogSocketTest.cpp

Net/testsuite/src/DialogSocketTest.h

Net/testsuite/src/Driver.cpp

Net/testsuite/src/EchoServer.cpp

Net/testsuite/src/EchoServer.h

Net/testsuite/src/FTPClientSessionTest.cpp

Net/testsuite/src/FTPClientSessionTest.h

Net/testsuite/src/FTPClientTestSuite.cpp

Net/testsuite/src/FTPClientTestSuite.h

Net/testsuite/src/FTPStreamFactoryTest.cpp

Net/testsuite/src/FTPStreamFactoryTest.h

Net/testsuite/src/HTMLFormTest.cpp

Net/testsuite/src/HTMLFormTest.h

Net/testsuite/src/HTMLTestSuite.cpp

Net/testsuite/src/HTMLTestSuite.h

Net/testsuite/src/HTTPClientSessionTest.cpp

Net/testsuite/src/HTTPClientSessionTest.h

Net/testsuite/src/HTTPClientTestSuite.cpp

Net/testsuite/src/HTTPClientTestSuite.h

Net/testsuite/src/HTTPCookieTest.cpp

Net/testsuite/src/HTTPCookieTest.h

Net/testsuite/src/HTTPCredentialsTest.cpp

Net/testsuite/src/HTTPCredentialsTest.h

Net/testsuite/src/HTTPRequestTest.cpp

Net/testsuite/src/HTTPRequestTest.h

Net/testsuite/src/HTTPResponseTest.cpp

Net/testsuite/src/HTTPResponseTest.h

Net/testsuite/src/HTTPServerTest.cpp

Net/testsuite/src/HTTPServerTest.h

Net/testsuite/src/HTTPServerTestSuite.cpp

Net/testsuite/src/HTTPServerTestSuite.h

Net/testsuite/src/HTTPStreamFactoryTest.cpp

Net/testsuite/src/HTTPStreamFactoryTest.h

Net/testsuite/src/HTTPTestServer.cpp

Net/testsuite/src/HTTPTestServer.h

Net/testsuite/src/HTTPTestSuite.cpp

Net/testsuite/src/HTTPTestSuite.h

Net/testsuite/src/ICMPClientTest.cpp

Net/testsuite/src/ICMPClientTest.h

Net/testsuite/src/ICMPClientTestSuite.cpp

Net/testsuite/src/ICMPClientTestSuite.h

Net/testsuite/src/ICMPSocketTest.cpp

Net/testsuite/src/ICMPSocketTest.h

Net/testsuite/src/IPAddressTest.cpp

Net/testsuite/src/IPAddressTest.h

Net/testsuite/src/MailMessageTest.cpp

Net/testsuite/src/MailMessageTest.h

Net/testsuite/src/MailStreamTest.cpp

Net/testsuite/src/MailStreamTest.h

Net/testsuite/src/MailTestSuite.cpp

Net/testsuite/src/MailTestSuite.h

Net/testsuite/src/MediaTypeTest.cpp

Net/testsuite/src/MediaTypeTest.h

Net/testsuite/src/MessageHeaderTest.cpp

Net/testsuite/src/MessageHeaderTest.h

Net/testsuite/src/MessagesTestSuite.cpp

Net/testsuite/src/MessagesTestSuite.h

Net/testsuite/src/MulticastEchoServer.cpp

Net/testsuite/src/MulticastEchoServer.h

Net/testsuite/src/MulticastSocketTest.cpp

Net/testsuite/src/MulticastSocketTest.h

Net/testsuite/src/MultipartReaderTest.cpp

Net/testsuite/src/MultipartReaderTest.h

Net/testsuite/src/MultipartWriterTest.cpp

Net/testsuite/src/MultipartWriterTest.h

Net/testsuite/src/NameValueCollectionTest.cpp

Net/testsuite/src/NameValueCollectionTest.h

Net/testsuite/src/NetCoreTestSuite.cpp

Net/testsuite/src/NetCoreTestSuite.h

Net/testsuite/src/NetTestSuite.cpp

Net/testsuite/src/NetTestSuite.h

Net/testsuite/src/NetworkInterfaceTest.cpp

Net/testsuite/src/NetworkInterfaceTest.h

Net/testsuite/src/POP3ClientSessionTest.cpp

Net/testsuite/src/POP3ClientSessionTest.h

Net/testsuite/src/QuotedPrintableTest.cpp

Net/testsuite/src/QuotedPrintableTest.h

Net/testsuite/src/RawSocketTest.cpp

Net/testsuite/src/RawSocketTest.h

Net/testsuite/src/ReactorTestSuite.cpp

Net/testsuite/src/ReactorTestSuite.h

Net/testsuite/src/SMTPClientSessionTest.cpp

Net/testsuite/src/SMTPClientSessionTest.h

Net/testsuite/src/SocketAddressTest.cpp

Net/testsuite/src/SocketAddressTest.h

Net/testsuite/src/SocketReactorTest.cpp

Net/testsuite/src/SocketReactorTest.h

Net/testsuite/src/SocketStreamTest.cpp

Net/testsuite/src/SocketStreamTest.h

Net/testsuite/src/SocketTest.cpp

Net/testsuite/src/SocketTest.h

Net/testsuite/src/SocketsTestSuite.cpp

Net/testsuite/src/SocketsTestSuite.h

Net/testsuite/src/TCPServerTest.cpp

Net/testsuite/src/TCPServerTest.h

Net/testsuite/src/TCPServerTestSuite.cpp

Net/testsuite/src/TCPServerTestSuite.h

Net/testsuite/src/UDPEchoServer.cpp

Net/testsuite/src/UDPEchoServer.h

Net/testsuite/src/WinDriver.cpp

NetSSL_OpenSSL

NetSSL_OpenSSL/Makefile

NetSSL_OpenSSL/NetSSL_OpenSSL_vs71.sln

NetSSL_OpenSSL/NetSSL_OpenSSL_vs71.vcproj

NetSSL_OpenSSL/NetSSL_OpenSSL_vs80.sln

NetSSL_OpenSSL/NetSSL_OpenSSL_vs80.vcproj

NetSSL_OpenSSL/doc

NetSSL_OpenSSL/doc/howtobuild.txt

NetSSL_OpenSSL/include

NetSSL_OpenSSL/include/Poco

NetSSL_OpenSSL/include/Poco/Net

NetSSL_OpenSSL/include/Poco/Net/AcceptCertificateHandler.h

NetSSL_OpenSSL/include/Poco/Net/CertificateHandlerFactory.h

NetSSL_OpenSSL/include/Poco/Net/CertificateHandlerFactoryMgr.h

NetSSL_OpenSSL/include/Poco/Net/ConsoleCertificateHandler.h

NetSSL_OpenSSL/include/Poco/Net/Context.h

NetSSL_OpenSSL/include/Poco/Net/HTTPSClientSession.h

NetSSL_OpenSSL/include/Poco/Net/HTTPSSessionInstantiator.h

NetSSL_OpenSSL/include/Poco/Net/HTTPSStreamFactory.h

NetSSL_OpenSSL/include/Poco/Net/InvalidCertificateHandler.h

NetSSL_OpenSSL/include/Poco/Net/KeyConsoleHandler.h

NetSSL_OpenSSL/include/Poco/Net/KeyFileHandler.h

NetSSL_OpenSSL/include/Poco/Net/NetSSL.h

NetSSL_OpenSSL/include/Poco/Net/PrivateKeyFactory.h

NetSSL_OpenSSL/include/Poco/Net/PrivateKeyFactoryMgr.h

NetSSL_OpenSSL/include/Poco/Net/PrivateKeyPassphraseHandler.h

NetSSL_OpenSSL/include/Poco/Net/SSLException.h

NetSSL_OpenSSL/include/Poco/Net/SSLInitializer.h

NetSSL_OpenSSL/include/Poco/Net/SSLManager.h

NetSSL_OpenSSL/include/Poco/Net/SecureServerSocket.h

NetSSL_OpenSSL/include/Poco/Net/SecureServerSocketImpl.h

NetSSL_OpenSSL/include/Poco/Net/SecureSocketImpl.h

NetSSL_OpenSSL/include/Poco/Net/SecureStreamSocket.h

NetSSL_OpenSSL/include/Poco/Net/SecureStreamSocketImpl.h

NetSSL_OpenSSL/include/Poco/Net/Utility.h

NetSSL_OpenSSL/include/Poco/Net/VerificationErrorArgs.h

NetSSL_OpenSSL/include/Poco/Net/X509Certificate.h

NetSSL_OpenSSL/samples

NetSSL_OpenSSL/samples/HTTPSTimeServer

NetSSL_OpenSSL/samples/HTTPSTimeServer/HTTPSTimeServer.properties

NetSSL_OpenSSL/samples/HTTPSTimeServer/HTTPSTimeServer.vmsbuild

NetSSL_OpenSSL/samples/HTTPSTimeServer/HTTPSTimeServer_vs71.vcproj

NetSSL_OpenSSL/samples/HTTPSTimeServer/HTTPSTimeServer_vs80.vcproj

NetSSL_OpenSSL/samples/HTTPSTimeServer/Makefile

NetSSL_OpenSSL/samples/HTTPSTimeServer/any.pem

NetSSL_OpenSSL/samples/HTTPSTimeServer/bin

NetSSL_OpenSSL/samples/HTTPSTimeServer/rootcert.pem

NetSSL_OpenSSL/samples/HTTPSTimeServer/src

NetSSL_OpenSSL/samples/HTTPSTimeServer/src/HTTPSTimeServer.cpp

NetSSL_OpenSSL/samples/Makefile

NetSSL_OpenSSL/samples/download

NetSSL_OpenSSL/samples/download/Makefile

NetSSL_OpenSSL/samples/download/any.pem

NetSSL_OpenSSL/samples/download/bin

NetSSL_OpenSSL/samples/download/download.vmsbuild

NetSSL_OpenSSL/samples/download/download_vs71.vcproj

NetSSL_OpenSSL/samples/download/download_vs80.vcproj

NetSSL_OpenSSL/samples/download/rootcert.pem

NetSSL_OpenSSL/samples/download/src

NetSSL_OpenSSL/samples/download/src/download.cpp

NetSSL_OpenSSL/samples/samples_vs71.sln

NetSSL_OpenSSL/samples/samples_vs80.sln

NetSSL_OpenSSL/src

NetSSL_OpenSSL/src/AcceptCertificateHandler.cpp

NetSSL_OpenSSL/src/CertificateHandlerFactory.cpp

NetSSL_OpenSSL/src/CertificateHandlerFactoryMgr.cpp

NetSSL_OpenSSL/src/ConsoleCertificateHandler.cpp

NetSSL_OpenSSL/src/Context.cpp

NetSSL_OpenSSL/src/HTTPSClientSession.cpp

NetSSL_OpenSSL/src/HTTPSSessionInstantiator.cpp

NetSSL_OpenSSL/src/HTTPSStreamFactory.cpp

NetSSL_OpenSSL/src/InvalidCertificateHandler.cpp

NetSSL_OpenSSL/src/KeyConsoleHandler.cpp

NetSSL_OpenSSL/src/KeyFileHandler.cpp

NetSSL_OpenSSL/src/PrivateKeyFactory.cpp

NetSSL_OpenSSL/src/PrivateKeyFactoryMgr.cpp

NetSSL_OpenSSL/src/PrivateKeyPassphraseHandler.cpp

NetSSL_OpenSSL/src/SSLException.cpp

NetSSL_OpenSSL/src/SSLInitializer.cpp

NetSSL_OpenSSL/src/SSLManager.cpp

NetSSL_OpenSSL/src/SecureServerSocket.cpp

NetSSL_OpenSSL/src/SecureServerSocketImpl.cpp

NetSSL_OpenSSL/src/SecureSocketImpl.cpp

NetSSL_OpenSSL/src/SecureStreamSocket.cpp

NetSSL_OpenSSL/src/SecureStreamSocketImpl.cpp

NetSSL_OpenSSL/src/Utility.cpp

NetSSL_OpenSSL/src/VerificationErrorArgs.cpp

NetSSL_OpenSSL/src/X509Certificate.cpp

NetSSL_OpenSSL/testsuite

NetSSL_OpenSSL/testsuite/Makefile

NetSSL_OpenSSL/testsuite/TestSuite.vmsbuild

NetSSL_OpenSSL/testsuite/TestSuite.xml

NetSSL_OpenSSL/testsuite/TestSuite_vs71.vcproj

NetSSL_OpenSSL/testsuite/TestSuite_vs80.vcproj

NetSSL_OpenSSL/testsuite/any.pem

NetSSL_OpenSSL/testsuite/rootcert.pem

NetSSL_OpenSSL/testsuite/src

NetSSL_OpenSSL/testsuite/src/Driver.cpp

NetSSL_OpenSSL/testsuite/src/HTTPSClientSessionTest.cpp

NetSSL_OpenSSL/testsuite/src/HTTPSClientSessionTest.h

NetSSL_OpenSSL/testsuite/src/HTTPSClientTestSuite.cpp

NetSSL_OpenSSL/testsuite/src/HTTPSClientTestSuite.h

NetSSL_OpenSSL/testsuite/src/HTTPSServerTest.cpp

NetSSL_OpenSSL/testsuite/src/HTTPSServerTest.h

NetSSL_OpenSSL/testsuite/src/HTTPSServerTestSuite.cpp

NetSSL_OpenSSL/testsuite/src/HTTPSServerTestSuite.h

NetSSL_OpenSSL/testsuite/src/HTTPSStreamFactoryTest.cpp

NetSSL_OpenSSL/testsuite/src/HTTPSStreamFactoryTest.h

NetSSL_OpenSSL/testsuite/src/HTTPSTestServer.cpp

NetSSL_OpenSSL/testsuite/src/HTTPSTestServer.h

NetSSL_OpenSSL/testsuite/src/NetSSLTestSuite.cpp

NetSSL_OpenSSL/testsuite/src/NetSSLTestSuite.h

NetSSL_OpenSSL/testsuite/src/TCPServerTest.cpp

NetSSL_OpenSSL/testsuite/src/TCPServerTest.h

NetSSL_OpenSSL/testsuite/src/TCPServerTestSuite.cpp

NetSSL_OpenSSL/testsuite/src/TCPServerTestSuite.h

NetSSL_OpenSSL/testsuite/src/WinDriver.cpp

NetSSL_OpenSSL/testsuite/testrunner.xml

README

Util

Util/Makefile

Util/Util.vmsbuild

Util/Util_vs71.sln

Util/Util_vs71.vcproj

Util/Util_vs80.sln

Util/Util_vs80.vcproj

Util/include

Util/include/Poco

Util/include/Poco/Util

Util/include/Poco/Util/AbstractConfiguration.h

Util/include/Poco/Util/Application.h

Util/include/Poco/Util/ConfigurationMapper.h

Util/include/Poco/Util/ConfigurationView.h

Util/include/Poco/Util/FilesystemConfiguration.h

Util/include/Poco/Util/HelpFormatter.h

Util/include/Poco/Util/IniFileConfiguration.h

Util/include/Poco/Util/IntValidator.h

Util/include/Poco/Util/LayeredConfiguration.h

Util/include/Poco/Util/LoggingConfigurator.h

Util/include/Poco/Util/LoggingSubsystem.h

Util/include/Poco/Util/MapConfiguration.h

Util/include/Poco/Util/Option.h

Util/include/Poco/Util/OptionCallback.h

Util/include/Poco/Util/OptionException.h

Util/include/Poco/Util/OptionProcessor.h

Util/include/Poco/Util/OptionSet.h

Util/include/Poco/Util/PropertyFileConfiguration.h

Util/include/Poco/Util/RegExpValidator.h

Util/include/Poco/Util/ServerApplication.h

Util/include/Poco/Util/Subsystem.h

Util/include/Poco/Util/SystemConfiguration.h

Util/include/Poco/Util/Util.h

Util/include/Poco/Util/Validator.h

Util/include/Poco/Util/WinRegistryConfiguration.h

Util/include/Poco/Util/WinRegistryKey.h

Util/include/Poco/Util/WinService.h

Util/include/Poco/Util/XMLConfiguration.h

Util/samples

Util/samples/Makefile

Util/samples/SampleApp

Util/samples/SampleApp/Makefile

Util/samples/SampleApp/SampleApp.properties

Util/samples/SampleApp/SampleApp.vmsbuild

Util/samples/SampleApp/SampleApp_vs71.vcproj

Util/samples/SampleApp/SampleApp_vs80.vcproj

Util/samples/SampleApp/bin

Util/samples/SampleApp/src

Util/samples/SampleApp/src/SampleApp.cpp

Util/samples/SampleServer

Util/samples/SampleServer/Makefile

Util/samples/SampleServer/SampleServer.properties

Util/samples/SampleServer/SampleServer.vmsbuild

Util/samples/SampleServer/SampleServer_vs71.vcproj

Util/samples/SampleServer/SampleServer_vs80.vcproj

Util/samples/SampleServer/bin

Util/samples/SampleServer/src

Util/samples/SampleServer/src/SampleServer.cpp

Util/samples/samples_vs71.sln

Util/samples/samples_vs80.sln

Util/src

Util/src/AbstractConfiguration.cpp

Util/src/Application.cpp

Util/src/ConfigurationMapper.cpp

Util/src/ConfigurationView.cpp

Util/src/FilesystemConfiguration.cpp

Util/src/HelpFormatter.cpp

Util/src/IniFileConfiguration.cpp

Util/src/IntValidator.cpp

Util/src/LayeredConfiguration.cpp

Util/src/LoggingConfigurator.cpp

Util/src/LoggingSubsystem.cpp

Util/src/MapConfiguration.cpp

Util/src/Option.cpp

Util/src/OptionCallback.cpp

Util/src/OptionException.cpp

Util/src/OptionProcessor.cpp

Util/src/OptionSet.cpp

Util/src/PropertyFileConfiguration.cpp

Util/src/RegExpValidator.cpp

Util/src/ServerApplication.cpp

Util/src/Subsystem.cpp

Util/src/SystemConfiguration.cpp

Util/src/Validator.cpp

Util/src/WinRegistryConfiguration.cpp

Util/src/WinRegistryKey.cpp

Util/src/WinService.cpp

Util/src/XMLConfiguration.cpp

Util/testsuite

Util/testsuite/Makefile

Util/testsuite/TestSuite.vmsbuild

Util/testsuite/TestSuite_vs71.vcproj

Util/testsuite/TestSuite_vs80.vcproj

Util/testsuite/src

Util/testsuite/src/AbstractConfigurationTest.cpp

Util/testsuite/src/AbstractConfigurationTest.h

Util/testsuite/src/ConfigurationMapperTest.cpp

Util/testsuite/src/ConfigurationMapperTest.h

Util/testsuite/src/ConfigurationTestSuite.cpp

Util/testsuite/src/ConfigurationTestSuite.h

Util/testsuite/src/ConfigurationViewTest.cpp

Util/testsuite/src/ConfigurationViewTest.h

Util/testsuite/src/Driver.cpp

Util/testsuite/src/FilesystemConfigurationTest.cpp

Util/testsuite/src/FilesystemConfigurationTest.h

Util/testsuite/src/HelpFormatterTest.cpp

Util/testsuite/src/HelpFormatterTest.h

Util/testsuite/src/IniFileConfigurationTest.cpp

Util/testsuite/src/IniFileConfigurationTest.h

Util/testsuite/src/LayeredConfigurationTest.cpp

Util/testsuite/src/LayeredConfigurationTest.h

Util/testsuite/src/LoggingConfiguratorTest.cpp

Util/testsuite/src/LoggingConfiguratorTest.h

Util/testsuite/src/MapConfigurationTest.cpp

Util/testsuite/src/MapConfigurationTest.h

Util/testsuite/src/OptionProcessorTest.cpp

Util/testsuite/src/OptionProcessorTest.h

Util/testsuite/src/OptionSetTest.cpp

Util/testsuite/src/OptionSetTest.h

Util/testsuite/src/OptionTest.cpp

Util/testsuite/src/OptionTest.h

Util/testsuite/src/OptionsTestSuite.cpp

Util/testsuite/src/OptionsTestSuite.h

Util/testsuite/src/PropertyFileConfigurationTest.cpp

Util/testsuite/src/PropertyFileConfigurationTest.h

Util/testsuite/src/SystemConfigurationTest.cpp

Util/testsuite/src/SystemConfigurationTest.h

Util/testsuite/src/UtilTestSuite.cpp

Util/testsuite/src/UtilTestSuite.h

Util/testsuite/src/ValidatorTest.cpp

Util/testsuite/src/ValidatorTest.h

Util/testsuite/src/WinConfigurationTest.cpp

Util/testsuite/src/WinConfigurationTest.h

Util/testsuite/src/WinDriver.cpp

Util/testsuite/src/WinRegistryTest.cpp

Util/testsuite/src/WinRegistryTest.h

Util/testsuite/src/WindowsTestSuite.cpp

Util/testsuite/src/WindowsTestSuite.h

Util/testsuite/src/XMLConfigurationTest.cpp

Util/testsuite/src/XMLConfigurationTest.h

VERSION

XML/Makefile

XML/XML.vmsbuild

XML/XML_vs71.sln

XML/XML_vs71.vcproj

XML/XML_vs80.sln

XML/XML_vs80.vcproj

XML/include

XML/include/Poco

XML/include/Poco/DOM

XML/include/Poco/DOM/AbstractContainerNode.h

XML/include/Poco/DOM/AbstractNode.h

XML/include/Poco/DOM/Attr.h

XML/include/Poco/DOM/AttrMap.h

XML/include/Poco/DOM/AutoPtr.h

XML/include/Poco/DOM/CDATASection.h

XML/include/Poco/DOM/CharacterData.h

XML/include/Poco/DOM/ChildNodesList.h

XML/include/Poco/DOM/Comment.h

XML/include/Poco/DOM/DOMBuilder.h

XML/include/Poco/DOM/DOMException.h

XML/include/Poco/DOM/DOMImplementation.h

XML/include/Poco/DOM/DOMObject.h

XML/include/Poco/DOM/DOMParser.h

XML/include/Poco/DOM/DOMSerializer.h

XML/include/Poco/DOM/DOMWriter.h

XML/include/Poco/DOM/DTDMap.h

XML/include/Poco/DOM/Document.h

XML/include/Poco/DOM/DocumentEvent.h

XML/include/Poco/DOM/DocumentFragment.h

XML/include/Poco/DOM/DocumentType.h

XML/include/Poco/DOM/Element.h

XML/include/Poco/DOM/ElementsByTagNameList.h

XML/include/Poco/DOM/Entity.h

XML/include/Poco/DOM/EntityReference.h

XML/include/Poco/DOM/Event.h

XML/include/Poco/DOM/EventDispatcher.h

XML/include/Poco/DOM/EventException.h

XML/include/Poco/DOM/EventListener.h

XML/include/Poco/DOM/EventTarget.h

XML/include/Poco/DOM/MutationEvent.h

XML/include/Poco/DOM/NamedNodeMap.h

XML/include/Poco/DOM/Node.h

XML/include/Poco/DOM/NodeFilter.h

XML/include/Poco/DOM/NodeIterator.h

XML/include/Poco/DOM/NodeList.h

XML/include/Poco/DOM/Notation.h

XML/include/Poco/DOM/ProcessingInstruction.h

XML/include/Poco/DOM/Text.h

XML/include/Poco/DOM/TreeWalker.h

XML/include/Poco/SAX

XML/include/Poco/SAX/Attributes.h

XML/include/Poco/SAX/AttributesImpl.h

XML/include/Poco/SAX/ContentHandler.h

XML/include/Poco/SAX/DTDHandler.h

XML/include/Poco/SAX/DeclHandler.h

XML/include/Poco/SAX/DefaultHandler.h

XML/include/Poco/SAX/EntityResolver.h

XML/include/Poco/SAX/EntityResolverImpl.h

XML/include/Poco/SAX/ErrorHandler.h

XML/include/Poco/SAX/InputSource.h

XML/include/Poco/SAX/LexicalHandler.h

XML/include/Poco/SAX/Locator.h

XML/include/Poco/SAX/LocatorImpl.h

XML/include/Poco/SAX/NamespaceSupport.h

XML/include/Poco/SAX/SAXException.h

XML/include/Poco/SAX/SAXParser.h

XML/include/Poco/SAX/WhitespaceFilter.h

XML/include/Poco/SAX/XMLFilter.h

XML/include/Poco/SAX/XMLFilterImpl.h

XML/include/Poco/SAX/XMLReader.h

XML/include/Poco/XML

XML/include/Poco/XML/Name.h

XML/include/Poco/XML/NamePool.h

XML/include/Poco/XML/NamespaceStrategy.h

XML/include/Poco/XML/ParserEngine.h

XML/include/Poco/XML/XML.h

XML/include/Poco/XML/XMLException.h

XML/include/Poco/XML/XMLStream.h

XML/include/Poco/XML/XMLString.h

XML/include/Poco/XML/XMLWriter.h

XML/include/Poco/XML/expat.h

XML/include/Poco/XML/expat_external.h

XML/samples

XML/samples/DOMParser

XML/samples/DOMParser/DOMParser.vmsbuild

XML/samples/DOMParser/DOMParser_vs71.vcproj

XML/samples/DOMParser/DOMParser_vs80.vcproj

XML/samples/DOMParser/Makefile

XML/samples/DOMParser/bin

XML/samples/DOMParser/src

XML/samples/DOMParser/src/DOMParser.cpp

XML/samples/DOMWriter

XML/samples/DOMWriter/DOMWriter.vmsbuild

XML/samples/DOMWriter/DOMWriter_vs71.vcproj

XML/samples/DOMWriter/DOMWriter_vs80.vcproj

XML/samples/DOMWriter/Makefile

XML/samples/DOMWriter/bin

XML/samples/DOMWriter/src

XML/samples/DOMWriter/src/DOMWriter.cpp

XML/samples/Makefile

XML/samples/PrettyPrint

XML/samples/PrettyPrint/Makefile

XML/samples/PrettyPrint/PrettyPrint.vmsbuild

XML/samples/PrettyPrint/PrettyPrint_vs71.vcproj

XML/samples/PrettyPrint/PrettyPrint_vs80.vcproj

XML/samples/PrettyPrint/bin

XML/samples/PrettyPrint/src

XML/samples/PrettyPrint/src/PrettyPrint.cpp

XML/samples/SAXParser

XML/samples/SAXParser/Makefile

XML/samples/SAXParser/SAXParser.vmsbuild

XML/samples/SAXParser/SAXParser_vs71.vcproj

XML/samples/SAXParser/SAXParser_vs80.vcproj

XML/samples/SAXParser/bin

XML/samples/SAXParser/src

XML/samples/SAXParser/src/SAXParser.cpp

XML/samples/data

XML/samples/data/sample.xml

XML/samples/samples_vs71.sln

XML/samples/samples_vs80.sln

XML/src

XML/src/AbstractContainerNode.cpp

XML/src/AbstractNode.cpp

XML/src/Attr.cpp

XML/src/AttrMap.cpp

XML/src/Attributes.cpp

XML/src/AttributesImpl.cpp

XML/src/CDATASection.cpp

XML/src/CharacterData.cpp

XML/src/ChildNodesList.cpp

XML/src/Comment.cpp

XML/src/ContentHandler.cpp

XML/src/DOMBuilder.cpp

XML/src/DOMException.cpp

XML/src/DOMImplementation.cpp

XML/src/DOMObject.cpp

XML/src/DOMParser.cpp

XML/src/DOMSerializer.cpp

XML/src/DOMWriter.cpp

XML/src/DTDHandler.cpp

XML/src/DTDMap.cpp

XML/src/DeclHandler.cpp

XML/src/DefaultHandler.cpp

XML/src/Document.cpp

XML/src/DocumentEvent.cpp

XML/src/DocumentFragment.cpp

XML/src/DocumentType.cpp

XML/src/Element.cpp

XML/src/ElementsByTagNameList.cpp

XML/src/Entity.cpp

XML/src/EntityReference.cpp

XML/src/EntityResolver.cpp

XML/src/EntityResolverImpl.cpp

XML/src/ErrorHandler.cpp

XML/src/Event.cpp

XML/src/EventDispatcher.cpp

XML/src/EventException.cpp

XML/src/EventListener.cpp

XML/src/EventTarget.cpp

XML/src/InputSource.cpp

XML/src/LexicalHandler.cpp

XML/src/Locator.cpp

XML/src/LocatorImpl.cpp

XML/src/MutationEvent.cpp

XML/src/Name.cpp

XML/src/NamePool.cpp

XML/src/NamedNodeMap.cpp

XML/src/NamespaceStrategy.cpp

XML/src/NamespaceSupport.cpp

XML/src/Node.cpp

XML/src/NodeFilter.cpp

XML/src/NodeIterator.cpp

XML/src/NodeList.cpp

XML/src/Notation.cpp

XML/src/ParserEngine.cpp

XML/src/ProcessingInstruction.cpp

XML/src/SAXException.cpp

XML/src/SAXParser.cpp

XML/src/Text.cpp

XML/src/TreeWalker.cpp

XML/src/WhitespaceFilter.cpp

XML/src/XMLException.cpp

XML/src/XMLFilter.cpp

XML/src/XMLFilterImpl.cpp

XML/src/XMLReader.cpp

XML/src/XMLString.cpp

XML/src/XMLWriter.cpp

XML/src/ascii.h

XML/src/asciitab.h

XML/src/expat_config.h

XML/src/iasciitab.h

XML/src/internal.h

XML/src/latin1tab.h

XML/src/nametab.h

XML/src/utf8tab.h

XML/src/xmlparse.cpp

XML/src/xmlrole.c

XML/src/xmlrole.h

XML/src/xmltok.c

XML/src/xmltok.h

XML/src/xmltok_impl.c

XML/src/xmltok_impl.h

XML/src/xmltok_ns.c

XML/testsuite

XML/testsuite/Makefile

XML/testsuite/TestSuite.vmsbuild

XML/testsuite/TestSuite_vs71.vcproj

XML/testsuite/TestSuite_vs80.vcproj

XML/testsuite/src

XML/testsuite/src/AttributesImplTest.cpp

XML/testsuite/src/AttributesImplTest.h

XML/testsuite/src/ChildNodesTest.cpp

XML/testsuite/src/ChildNodesTest.h

XML/testsuite/src/DOMTestSuite.cpp

XML/testsuite/src/DOMTestSuite.h

XML/testsuite/src/DocumentTest.cpp

XML/testsuite/src/DocumentTest.h

XML/testsuite/src/DocumentTypeTest.cpp

XML/testsuite/src/DocumentTypeTest.h

XML/testsuite/src/Driver.cpp

XML/testsuite/src/ElementTest.cpp

XML/testsuite/src/ElementTest.h

XML/testsuite/src/EventTest.cpp

XML/testsuite/src/EventTest.h

XML/testsuite/src/NamePoolTest.cpp

XML/testsuite/src/NamePoolTest.h

XML/testsuite/src/NameTest.cpp

XML/testsuite/src/NameTest.h

XML/testsuite/src/NamespaceSupportTest.cpp

XML/testsuite/src/NamespaceSupportTest.h

XML/testsuite/src/NodeIteratorTest.cpp

XML/testsuite/src/NodeIteratorTest.h

XML/testsuite/src/NodeTest.cpp

XML/testsuite/src/NodeTest.h

XML/testsuite/src/ParserWriterTest.cpp

XML/testsuite/src/ParserWriterTest.h

XML/testsuite/src/SAXParserTest.cpp

XML/testsuite/src/SAXParserTest.h

XML/testsuite/src/SAXTestSuite.cpp

XML/testsuite/src/SAXTestSuite.h

XML/testsuite/src/TextTest.cpp

XML/testsuite/src/TextTest.h

XML/testsuite/src/TreeWalkerTest.cpp

XML/testsuite/src/TreeWalkerTest.h

XML/testsuite/src/WinDriver.cpp

XML/testsuite/src/XMLTestSuite.cpp

XML/testsuite/src/XMLTestSuite.h

XML/testsuite/src/XMLWriterTest.cpp

XML/testsuite/src/XMLWriterTest.h

build

build/config

build/config/ARM-Linux

build/config/CYGWIN

build/config/Darwin

build/config/Darwin7

build/config/FreeBSD

build/config/HP-UX

build/config/Linux

build/config/MinGW

build/config/OSF1

build/config/QNX

build/config/SunOS

build/rules

build/rules/compile

build/rules/dylib

build/rules/exec

build/rules/global

build/rules/lib

build/script

build/script/makedepend.SunCC

build/script/makedepend.aCC

build/script/makedepend.cxx

build/script/makedepend.gcc

build/script/makedepend.qcc

build/script/makeldpath

build/script/projname

build/script/shlibln

build/vms

build/vms/build.com

build/vms/pococc.com

build/vms/pococxx.com

build/vms/pocolib.com

build/vms/pocolnk.com

build_vs71.cmd

build_vs80.cmd

buildvms.com

components

configure

contrib

contrib/cmake

contrib/cmake/CMakeLists.txt

contrib/cmake/PocoConfig.cmake

contrib/cmake/README

contrib/poco-doc.pl

doc/Acknowledgements.html

libversion

Show diffs side-by-side

added added

removed removed

Foundation/src/pcre.c

/*************************************************

* Perl-Compatible Regular Expressions *

*************************************************/

This is a library of functions to support regular expressions whose syntax

and semantics are as close as possible to those of the Perl 5 language. See

the file Tech.Notes for some information on the internals.

Written by: Philip Hazel <ph10@cam.ac.uk>

-----------------------------------------------------------------------------

Redistribution and use in source and binary forms, with or without

modification, are permitted provided that the following conditions are met:

* Redistributions of source code must retain the above copyright notice,

this list of conditions and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright

notice, this list of conditions and the following disclaimer in the

documentation and/or other materials provided with the distribution.

* Neither the name of the University of Cambridge nor the names of its

contributors may be used to endorse or promote products derived from

this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"

AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE

LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR

CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF

SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS

INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN

CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)

ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

POSSIBILITY OF SUCH DAMAGE.

-----------------------------------------------------------------------------

/* Define DEBUG to get debugging output on stdout. */

/* #define DEBUG */

/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef

inline, and there are *still* stupid compilers about that don't like indented

pre-processor statements. I suppose it's only been 10 years... */

#ifdef DEBUG

/* The argument is pasted directly after "printf", so callers must supply the
whole parenthesised argument list themselves, e.g. DPRINTF(("n=%d\n", n)). */
#define DPRINTF(p) printf p

#else

/* Debugging disabled: every DPRINTF(...) expands to nothing. */
#define DPRINTF(p) /*nothing*/

#endif

/* Include the internals header, which itself includes "config.h", the Standard

C headers, and the external pcre header. */

#include "pcreinternal.h"

/* If Unicode Property support is wanted, include a private copy of the

function that does it, and the table that translates names to numbers. */

#ifdef SUPPORT_UCP

#include "ucp.c"

#include "ucptypetable.c"

#endif

/* Maximum number of items on the nested bracket stacks at compile time. This

applies to the nesting of all kinds of parentheses. It does not limit

un-nested, non-capturing parentheses. This number can be made bigger if

necessary - it is used to dimension one int and one unsigned char vector at

compile time. */

#define BRASTACK_SIZE 200

/* Maximum number of ints of offset to save on the stack for recursive calls.

If the offset vector is bigger, malloc is used. This should be a multiple of 3,

because the offset vector is always a multiple of 3 long. */

#define REC_STACK_SAVE_MAX 30

/* The maximum remaining length of subject we are prepared to search for a

req_byte match. */

#define REQ_BYTE_MAX 1000

/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that

the definition is next to the definition of the opcodes in pcreinternal.h. */

static const uschar OP_lengths[] = { OP_LENGTHS };

/* Min and max values for the common repeats; for the maxima, 0 => infinity.
The two tables are parallel: entry k of rep_min and entry k of rep_max describe
the same repeat. NOTE(review): the six entries presumably correspond to
*, *?, +, +?, ? and ?? in that order - confirm against the code that indexes
these tables. */

static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };

101

102

/* Table for handling escaped characters in the range '0'-'z'. Positive returns

103

are simple data values; negative values are for special things like \d and so

104

on. Zero means further processing is needed (for things like \x), or the escape

105

is invalid. */

106

107

#if !EBCDIC /* This is the "normal" table for ASCII systems */

108

static const short int escapes[] = {

109

0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */

110

0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? */

111

'@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E, 0, -ESC_G, /* @ - G */

112

0, 0, 0, 0, 0, 0, 0, 0, /* H - O */

113

-ESC_P, -ESC_Q, 0, -ESC_S, 0, 0, 0, -ESC_W, /* P - W */

114

-ESC_X, 0, -ESC_Z, '[', '\\', ']', '^', '_', /* X - _ */

115

'`', 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0, /* ` - g */

116

0, 0, 0, 0, 0, 0, ESC_n, 0, /* h - o */

117

-ESC_p, 0, ESC_r, -ESC_s, ESC_tee, 0, 0, -ESC_w, /* p - w */

118

0, 0, -ESC_z /* x - z */

119

};

120

121

#else /* This is the "abnormal" table for EBCDIC systems */

122

static const short int escapes[] = {

123

/* 48 */ 0, 0, 0, '.', '<', '(', '+', '|',

124

/* 50 */ '&', 0, 0, 0, 0, 0, 0, 0,

125

/* 58 */ 0, 0, '!', '$', '*', ')', ';', '~',

126

/* 60 */ '-', '/', 0, 0, 0, 0, 0, 0,

127

/* 68 */ 0, 0, '|', ',', '%', '_', '>', '?',

128

/* 70 */ 0, 0, 0, 0, 0, 0, 0, 0,

129

/* 78 */ 0, '`', ':', '#', '@', '\'', '=', '"',

130

/* 80 */ 0, 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0,

131

/* 88 */ 0, 0, 0, '{', 0, 0, 0, 0,

132

/* 90 */ 0, 0, 0, 'l', 0, ESC_n, 0, -ESC_p,

133

/* 98 */ 0, ESC_r, 0, '}', 0, 0, 0, 0,

134

/* A0 */ 0, '~', -ESC_s, ESC_tee, 0, 0, -ESC_w, 0,

135

/* A8 */ 0,-ESC_z, 0, 0, 0, '[', 0, 0,

136

/* B0 */ 0, 0, 0, 0, 0, 0, 0, 0,

137

/* B8 */ 0, 0, 0, 0, 0, ']', '=', '-',

138

/* C0 */ '{',-ESC_A, -ESC_B, -ESC_C, -ESC_D,-ESC_E, 0, -ESC_G,

139

/* C8 */ 0, 0, 0, 0, 0, 0, 0, 0,

140

/* D0 */ '}', 0, 0, 0, 0, 0, 0, -ESC_P,

141

/* D8 */-ESC_Q, 0, 0, 0, 0, 0, 0, 0,

142

/* E0 */ '\\', 0, -ESC_S, 0, 0, 0, -ESC_W, -ESC_X,

143

/* E8 */ 0,-ESC_Z, 0, 0, 0, 0, 0, 0,

144

/* F0 */ 0, 0, 0, 0, 0, 0, 0, 0,

145

/* F8 */ 0, 0, 0, 0, 0, 0, 0, 0

146

};

147

#endif

148

149

150

/* Tables of names of POSIX character classes and their lengths. The list is

151

terminated by a zero length entry. The first three must be alpha, upper, lower,

152

as this is assumed for handling case independence. */

153

154

static const char *const posix_names[] = {

155

"alpha", "lower", "upper",

156

"alnum", "ascii", "blank", "cntrl", "digit", "graph",

157

"print", "punct", "space", "word", "xdigit" };

158

159

static const uschar posix_name_lengths[] = {

160

5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };

161

162

/* Table of class bit maps for each POSIX class; up to three may be combined

163

to form the class. The table for [:blank:] is dynamically modified to remove

164

the vertical space characters. */

165

166

static const int posix_class_maps[] = {

167

cbit_lower, cbit_upper, -1, /* alpha */

168

cbit_lower, -1, -1, /* lower */

169

cbit_upper, -1, -1, /* upper */

170

cbit_digit, cbit_lower, cbit_upper, /* alnum */

171

cbit_print, cbit_cntrl, -1, /* ascii */

172

cbit_space, -1, -1, /* blank - a GNU extension */

173

cbit_cntrl, -1, -1, /* cntrl */

174

cbit_digit, -1, -1, /* digit */

175

cbit_graph, -1, -1, /* graph */

176

cbit_print, -1, -1, /* print */

177

cbit_punct, -1, -1, /* punct */

178

cbit_space, -1, -1, /* space */

179

cbit_word, -1, -1, /* word - a Perl extension */

180

cbit_xdigit,-1, -1 /* xdigit */

181

};

182

183

/* Table to identify digits and hex digits. This is used when compiling
patterns. Note that the tables in chartables are dependent on the locale, and
may mark arbitrary characters as digits - but the PCRE compiling code expects
to handle only 0-9, a-z, and A-Z as digits when compiling. That is why we have
a private table here. It costs 256 bytes, but it is a lot faster than doing
character value tests (at least in some simple cases I timed), and in some
applications one wants PCRE to compile efficiently as well as match
efficiently.

For convenience, we use the same bit definitions as in chartables:

  0x04   decimal digit
  0x08   hexadecimal digit

Then we can use ctype_digit and ctype_xdigit in the code. Decimal digits carry
both bits (0x0c); the letters a-f and A-F carry only the hexadecimal bit. */

#if !EBCDIC    /* This is the "normal" case, for ASCII systems */
static const unsigned char digitab[] =
  {
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   8- 15 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - '  */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  ( - /  */
  0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /*  0 - 7  */
  0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00, /*  8 - ?  */
  0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /*  @ - G  */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  H - O  */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  P - W  */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  X - _  */
  0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /*  ` - g  */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  h - o  */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  p - w  */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  x -127 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */

#else          /* This is the "abnormal" case, for EBCDIC systems */
static const unsigned char digitab[] =
  {
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7  0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   8- 15    */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 10 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31    */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  32- 39 20 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  40- 47    */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  48- 55 30 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  56- 63    */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - 71 40 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  72- |     */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  & - 87 50 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  88- 95    */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  - -103 60 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 104- ?     */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 70 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- "     */
  0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* 128- g  80 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  h -143    */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144- p  90 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  q -159    */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160- x  A0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  y -175    */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  ^ -183 B0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191    */
  0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /*  { - G  C0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  H -207    */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  } - P  D0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  Q -223    */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  \ - X  E0 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  Y -239    */
  0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /*  0 - 7  F0 */
  0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00};/*  8 -255    */

static const unsigned char ebcdic_chartab[] = { /* chartable partial dup */
  0x80,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /*   0-  7 */
  0x00,0x00,0x00,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
  0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /*  16- 23 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
  0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /*  32- 39 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  40- 47 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  48- 55 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  56- 63 */
  0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - 71 */
  0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80, /*  72- |  */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  & - 87 */
  0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /*  88- 95 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  - -103 */
  0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80, /* 104- ?  */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 */
  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- "  */
  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* 128- g  */
  0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /*  h -143 */
  0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* 144- p  */
  0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /*  q -159 */
  0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /* 160- x  */
  0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /*  y -175 */
  0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  ^ -183 */
  0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
  0x80,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  { - G  */
  0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /*  H -207 */
  0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  } - P  */
  0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /*  Q -223 */
  0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /*  \ - X  */
  0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /*  Y -239 */
  0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00};/*  8 -255 */
#endif

305

306

307

/* Definition to allow mutual recursion */

308

309

static BOOL

310

compile_regex(int, int, int *, uschar **, const uschar **, const char **,

311

BOOL, int, int *, int *, branch_chain *, compile_data *);

312

313

/* Structure for building a chain of data that actually lives on the

314

stack, for holding the values of the subject pointer at the start of each

315

subpattern, so as to detect when an empty string has been matched by a

316

subpattern - to break infinite loops. When NO_RECURSE is set, these blocks

317

are on the heap, not on the stack. */

318

319

typedef struct eptrblock {

320

struct eptrblock *epb_prev;

321

const uschar *epb_saved_eptr;

322

} eptrblock;

323

324

/* Flag bits for the match() function */

#define match_condassert   0x01    /* Called to check a condition assertion */
#define match_isgroup      0x02    /* Set if start of bracketed group */

/* Non-error returns from the match() function. Error returns are externally
defined PCRE_ERROR_xxx codes, which are all negative. */

#define MATCH_MATCH        1
#define MATCH_NOMATCH      0

334

335

336

337

/*************************************************

338

* Global variables *

339

*************************************************/

340

341

/* PCRE is thread-clean and doesn't use any global variables in the normal

342

sense. However, it calls memory allocation and free functions via the four

343

indirections below, and it can optionally do callouts. These values can be

344

changed by the caller, but are shared between all threads. However, when

345

compiling for Virtual Pascal, things are done differently (see pcre.in). */

346

347

#ifndef VPCOMPAT

348

#ifdef __cplusplus

349

extern "C" void *(*pcre_malloc)(size_t) = malloc;

350

extern "C" void (*pcre_free)(void *) = free;

351

extern "C" void *(*pcre_stack_malloc)(size_t) = malloc;

352

extern "C" void (*pcre_stack_free)(void *) = free;

353

extern "C" int (*pcre_callout)(pcre_callout_block *) = NULL;

354

#else

355

void *(*pcre_malloc)(size_t) = malloc;

356

void (*pcre_free)(void *) = free;

357

void *(*pcre_stack_malloc)(size_t) = malloc;

358

void (*pcre_stack_free)(void *) = free;

359

int (*pcre_callout)(pcre_callout_block *) = NULL;

360

#endif

361

#endif

362

363

364

/*************************************************

365

* Macros and tables for character handling *

366

*************************************************/

367

368

/* When UTF-8 encoding is being used, a character is no longer just a single
byte. The macros for character handling generate simple sequences when used in
byte-mode, and more complicated ones for UTF-8 characters.

Note that these macros expand to bare statements (not expressions), so they
must be used where a statement is allowed, and that the UTF-8 versions read
the tables utf8_table3 and utf8_table4. */

#ifndef SUPPORT_UTF8
#define GETCHAR(c, eptr) c = *eptr;
#define GETCHARINC(c, eptr) c = *eptr++;
#define GETCHARINCTEST(c, eptr) c = *eptr++;
#define GETCHARLEN(c, eptr, len) c = *eptr;
#define BACKCHAR(eptr)

#else   /* SUPPORT_UTF8 */

/* Get the next UTF-8 character, not advancing the pointer. This is called when
we know we are in UTF-8 mode. */

#define GETCHAR(c, eptr) \
  c = *eptr; \
  if ((c & 0xc0) == 0xc0) \
    { \
    int gcii; \
    int gcaa = utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
    int gcss = 6*gcaa; \
    c = (c & utf8_table3[gcaa]) << gcss; \
    for (gcii = 1; gcii <= gcaa; gcii++) \
      { \
      gcss -= 6; \
      c |= (eptr[gcii] & 0x3f) << gcss; \
      } \
    }

/* Get the next UTF-8 character, advancing the pointer. This is called when we
know we are in UTF-8 mode. */

#define GETCHARINC(c, eptr) \
  c = *eptr++; \
  if ((c & 0xc0) == 0xc0) \
    { \
    int gcaa = utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
    int gcss = 6*gcaa; \
    c = (c & utf8_table3[gcaa]) << gcss; \
    while (gcaa-- > 0) \
      { \
      gcss -= 6; \
      c |= (*eptr++ & 0x3f) << gcss; \
      } \
    }

/* Get the next character, testing for UTF-8 mode, and advancing the pointer.
This version reads md->utf8, so a variable named md must be in scope at the
point of use. */

#define GETCHARINCTEST(c, eptr) \
  c = *eptr++; \
  if (md->utf8 && (c & 0xc0) == 0xc0) \
    { \
    int gcaa = utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
    int gcss = 6*gcaa; \
    c = (c & utf8_table3[gcaa]) << gcss; \
    while (gcaa-- > 0) \
      { \
      gcss -= 6; \
      c |= (*eptr++ & 0x3f) << gcss; \
      } \
    }

/* Get the next UTF-8 character, not advancing the pointer, incrementing length
if there are extra bytes. This is called when we know we are in UTF-8 mode. */

#define GETCHARLEN(c, eptr, len) \
  c = *eptr; \
  if ((c & 0xc0) == 0xc0) \
    { \
    int gcii; \
    int gcaa = utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
    int gcss = 6*gcaa; \
    c = (c & utf8_table3[gcaa]) << gcss; \
    for (gcii = 1; gcii <= gcaa; gcii++) \
      { \
      gcss -= 6; \
      c |= (eptr[gcii] & 0x3f) << gcss; \
      } \
    len += gcaa; \
    }

/* If the pointer is not at the start of a character, move it back until
it is. Called only in UTF-8 mode. */

#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--;

#endif

457

458

459

460

/*************************************************

461

* Default character tables *

462

*************************************************/

463

464

/* A default set of character tables is included in the PCRE binary. Its source

465

is built by the maketables auxiliary program, which uses the default C ctypes

466

functions, and put in the file chartables.c. These tables are used by PCRE

467

whenever the caller of pcre_compile() does not provide an alternate set of

468

tables. */

469

470

#include "chartables.c"

471

472

473

474

#ifdef SUPPORT_UTF8

475

/*************************************************

476

* Tables for UTF-8 support *

477

*************************************************/

478

479

/* These are the breakpoints for different numbers of bytes in a UTF-8

480

character. */

481

482

static const int utf8_table1[] =

483

{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};

484

485

/* These are the indicator bits and the mask for the data bits to set in the

486

first byte of a character, indexed by the number of additional bytes. */

487

488

static const int utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};

489

static const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};

490

491

/* Table of the number of extra characters, indexed by the first character

492

masked with 0x3f. The highest number for a valid UTF-8 character is in fact

493

0x3d. */

494

495

static const uschar utf8_table4[] = {

496

1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,

497

1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,

498

2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,

499

3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };

500

501

502

/*************************************************

503

* Convert character value to UTF-8 *

504

*************************************************/

505

506

/* This function takes an integer value in the range 0 - 0x7fffffff

507

and encodes it as a UTF-8 character in 0 to 6 bytes.

508

509

Arguments:

510

cvalue the character value

511

buffer pointer to buffer for result - at least 6 bytes long

512

513

Returns: number of characters placed in the buffer

514

515

516

static int

517

ord2utf8(int cvalue, uschar *buffer)

518

{

519

520

for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)

521

if (cvalue <= utf8_table1[i]) break;

522

buffer += i;

523

for (j = i; j > 0; j--)

524

{

525

*buffer-- = 0x80 | (cvalue & 0x3f);

526

cvalue >>= 6;

527

}

528

*buffer = utf8_table2[i] | cvalue;

529

return i + 1;

530

}

531

#endif

532

533

534

535

/*************************************************

536

* Print compiled regex *

537

*************************************************/

538

539

/* The code for doing this is held in a separate file that is also included in

540

pcretest.c. It defines a function called print_internals(). */

541

542

#ifdef DEBUG

543

#include "printint.c"

544

#endif

545

546

547

548

/*************************************************

549

* Return version string *

550

*************************************************/

551

552

#define STRING(a) # a

553

#define XSTRING(s) STRING(s)

554

555

EXPORT const char *

556

pcre_version(void)

557

{

558

return XSTRING(PCRE_MAJOR) "." XSTRING(PCRE_MINOR) " " XSTRING(PCRE_DATE);

559

}

560

561

562

563

564

/*************************************************

565

* Flip bytes in an integer *

566

*************************************************/

567

568

/* This function is called when the magic number in a regex doesn't match in
order to flip its bytes to see if we are dealing with a pattern that was
compiled on a host of different endianness. If so, this function is used to
flip other byte values.

Only the low-order 2 or 4 bytes of the value take part; any higher-order bits
are discarded. Any n other than 2 is treated as 4.

Arguments:
  value        the number to flip
  n            the number of bytes to flip (assumed to be 2 or 4)

Returns:       the flipped value
*/

static long int
byteflip(long int value, int n)
{
if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
return ((value & 0x000000ff) << 24) |
       ((value & 0x0000ff00) <<  8) |
       ((value & 0x00ff0000) >>  8) |
       ((value & 0xff000000) >> 24);
}

589

590

591

592

/*************************************************

593

* Test for a byte-flipped compiled regex *

594

*************************************************/

595

596

/* This function is called from pce_exec() and also from pcre_fullinfo(). Its

597

job is to test whether the regex is byte-flipped - that is, it was compiled on

598

a system of opposite endianness. The function is called only when the native

599

MAGIC_NUMBER test fails. If the regex is indeed flipped, we flip all the

600

relevant values into a different data block, and return it.

601

602

Arguments:

603

re points to the regex

604

study points to study data, or NULL

605

internal_re points to a new regex block

606

internal_study points to a new study block

607

608

Returns: the new block if is is indeed a byte-flipped regex

609

NULL if it is not

610

611

612

static real_pcre *

613

try_flipped(const real_pcre *re, real_pcre *internal_re,

614

const pcre_study_data *study, pcre_study_data *internal_study)

615

{

616

if (byteflip(re->magic_number, sizeof(re->magic_number)) != MAGIC_NUMBER)

617

return NULL;

618

619

*internal_re = *re; /* To copy other fields */

620

internal_re->size = byteflip(re->size, sizeof(re->size));

621

internal_re->options = byteflip(re->options, sizeof(re->options));

622

internal_re->top_bracket = byteflip(re->top_bracket, sizeof(re->top_bracket));

623

internal_re->top_backref = byteflip(re->top_backref, sizeof(re->top_backref));

624

internal_re->first_byte = byteflip(re->first_byte, sizeof(re->first_byte));

625

internal_re->req_byte = byteflip(re->req_byte, sizeof(re->req_byte));

626

internal_re->name_table_offset = byteflip(re->name_table_offset,

627

sizeof(re->name_table_offset));

628

internal_re->name_entry_size = byteflip(re->name_entry_size,

629

sizeof(re->name_entry_size));

630

internal_re->name_count = byteflip(re->name_count, sizeof(re->name_count));

631

632

if (study != NULL)

633

{

634

*internal_study = *study; /* To copy other fields */

635

internal_study->size = byteflip(study->size, sizeof(study->size));

636

internal_study->options = byteflip(study->options, sizeof(study->options));

637

}

638

639

return internal_re;

640

}

641

642

643

644

/*************************************************

645

* (Obsolete) Return info about compiled pattern *

646

*************************************************/

647

648

/* This is the original "info" function. It picks potentially useful data out

649

of the private structure, but its interface was too rigid. It remains for

650

backwards compatibility. The public options are passed back in an int - though

651

the re->options field has been expanded to a long int, all the public options

652

at the low end of it, and so even on 16-bit systems this will still be OK.

653

Therefore, I haven't changed the API for pcre_info().

654

655

Arguments:

656

argument_re points to compiled code

657

optptr where to pass back the options

658

first_byte where to pass back the first character,

659

or -1 if multiline and all branches start ^,

660

or -2 otherwise

661

662

Returns: number of capturing subpatterns

663

or negative values on error

664

665

666

EXPORT int

667

pcre_info(const pcre *argument_re, int *optptr, int *first_byte)

668

{

669

real_pcre internal_re;

670

const real_pcre *re = (const real_pcre *)argument_re;

671

if (re == NULL) return PCRE_ERROR_NULL;

672

if (re->magic_number != MAGIC_NUMBER)

673

{

674

re = try_flipped(re, &internal_re, NULL, NULL);

675

if (re == NULL) return PCRE_ERROR_BADMAGIC;

676

}

677

if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS);

678

if (first_byte != NULL)

679

*first_byte = ((re->options & PCRE_FIRSTSET) != 0)? re->first_byte :

680

((re->options & PCRE_STARTLINE) != 0)? -1 : -2;

681

return re->top_bracket;

682

}

683

684

685

686

/*************************************************

687

* Return info about compiled pattern *

688

*************************************************/

689

690

/* This is a newer "info" function which has an extensible interface so

691

that additional items can be added compatibly.

692

693

Arguments:

694

argument_re points to compiled code

695

extra_data points extra data, or NULL

696

what what information is required

697

where where to put the information

698

699

Returns: 0 if data returned, negative on error

700

701

702

EXPORT int

703

pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,

704

void *where)

705

{

706

real_pcre internal_re;

707

pcre_study_data internal_study;

708

const real_pcre *re = (const real_pcre *)argument_re;

709

const pcre_study_data *study = NULL;

710

711

if (re == NULL || where == NULL) return PCRE_ERROR_NULL;

712

713

if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0)

714

study = (const pcre_study_data *)extra_data->study_data;

715

716

if (re->magic_number != MAGIC_NUMBER)

717

{

718

re = try_flipped(re, &internal_re, study, &internal_study);

719

if (re == NULL) return PCRE_ERROR_BADMAGIC;

720

if (study != NULL) study = &internal_study;

721

}

722

723

switch (what)

724

{

725

case PCRE_INFO_OPTIONS:

726

*((unsigned long int *)where) = re->options & PUBLIC_OPTIONS;

727

break;

728

729

case PCRE_INFO_SIZE:

730

*((size_t *)where) = re->size;

731

break;

732

733

case PCRE_INFO_STUDYSIZE:

734

*((size_t *)where) = (study == NULL)? 0 : study->size;

735

break;

736

737

case PCRE_INFO_CAPTURECOUNT:

738

*((int *)where) = re->top_bracket;

739

break;

740

741

case PCRE_INFO_BACKREFMAX:

742

*((int *)where) = re->top_backref;

743

break;

744

745

case PCRE_INFO_FIRSTBYTE:

746

*((int *)where) =

747

((re->options & PCRE_FIRSTSET) != 0)? re->first_byte :

748

((re->options & PCRE_STARTLINE) != 0)? -1 : -2;

749

break;

750

751

/* Make sure we pass back the pointer to the bit vector in the external

752

block, not the internal copy (with flipped integer fields). */

753

754

case PCRE_INFO_FIRSTTABLE:

755

*((const uschar **)where) =

756

(study != NULL && (study->options & PCRE_STUDY_MAPPED) != 0)?

757

((const pcre_study_data *)extra_data->study_data)->start_bits : NULL;

758

break;

759

760

case PCRE_INFO_LASTLITERAL:

761

*((int *)where) =

762

((re->options & PCRE_REQCHSET) != 0)? re->req_byte : -1;

763

break;

764

765

case PCRE_INFO_NAMEENTRYSIZE:

766

*((int *)where) = re->name_entry_size;

767

break;

768

769

case PCRE_INFO_NAMECOUNT:

770

*((int *)where) = re->name_count;

771

break;

772

773

case PCRE_INFO_NAMETABLE:

774

*((const uschar **)where) = (const uschar *)re + re->name_table_offset;

775

break;

776

777

case PCRE_INFO_DEFAULT_TABLES:

778

*((const uschar **)where) = (const uschar *)pcre_default_tables;

779

break;

780

781

default: return PCRE_ERROR_BADOPTION;

782

}

783

784

return 0;

785

}

786

787

788

789

/*************************************************

790

* Return info about what features are configured *

791

*************************************************/

792

793

/* This is function which has an extensible interface so that additional items

794

can be added compatibly.

795

796

Arguments:

797

what what information is required

798

where where to put the information

799

800

Returns: 0 if data returned, negative on error

801

802

803

EXPORT int

804

pcre_config(int what, void *where)

805

{

806

switch (what)

807

{

808

case PCRE_CONFIG_UTF8:

809

#ifdef SUPPORT_UTF8

810

*((int *)where) = 1;

811

#else

812

*((int *)where) = 0;

813

#endif

814

break;

815

816

case PCRE_CONFIG_UNICODE_PROPERTIES:

817

#ifdef SUPPORT_UCP

818

*((int *)where) = 1;

819

#else

820

*((int *)where) = 0;

821

#endif

822

break;

823

824

case PCRE_CONFIG_NEWLINE:

825

*((int *)where) = NEWLINE;

826

break;

827

828

case PCRE_CONFIG_LINK_SIZE:

829

*((int *)where) = LINK_SIZE;

830

break;

831

832

case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD:

833

*((int *)where) = POSIX_MALLOC_THRESHOLD;

834

break;

835

836

case PCRE_CONFIG_MATCH_LIMIT:

837

*((unsigned int *)where) = MATCH_LIMIT;

838

break;

839

840

case PCRE_CONFIG_STACKRECURSE:

841

#ifdef NO_RECURSE

842

*((int *)where) = 0;

843

#else

844

*((int *)where) = 1;

845

#endif

846

break;

847

848

default: return PCRE_ERROR_BADOPTION;

849

}

850

851

return 0;

852

}

853

854

855

856

#ifdef DEBUG
/*************************************************
*        Debugging function to print chars       *
*************************************************/

/* Print a sequence of chars in printable format, stopping at the end of the
subject if requested. Bytes for which isprint() is false are written as \xhh.

Arguments:
  p           points to characters
  length      number to print
  is_subject  TRUE if printing from within md->start_subject
  md          pointer to matching data block, if is_subject is TRUE

Returns:     nothing
*/

static void
pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
{
int c;

/* When printing from the subject, never run past its end. */

if (is_subject && length > md->end_subject - p) length = md->end_subject - p;

while (length-- > 0)
  if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c);
}
#endif

882

883

884

885

886

/*************************************************

887

* Handle escapes *

888

*************************************************/

889

890

/* This function is called when a \ has been encountered. It either returns a

891

positive value for a simple escape such as \n, or a negative value which

892

encodes one of the more complicated things such as \d. When UTF-8 is enabled,

893

a positive value greater than 255 may be returned. On entry, ptr is pointing at

894

the \. On exit, it is on the final character of the escape sequence.

895

896

Arguments:

897

ptrptr points to the pattern position pointer

898

errorptr points to the pointer to the error message

899

bracount number of previous extracting brackets

900

options the options bits

901

isclass TRUE if inside a character class

902

903

Returns: zero or positive => a data character

904

negative => a special escape sequence

905

on error, errorptr is set

906

907

908

static int

909

check_escape(const uschar **ptrptr, const char **errorptr, int bracount,

910

int options, BOOL isclass)

911

{

912

const uschar *ptr = *ptrptr;

913

int c, i;

914

915

/* If backslash is at the end of the pattern, it's an error. */

916

917

c = *(++ptr);

918

if (c == 0) *errorptr = ERR1;

919

920

/* Non-alphamerics are literals. For digits or letters, do an initial lookup in

921

a table. A non-zero result is something that can be returned immediately.

922

Otherwise further processing may be required. */

923

924

#if !EBCDIC /* ASCII coding */

925

else if (c < '0' || c > 'z') {} /* Not alphameric */

926

else if ((i = escapes[c - '0']) != 0) c = i;

927

928

#else /* EBCDIC coding */

929

else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {} /* Not alphameric */

930

else if ((i = escapes[c - 0x48]) != 0) c = i;

931

#endif

932

933

/* Escapes that need further processing, or are illegal. */

934

935

else

936

{

937

const uschar *oldptr;

938

switch (c)

939

{

940

/* A number of Perl escapes are not handled by PCRE. We give an explicit

941

error. */

942

943

case 'l':

944

case 'L':

945

case 'N':

946

case 'u':

947

case 'U':

948

*errorptr = ERR37;

949

break;

950

951

/* The handling of escape sequences consisting of a string of digits

952

starting with one that is not zero is not straightforward. By experiment,

953

the way Perl works seems to be as follows:

954

955

Outside a character class, the digits are read as a decimal number. If the

956

number is less than 10, or if there are that many previous extracting

957

left brackets, then it is a back reference. Otherwise, up to three octal

958

digits are read to form an escaped byte. Thus \123 is likely to be octal

959

123 (cf \0123, which is octal 012 followed by the literal 3). If the octal

960

value is greater than 377, the least significant 8 bits are taken. Inside a

961

character class, \ followed by a digit is always an octal number. */

962

963

case '1': case '2': case '3': case '4': case '5':

964

case '6': case '7': case '8': case '9':

965

966

if (!isclass)

967

{

968

oldptr = ptr;

969

c -= '0';

970

while ((digitab[ptr[1]] & ctype_digit) != 0)

971

c = c * 10 + *(++ptr) - '0';

972

if (c < 10 || c <= bracount)

973

{

974

c = -(ESC_REF + c);

975

break;

976

}

977

ptr = oldptr; /* Put the pointer back and fall through */

978

}

979

980

/* Handle an octal number following \. If the first digit is 8 or 9, Perl

981

generates a binary zero byte and treats the digit as a following literal.

982

Thus we have to pull back the pointer by one. */

983

984

if ((c = *ptr) >= '8')

985

{

986

ptr--;

987

c = 0;

988

break;

989

}

990

991

/* \0 always starts an octal number, but we may drop through to here with a

992

larger first octal digit. */

993

994

case '0':

995

c -= '0';

996

while(i++ < 2 && ptr[1] >= '0' && ptr[1] <= '7')

997

c = c * 8 + *(++ptr) - '0';

998

c &= 255; /* Take least significant 8 bits */

999

break;

1000

1001

/* \x is complicated when UTF-8 is enabled. \x{ddd} is a character number

1002

which can be greater than 0xff, but only if the ddd are hex digits. */

1003

1004

case 'x':

1005

#ifdef SUPPORT_UTF8

1006

if (ptr[1] == '{' && (options & PCRE_UTF8) != 0)

1007

{

1008

const uschar *pt = ptr + 2;

1009

1010

c = 0;

1011

while ((digitab[*pt] & ctype_xdigit) != 0)

1012

{

1013

int cc = *pt++;

1014

count++;

1015

#if !EBCDIC /* ASCII coding */

1016

if (cc >= 'a') cc -= 32; /* Convert to upper case */

1017

c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));

1018

#else /* EBCDIC coding */

1019

if (cc >= 'a' && cc <= 'z') cc += 64; /* Convert to upper case */

1020

c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));

1021

#endif

1022

}

1023

if (*pt == '}')

1024

{

1025

if (c < 0 || count > 8) *errorptr = ERR34;

1026

ptr = pt;

1027

break;

1028

}

1029

/* If the sequence of hex digits does not end with '}', then we don't

1030

recognize this construct; fall through to the normal \x handling. */

1031

}

1032

#endif

1033

1034

/* Read just a single hex char */

1035

1036

c = 0;

1037

while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0)

1038

{

1039

int cc; /* Some compilers don't like ++ */

1040

cc = *(++ptr); /* in initializers */

1041

#if !EBCDIC /* ASCII coding */

1042

if (cc >= 'a') cc -= 32; /* Convert to upper case */

1043

c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));

1044

#else /* EBCDIC coding */

1045

if (cc <= 'z') cc += 64; /* Convert to upper case */

1046

c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));

1047

#endif

1048

}

1049

break;

1050

1051

/* Other special escapes not starting with a digit are straightforward */

1052

1053

case 'c':

1054

c = *(++ptr);

1055

if (c == 0)

1056

{

1057

*errorptr = ERR2;

1058

return 0;

1059

}

1060

1061

/* A letter is upper-cased; then the 0x40 bit is flipped. This coding

1062

is ASCII-specific, but then the whole concept of \cx is ASCII-specific.

1063

(However, an EBCDIC equivalent has now been added.) */

1064

1065

#if !EBCDIC /* ASCII coding */

1066

if (c >= 'a' && c <= 'z') c -= 32;

1067

c ^= 0x40;

1068

#else /* EBCDIC coding */

1069

if (c >= 'a' && c <= 'z') c += 64;

1070

c ^= 0xC0;

1071

#endif

1072

break;

1073

1074

/* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any

1075

other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,

1076

for Perl compatibility, it is a literal. This code looks a bit odd, but

1077

there used to be some cases other than the default, and there may be again

1078

in future, so I haven't "optimized" it. */

1079

1080

default:

1081

if ((options & PCRE_EXTRA) != 0) switch(c)

1082

{

1083

default:

1084

*errorptr = ERR3;

1085

break;

1086

}

1087

break;

1088

}

1089

}

1090

1091

*ptrptr = ptr;

1092

return c;

1093

}

1094

1095

1096

1097

#ifdef SUPPORT_UCP
/*************************************************
*               Handle \P and \p                 *
*************************************************/

/* This function is called after \P or \p has been encountered, provided that
PCRE is compiled with support for Unicode properties. On entry, ptrptr is
pointing at the P or p. On exit, it is pointing at the final character of the
escape sequence.

Argument:
  ptrptr     points to the pattern position pointer
  negptr     points to a boolean that is set TRUE for negation else FALSE
             (left untouched if the pattern ends right after the P or p)
  errorptr   points to the pointer to the error message

Returns:     value from ucp_type_table, or -1 for an invalid type
             (errorptr is then set to ERR46 for a malformed sequence, or
             ERR47 for a well-formed but unknown property name)
*/

static int
get_ucp(const uschar **ptrptr, BOOL *negptr, const char **errorptr)
{
int c, i, bot, top;
const uschar *ptr = *ptrptr;
char name[4];

c = *(++ptr);
if (c == 0) goto ERROR_RETURN;

*negptr = FALSE;

/* \P or \p can be followed by a one- or two-character name in {}, optionally
preceded by ^ for negation. */

if (c == '{')
  {
  if (ptr[1] == '^')
    {
    *negptr = TRUE;
    ptr++;
    }

  /* Read at most three characters: up to two of name, then the closing
  brace. At most name[0..2] is written, so name[4] cannot overflow. */

  for (i = 0; i <= 2; i++)
    {
    c = *(++ptr);
    if (c == 0) goto ERROR_RETURN;
    if (c == '}') break;
    name[i] = c;
    }

  if (c !='}')   /* Try to distinguish error cases */
    {
    while (*(++ptr) != 0 && *ptr != '}');
    if (*ptr == '}') goto UNKNOWN_RETURN; else goto ERROR_RETURN;
    }
  name[i] = 0;
  }

/* Otherwise there is just one following character */

else
  {
  name[0] = c;
  name[1] = 0;
  }

*ptrptr = ptr;

/* Search for a recognized property name using binary chop */

bot = 0;
top = sizeof(utt)/sizeof(ucp_type_table);

while (bot < top)
  {
  i = (bot + top)/2;
  c = strcmp(name, utt[i].name);
  if (c == 0) return utt[i].value;
  if (c > 0) bot = i + 1; else top = i;
  }

UNKNOWN_RETURN:
*errorptr = ERR47;
*ptrptr = ptr;
return -1;

ERROR_RETURN:
*errorptr = ERR46;
*ptrptr = ptr;
return -1;
}
#endif

1186

1187

1188

1189

1190

/*************************************************

1191

* Check for counted repeat *

1192

*************************************************/

1193

1194

/* This function is called when a '{' is encountered in a place where it might

1195

start a quantifier. It looks ahead to see if it really is a quantifier or not.

1196

It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd}

1197

where the ddds are digits.

1198

1199

Arguments:

1200

p pointer to the first char after '{'

1201

1202

Returns: TRUE or FALSE

1203

1204

1205

static BOOL

1206

is_counted_repeat(const uschar *p)

1207

{

1208

if ((digitab[*p++] & ctype_digit) == 0) return FALSE;

1209

while ((digitab[*p] & ctype_digit) != 0) p++;

1210

if (*p == '}') return TRUE;

1211

1212

if (*p++ != ',') return FALSE;

1213

if (*p == '}') return TRUE;

1214

1215

if ((digitab[*p++] & ctype_digit) == 0) return FALSE;

1216

while ((digitab[*p] & ctype_digit) != 0) p++;

1217

1218

return (*p == '}');

1219

}

1220

1221

1222

1223

/*************************************************

1224

* Read repeat counts *

1225

*************************************************/

1226

1227

/* Read an item of the form {n,m} and return the values. This is called only

1228

after is_counted_repeat() has confirmed that a repeat-count quantifier exists,

1229

so the syntax is guaranteed to be correct, but we need to check the values.

1230

1231

Arguments:

1232

p pointer to first char after '{'

1233

minp pointer to int for min

1234

maxp pointer to int for max

1235

returned as -1 if no max

1236

errorptr points to pointer to error message

1237

1238

Returns: pointer to '}' on success;

1239

current ptr on error, with errorptr set

1240

1241

1242

static const uschar *

1243

read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr)

1244

{

1245

int min = 0;

1246

int max = -1;

1247

1248

while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';

1249

1250

if (*p == '}') max = min; else

1251

{

1252

if (*(++p) != '}')

1253

{

1254

max = 0;

1255

while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';

1256

if (max < min)

1257

{

1258

*errorptr = ERR4;

1259

return p;

1260

}

1261

}

1262

}

1263

1264

/* Do paranoid checks, then fill in the required variables, and pass back the

1265

pointer to the terminating '}'. */

1266

1267

if (min > 65535 || max > 65535)

1268

*errorptr = ERR5;

1269

else

1270

{

1271

*minp = min;

1272

*maxp = max;

1273

}

1274

return p;

1275

}

1276

1277

1278

1279

/*************************************************

1280

* Find first significant op code *

1281

*************************************************/

1282

1283

/* This is called by several functions that scan a compiled expression looking

1284

for a fixed first character, or an anchoring op code etc. It skips over things

1285

that do not influence this. For some calls, a change of option is important.

1286

For some calls, it makes sense to skip negative forward and all backward

1287

assertions, and also the \b assertion; for others it does not.

1288

1289

Arguments:

1290

code pointer to the start of the group

1291

options pointer to external options

1292

optbit the option bit whose changing is significant, or

1293

zero if none are

1294

skipassert TRUE if certain assertions are to be skipped

1295

1296

Returns: pointer to the first significant opcode

1297

1298

1299

static const uschar*

1300

first_significant_code(const uschar *code, int *options, int optbit,

1301

BOOL skipassert)

1302

{

1303

for (;;)

1304

{

1305

switch ((int)*code)

1306

{

1307

case OP_OPT:

1308

if (optbit > 0 && ((int)code[1] & optbit) != (*options & optbit))

1309

*options = (int)code[1];

1310

code += 2;

1311

break;

1312

1313

case OP_ASSERT_NOT:

1314

case OP_ASSERTBACK:

1315

case OP_ASSERTBACK_NOT:

1316

if (!skipassert) return code;

1317

do code += GET(code, 1); while (*code == OP_ALT);

1318

code += OP_lengths[*code];

1319

break;

1320

1321

case OP_WORD_BOUNDARY:

1322

case OP_NOT_WORD_BOUNDARY:

1323

if (!skipassert) return code;

1324

/* Fall through */

1325

1326

case OP_CALLOUT:

1327

case OP_CREF:

1328

case OP_BRANUMBER:

1329

code += OP_lengths[*code];

1330

break;

1331

1332

default:

1333

return code;

1334

}

1335

}

1336

/* Control never reaches here */

1337

}

1338

1339

1340

1341

1342

/*************************************************

1343

* Find the fixed length of a pattern *

1344

*************************************************/

1345

1346

/* Scan a pattern and compute the fixed length of subject that will match it,

1347

if the length is fixed. This is needed for dealing with backward assertions.

1348

In UTF8 mode, the result is in characters rather than bytes.

1349

1350

Arguments:

1351

code points to the start of the pattern (the bracket)

1352

options the compiling options

1353

1354

Returns: the fixed length, or -1 if there is no fixed length,

1355

or -2 if \C was encountered

1356

1357

1358

static int

1359

find_fixedlength(uschar *code, int options)

1360

{

1361

int length = -1;

1362

1363

1364

1365

1366

/* Scan along the opcodes for this branch. If we get to the end of the

1367

branch, check the length against that of the other branches. */

1368

1369

for (;;)

1370

{

1371

int d;

1372

1373

if (op >= OP_BRA) op = OP_BRA;

1374

1375

switch (op)

1376

{

1377

case OP_BRA:

1378

case OP_ONCE:

1379

case OP_COND:

1380

d = find_fixedlength(cc, options);

1381

if (d < 0) return d;

1382

branchlength += d;

1383

do cc += GET(cc, 1); while (*cc == OP_ALT);

1384

cc += 1 + LINK_SIZE;

1385

break;

1386

1387

/* Reached end of a branch; if it's a ket it is the end of a nested

1388

call. If it's ALT it is an alternation in a nested call. If it is

1389

END it's the end of the outer call. All can be handled by the same code. */

1390

1391

case OP_ALT:

1392

case OP_KET:

1393

case OP_KETRMAX:

1394

case OP_KETRMIN:

1395

case OP_END:

1396

if (length < 0) length = branchlength;

1397

else if (length != branchlength) return -1;

1398

if (*cc != OP_ALT) return length;

1399

cc += 1 + LINK_SIZE;

1400

branchlength = 0;

1401

break;

1402

1403

/* Skip over assertive subpatterns */

1404

1405

case OP_ASSERT:

1406

case OP_ASSERT_NOT:

1407

case OP_ASSERTBACK:

1408

case OP_ASSERTBACK_NOT:

1409

do cc += GET(cc, 1); while (*cc == OP_ALT);

1410

/* Fall through */

1411

1412

/* Skip over things that don't match chars */

1413

1414

case OP_REVERSE:

1415

case OP_BRANUMBER:

1416

case OP_CREF:

1417

case OP_OPT:

1418

case OP_CALLOUT:

1419

case OP_SOD:

1420

case OP_SOM:

1421

case OP_EOD:

1422

case OP_EODN:

1423

case OP_CIRC:

1424

case OP_DOLL:

1425

case OP_NOT_WORD_BOUNDARY:

1426

case OP_WORD_BOUNDARY:

1427

cc += OP_lengths[*cc];

1428

break;

1429

1430

/* Handle literal characters */

1431

1432

case OP_CHAR:

1433

case OP_CHARNC:

1434

branchlength++;

1435

cc += 2;

1436

#ifdef SUPPORT_UTF8

1437

if ((options & PCRE_UTF8) != 0)

1438

{

1439

while ((*cc & 0xc0) == 0x80) cc++;

1440

}

1441

#endif

1442

break;

1443

1444

/* Handle exact repetitions. The count is already in characters, but we

1445

need to skip over a multibyte character in UTF8 mode. */

1446

1447

case OP_EXACT:

1448

branchlength += GET2(cc,1);

1449

cc += 4;

1450

#ifdef SUPPORT_UTF8

1451

if ((options & PCRE_UTF8) != 0)

1452

{

1453

while((*cc & 0x80) == 0x80) cc++;

1454

}

1455

#endif

1456

break;

1457

1458

case OP_TYPEEXACT:

1459

branchlength += GET2(cc,1);

1460

cc += 4;

1461

break;

1462

1463

/* Handle single-char matchers */

1464

1465

case OP_PROP:

1466

case OP_NOTPROP:

1467

cc++;

1468

/* Fall through */

1469

1470

case OP_NOT_DIGIT:

1471

case OP_DIGIT:

1472

case OP_NOT_WHITESPACE:

1473

case OP_WHITESPACE:

1474

case OP_NOT_WORDCHAR:

1475

case OP_WORDCHAR:

1476

case OP_ANY:

1477

branchlength++;

1478

cc++;

1479

break;

1480

1481

/* The single-byte matcher isn't allowed */

1482

1483

case OP_ANYBYTE:

1484

return -2;

1485

1486

/* Check a class for variable quantification */

1487

1488

#ifdef SUPPORT_UTF8

1489

case OP_XCLASS:

1490

cc += GET(cc, 1) - 33;

1491

/* Fall through */

1492

#endif

1493

1494

case OP_CLASS:

1495

case OP_NCLASS:

1496

cc += 33;

1497

1498

switch (*cc)

1499

{

1500

case OP_CRSTAR:

1501

case OP_CRMINSTAR:

1502

case OP_CRQUERY:

1503

case OP_CRMINQUERY:

1504

return -1;

1505

1506

case OP_CRRANGE:

1507

case OP_CRMINRANGE:

1508

if (GET2(cc,1) != GET2(cc,3)) return -1;

1509

branchlength += GET2(cc,1);

1510

cc += 5;

1511

break;

1512

1513

default:

1514

branchlength++;

1515

}

1516

break;

1517

1518

/* Anything else is variable length */

1519

1520

default:

1521

return -1;

1522

}

1523

}

1524

/* Control never gets here */

1525

}

1526

1527

1528

1529

1530

/*************************************************

1531

* Scan compiled regex for numbered bracket *

1532

*************************************************/

1533

1534

/* This little function scans through a compiled pattern until it finds a

1535

capturing bracket with the given number.

1536

1537

Arguments:

1538

code points to start of expression

1539

utf8 TRUE in UTF-8 mode

1540

number the required bracket number

1541

1542

Returns: pointer to the opcode for the bracket, or NULL if not found

1543

1544

1545

static const uschar *

1546

find_bracket(const uschar *code, BOOL utf8, int number)

1547

{

1548

#ifndef SUPPORT_UTF8

1549

utf8 = utf8; /* Stop pedantic compilers complaining */

1550

#endif

1551

1552

for (;;)

1553

{

1554

1555

if (c == OP_END) return NULL;

1556

else if (c > OP_BRA)

1557

{

1558

int n = c - OP_BRA;

1559

if (n > EXTRACT_BASIC_MAX) n = GET2(code, 2+LINK_SIZE);

1560

if (n == number) return (uschar *)code;

1561

code += OP_lengths[OP_BRA];

1562

}

1563

else

1564

{

1565

code += OP_lengths[c];

1566

1567

#ifdef SUPPORT_UTF8

1568

1569

/* In UTF-8 mode, opcodes that are followed by a character may be followed

1570

by a multi-byte character. The length in the table is a minimum, so we have

1571

to scan along to skip the extra bytes. All opcodes are less than 128, so we

1572

can use relatively efficient code. */

1573

1574

if (utf8) switch(c)

1575

{

1576

case OP_CHAR:

1577

case OP_CHARNC:

1578

case OP_EXACT:

1579

case OP_UPTO:

1580

case OP_MINUPTO:

1581

case OP_STAR:

1582

case OP_MINSTAR:

1583

case OP_PLUS:

1584

case OP_MINPLUS:

1585

case OP_QUERY:

1586

case OP_MINQUERY:

1587

while ((*code & 0xc0) == 0x80) code++;

1588

break;

1589

1590

/* XCLASS is used for classes that cannot be represented just by a bit

1591

map. This includes negated single high-valued characters. The length in

1592

the table is zero; the actual length is stored in the compiled code. */

1593

1594

case OP_XCLASS:

1595

code += GET(code, 1) + 1;

1596

break;

1597

}

1598

#endif

1599

}

1600

}

1601

}

1602

1603

1604

1605

/*************************************************

1606

* Scan compiled regex for recursion reference *

1607

*************************************************/

1608

1609

/* This little function scans through a compiled pattern until it finds an

1610

instance of OP_RECURSE.

1611

1612

Arguments:

1613

code points to start of expression

1614

utf8 TRUE in UTF-8 mode

1615

1616

Returns: pointer to the opcode for OP_RECURSE, or NULL if not found

1617

1618

1619

static const uschar *

1620

find_recurse(const uschar *code, BOOL utf8)

1621

{

1622

#ifndef SUPPORT_UTF8

1623

utf8 = utf8; /* Stop pedantic compilers complaining */

1624

#endif

1625

1626

for (;;)

1627

{

1628

1629

if (c == OP_END) return NULL;

1630

else if (c == OP_RECURSE) return code;

1631

else if (c > OP_BRA)

1632

{

1633

code += OP_lengths[OP_BRA];

1634

}

1635

else

1636

{

1637

code += OP_lengths[c];

1638

1639

#ifdef SUPPORT_UTF8

1640

1641

/* In UTF-8 mode, opcodes that are followed by a character may be followed

1642

by a multi-byte character. The length in the table is a minimum, so we have

1643

to scan along to skip the extra bytes. All opcodes are less than 128, so we

1644

can use relatively efficient code. */

1645

1646

if (utf8) switch(c)

1647

{

1648

case OP_CHAR:

1649

case OP_CHARNC:

1650

case OP_EXACT:

1651

case OP_UPTO:

1652

case OP_MINUPTO:

1653

case OP_STAR:

1654

case OP_MINSTAR:

1655

case OP_PLUS:

1656

case OP_MINPLUS:

1657

case OP_QUERY:

1658

case OP_MINQUERY:

1659

while ((*code & 0xc0) == 0x80) code++;

1660

break;

1661

1662

/* XCLASS is used for classes that cannot be represented just by a bit

1663

map. This includes negated single high-valued characters. The length in

1664

the table is zero; the actual length is stored in the compiled code. */

1665

1666

case OP_XCLASS:

1667

code += GET(code, 1) + 1;

1668

break;

1669

}

1670

#endif

1671

}

1672

}

1673

}

1674

1675

1676

1677

/*************************************************

1678

* Scan compiled branch for non-emptiness *

1679

*************************************************/

1680

1681

/* This function scans through a branch of a compiled pattern to see whether it

1682

can match the empty string or not. It is called only from could_be_empty()

1683

below. Note that first_significant_code() skips over assertions. If we hit an

1684

unclosed bracket, we return "empty" - this means we've struck an inner bracket

1685

whose current branch will already have been scanned.

1686

1687

Arguments:

1688

code points to start of search

1689

endcode points to where to stop

1690

utf8 TRUE if in UTF8 mode

1691

1692

Returns: TRUE if what is matched could be empty

1693

1694

1695

static BOOL

1696

could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8)

1697

{

1698

1699

for (code = first_significant_code(code + 1 + LINK_SIZE, NULL, 0, TRUE);

1700

code < endcode;

1701

code = first_significant_code(code + OP_lengths[c], NULL, 0, TRUE))

1702

{

1703

const uschar *ccode;

1704

1705

c = *code;

1706

1707

if (c >= OP_BRA)

1708

{

1709

BOOL empty_branch;

1710

if (GET(code, 1) == 0) return TRUE; /* Hit unclosed bracket */

1711

1712

/* Scan a closed bracket */

1713

1714

empty_branch = FALSE;

1715

1716

{

1717

if (!empty_branch && could_be_empty_branch(code, endcode, utf8))

1718

empty_branch = TRUE;

1719

code += GET(code, 1);

1720

}

1721

while (*code == OP_ALT);

1722

if (!empty_branch) return FALSE; /* All branches are non-empty */

1723

code += 1 + LINK_SIZE;

1724

c = *code;

1725

}

1726

1727

else switch (c)

1728

{

1729

/* Check for quantifiers after a class */

1730

1731

#ifdef SUPPORT_UTF8

1732

case OP_XCLASS:

1733

ccode = code + GET(code, 1);

1734

goto CHECK_CLASS_REPEAT;

1735

#endif

1736

1737

case OP_CLASS:

1738

case OP_NCLASS:

1739

ccode = code + 33;

1740

1741

#ifdef SUPPORT_UTF8

1742

CHECK_CLASS_REPEAT:

1743

#endif

1744

1745

switch (*ccode)

1746

{

1747

case OP_CRSTAR: /* These could be empty; continue */

1748

case OP_CRMINSTAR:

1749

case OP_CRQUERY:

1750

case OP_CRMINQUERY:

1751

break;

1752

1753

default: /* Non-repeat => class must match */

1754

case OP_CRPLUS: /* These repeats aren't empty */

1755

case OP_CRMINPLUS:

1756

return FALSE;

1757

1758

case OP_CRRANGE:

1759

case OP_CRMINRANGE:

1760

if (GET2(ccode, 1) > 0) return FALSE; /* Minimum > 0 */

1761

break;

1762

}

1763

break;

1764

1765

/* Opcodes that must match a character */

1766

1767

case OP_PROP:

1768

case OP_NOTPROP:

1769

case OP_EXTUNI:

1770

case OP_NOT_DIGIT:

1771

case OP_DIGIT:

1772

case OP_NOT_WHITESPACE:

1773

case OP_WHITESPACE:

1774

case OP_NOT_WORDCHAR:

1775

case OP_WORDCHAR:

1776

case OP_ANY:

1777

case OP_ANYBYTE:

1778

case OP_CHAR:

1779

case OP_CHARNC:

1780

case OP_NOT:

1781

case OP_PLUS:

1782

case OP_MINPLUS:

1783

case OP_EXACT:

1784

case OP_NOTPLUS:

1785

case OP_NOTMINPLUS:

1786

case OP_NOTEXACT:

1787

case OP_TYPEPLUS:

1788

case OP_TYPEMINPLUS:

1789

case OP_TYPEEXACT:

1790

return FALSE;

1791

1792

/* End of branch */

1793

1794

case OP_KET:

1795

case OP_KETRMAX:

1796

case OP_KETRMIN:

1797

case OP_ALT:

1798

return TRUE;

1799

1800

/* In UTF-8 mode, STAR, MINSTAR, QUERY, MINQUERY, UPTO, and MINUPTO may be

1801

followed by a multibyte character */

1802

1803

#ifdef SUPPORT_UTF8

1804

case OP_STAR:

1805

case OP_MINSTAR:

1806

case OP_QUERY:

1807

case OP_MINQUERY:

1808

case OP_UPTO:

1809

case OP_MINUPTO:

1810

if (utf8) while ((code[2] & 0xc0) == 0x80) code++;

1811

break;

1812

#endif

1813

}

1814

}

1815

1816

return TRUE;

1817

}

1818

1819

1820

1821

/*************************************************

1822

* Scan compiled regex for non-emptiness *

1823

*************************************************/

1824

1825

/* This function is called to check for left recursive calls. We want to check
the current branch of the current pattern to see if it could match the empty
string. If it could, we must look outwards for branches at other levels,
stopping when we pass beyond the bracket which is the subject of the recursion.

Arguments:
  code        points to start of the recursion
  endcode     points to where to stop (current RECURSE item)
  bcptr       points to the chain of current (unclosed) branch starts
  utf8        TRUE if in UTF-8 mode

Returns:      TRUE if what is matched could be empty
*/

1837

1838

1839

static BOOL

1840

could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,

1841

BOOL utf8)

1842

{

1843

while (bcptr != NULL && bcptr->current >= code)

1844

{

1845

if (!could_be_empty_branch(bcptr->current, endcode, utf8)) return FALSE;

1846

bcptr = bcptr->outer;

1847

}

1848

return TRUE;

1849

}

1850

1851

1852

1853

/*************************************************

1854

* Check for POSIX class syntax *

1855

*************************************************/

1856

1857

/* This function is called when the sequence "[:" or "[." or "[=" is
encountered in a character class. It checks whether this is followed by an
optional ^ and then a sequence of letters, terminated by a matching ":]" or
".]" or "=]".

Arguments:
  ptr      pointer to the initial [
  endptr   where to return the end pointer
  cd       pointer to compile data

Returns:   TRUE or FALSE
*/

1868

1869

1870

static BOOL

1871

check_posix_syntax(const uschar *ptr, const uschar **endptr, compile_data *cd)

1872

{

1873

int terminator; /* Don't combine these lines; the Solaris cc */

1874

terminator = *(++ptr); /* compiler warns about "non-constant" initializer. */

1875

if (*(++ptr) == '^') ptr++;

1876

while ((cd->ctypes[*ptr] & ctype_letter) != 0) ptr++;

1877

if (*ptr == terminator && ptr[1] == ']')

1878

{

1879

*endptr = ptr;

1880

return TRUE;

1881

}

1882

return FALSE;

1883

}

1884

1885

1886

1887

1888

/*************************************************

1889

* Check POSIX class name *

1890

*************************************************/

1891

1892

/* This function is called to check the name given in a POSIX-style class entry
such as [:alnum:].

Arguments:
  ptr        points to the first letter
  len        the length of the name

Returns:     a value representing the name, or -1 if unknown
*/

1900

1901

1902

static int

1903

check_posix_name(const uschar *ptr, int len)

1904

{

1905

1906

while (posix_name_lengths[yield] != 0)

1907

{

1908

if (len == posix_name_lengths[yield] &&

1909

strncmp((const char *)ptr, posix_names[yield], len) == 0) return yield;

1910

yield++;

1911

}

1912

return -1;

1913

}

1914

1915

1916

/*************************************************

1917

* Adjust OP_RECURSE items in repeated group *

1918

*************************************************/

1919

1920

/* OP_RECURSE items contain an offset from the start of the regex to the group
that is referenced. This means that groups can be replicated for fixed
repetition simply by copying (because the recursion is allowed to refer to
earlier groups that are outside the current group). However, when a group is
optional (i.e. the minimum quantifier is zero), OP_BRAZERO is inserted before
it, after it has been compiled. This means that any OP_RECURSE items within it
that refer to the group itself or any contained groups have to have their
offsets adjusted. That is the job of this function. Before it is called, the
partially compiled regex must be temporarily terminated with OP_END.

Arguments:
  group      points to the start of the group
  adjust     the amount by which the group is to be moved
  utf8       TRUE in UTF-8 mode
  cd         contains pointers to tables etc.

Returns:     nothing
*/

1937

1938

1939

static void

1940

adjust_recurse(uschar *group, int adjust, BOOL utf8, compile_data *cd)

1941

{

1942

uschar *ptr = group;

1943

while ((ptr = (uschar *)find_recurse(ptr, utf8)) != NULL)

1944

{

1945

int offset = GET(ptr, 1);

1946

if (cd->start_code + offset >= group) PUT(ptr, 1, offset + adjust);

1947

ptr += 1 + LINK_SIZE;

1948

}

1949

}

1950

1951

1952

1953

/*************************************************

1954

* Insert an automatic callout point *

1955

*************************************************/

1956

1957

/* This function is called when the PCRE_AUTO_CALLOUT option is set, to insert
callout points before each pattern item.

Arguments:
  code           current code pointer
  ptr            current pattern pointer
  cd             pointers to tables etc

Returns:         new code pointer
*/

1966

1967

1968

static uschar *

1969

auto_callout(uschar *code, const uschar *ptr, compile_data *cd)

1970

{

1971

*code++ = OP_CALLOUT;

1972

*code++ = 255;

1973

PUT(code, 0, ptr - cd->start_pattern); /* Pattern offset */

1974

PUT(code, LINK_SIZE, 0); /* Default length */

1975

return code + 2*LINK_SIZE;

1976

}

1977

1978

1979

1980

/*************************************************

1981

* Complete a callout item *

1982

*************************************************/

1983

1984

/* A callout item contains the length of the next item in the pattern, which
we can't fill in till after we have reached the relevant point. This is used
for both automatic and manual callouts.

Arguments:
  previous_callout   points to previous callout item
  ptr                current pattern pointer
  cd                 pointers to tables etc

Returns:             nothing
*/

1994

1995

1996

static void

1997

complete_callout(uschar *previous_callout, const uschar *ptr, compile_data *cd)

1998

{

1999

int length = ptr - cd->start_pattern - GET(previous_callout, 2);

2000

PUT(previous_callout, 2 + LINK_SIZE, length);

2001

}

2002

2003

2004

2005

#ifdef SUPPORT_UCP

2006

/*************************************************

2007

* Get othercase range *

2008

*************************************************/

2009

2010

/* This function is passed the start and end of a class range, in UTF-8 mode
with UCP support. It searches up the characters, looking for internal ranges of
characters in the "other" case. Each call returns the next one, updating the
start address.

Arguments:
  cptr        points to starting character value; updated
  d           end value
  ocptr       where to put start of othercase range
  odptr       where to put end of othercase range

Yield:        TRUE when range returned; FALSE when no more
*/

2022

2023

2024

static BOOL

2025

get_othercase_range(int *cptr, int d, int *ocptr, int *odptr)

2026

{

2027

int c, chartype, othercase, next;

2028

2029

for (c = *cptr; c <= d; c++)

2030

{

2031

if (ucp_findchar(c, &chartype, &othercase) == ucp_L && othercase != 0) break;

2032

}

2033

2034

if (c > d) return FALSE;

2035

2036

*ocptr = othercase;

2037

next = othercase + 1;

2038

2039

for (++c; c <= d; c++)

2040

{

2041

if (ucp_findchar(c, &chartype, &othercase) != ucp_L || othercase != next)

2042

break;

2043

next++;

2044

}

2045

2046

*odptr = next - 1;

2047

*cptr = c;

2048

2049

return TRUE;

2050

}

2051

#endif /* SUPPORT_UCP */

2052

2053

2054

/*************************************************

2055

* Compile one branch *

2056

*************************************************/

2057

2058

/* Scan the pattern, compiling it into the code vector. If the options are
changed during the branch, the pointer is used to change the external options
bits.

Arguments:
  optionsptr     pointer to the option bits
  brackets       points to number of extracting brackets used
  codeptr        points to the pointer to the current code point
  ptrptr         points to the current pattern pointer
  errorptr       points to pointer to error message
  firstbyteptr   set to initial literal character, or < 0 (REQ_UNSET, REQ_NONE)
  reqbyteptr     set to the last literal character required, else < 0
  bcptr          points to current branch chain
  cd             contains pointers to tables etc.

Returns:         TRUE on success
                 FALSE, with *errorptr set on error
*/

2075

2076

2077

static BOOL

2078

compile_branch(int *optionsptr, int *brackets, uschar **codeptr,

2079

const uschar **ptrptr, const char **errorptr, int *firstbyteptr,

2080

int *reqbyteptr, branch_chain *bcptr, compile_data *cd)

2081

{

2082

int repeat_type, op_type;

2083

int repeat_min = 0, repeat_max = 0; /* To please picky compilers */

2084

int bravalue = 0;

2085

int greedy_default, greedy_non_default;

2086

int firstbyte, reqbyte;

2087

int zeroreqbyte, zerofirstbyte;

2088

int req_caseopt, reqvary, tempreqvary;

2089

int condcount = 0;

2090

int options = *optionsptr;

2091

int after_manual_callout = 0;

2092

2093

2094

uschar *tempcode;

2095

BOOL inescq = FALSE;

2096

BOOL groupsetfirstbyte = FALSE;

2097

const uschar *ptr = *ptrptr;

2098

const uschar *tempptr;

2099

uschar *previous = NULL;

2100

uschar *previous_callout = NULL;

2101

uschar classbits[32];

2102

2103

#ifdef SUPPORT_UTF8

2104

BOOL class_utf8;

2105

BOOL utf8 = (options & PCRE_UTF8) != 0;

2106

uschar *class_utf8data;

2107

uschar utf8_char[6];

2108

#else

2109

BOOL utf8 = FALSE;

2110

#endif

2111

2112

/* Set up the default and non-default settings for greediness */

2113

2114

greedy_default = ((options & PCRE_UNGREEDY) != 0);

2115

greedy_non_default = greedy_default ^ 1;

2116

2117

/* Initialize no first byte, no required byte. REQ_UNSET means "no char

2118

matching encountered yet". It gets changed to REQ_NONE if we hit something that

2119

matches a non-fixed char first char; reqbyte just remains unset if we never

2120

find one.

2121

2122

When we hit a repeat whose minimum is zero, we may have to adjust these values

2123

to take the zero repeat into account. This is implemented by setting them to

2124

zerofirstbyte and zeroreqbyte when such a repeat is encountered. The individual

2125

item types that can be repeated set these backoff variables appropriately. */

2126

2127

firstbyte = reqbyte = zerofirstbyte = zeroreqbyte = REQ_UNSET;

2128

2129

/* The variable req_caseopt contains either the REQ_CASELESS value or zero,

2130

according to the current setting of the caseless flag. REQ_CASELESS is a bit

2131

value > 255. It is added into the firstbyte or reqbyte variables to record the

2132

case status of the value. This is used only for ASCII characters. */

2133

2134

req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;

2135

2136

/* Switch on next character until the end of the branch */

2137

2138

for (;; ptr++)

2139

{

2140

BOOL negate_class;

2141

BOOL possessive_quantifier;

2142

BOOL is_quantifier;

2143

int class_charcount;

2144

int class_lastchar;

2145

int newoptions;

2146

int recno;

2147

int skipbytes;

2148

int subreqbyte;

2149

int subfirstbyte;

2150

int mclength;

2151

uschar mcbuffer[8];

2152

2153

/* Next byte in the pattern */

2154

2155

c = *ptr;

2156

2157

/* If in \Q...\E, check for the end; if not, we have a literal */

2158

2159

if (inescq && c != 0)

2160

{

2161

if (c == '\\' && ptr[1] == 'E')

2162

{

2163

inescq = FALSE;

2164

ptr++;

2165

continue;

2166

}

2167

else

2168

{

2169

if (previous_callout != NULL)

2170

{

2171

complete_callout(previous_callout, ptr, cd);

2172

previous_callout = NULL;

2173

}

2174

if ((options & PCRE_AUTO_CALLOUT) != 0)

2175

{

2176

previous_callout = code;

2177

code = auto_callout(code, ptr, cd);

2178

}

2179

goto NORMAL_CHAR;

2180

}

2181

}

2182

2183

/* Fill in length of a previous callout, except when the next thing is

2184

a quantifier. */

2185

2186

is_quantifier = c == '*' || c == '+' || c == '?' ||

2187

(c == '{' && is_counted_repeat(ptr+1));

2188

2189

if (!is_quantifier && previous_callout != NULL &&

2190

after_manual_callout-- <= 0)

2191

{

2192

complete_callout(previous_callout, ptr, cd);

2193

previous_callout = NULL;

2194

}

2195

2196

/* In extended mode, skip white space and comments */

2197

2198

if ((options & PCRE_EXTENDED) != 0)

2199

{

2200

if ((cd->ctypes[c] & ctype_space) != 0) continue;

2201

if (c == '#')

2202

{

2203

/* The space before the ; is to avoid a warning on a silly compiler

2204

on the Macintosh. */

2205

while ((c = *(++ptr)) != 0 && c != NEWLINE) ;

2206

if (c != 0) continue; /* Else fall through to handle end of string */

2207

}

2208

}

2209

2210

/* No auto callout for quantifiers. */

2211

2212

if ((options & PCRE_AUTO_CALLOUT) != 0 && !is_quantifier)

2213

{

2214

previous_callout = code;

2215

code = auto_callout(code, ptr, cd);

2216

}

2217

2218

switch(c)

2219

{

2220

/* The branch terminates at end of string, |, or ). */

2221

2222

case 0:

2223

case '|':

2224

case ')':

2225

*firstbyteptr = firstbyte;

2226

*reqbyteptr = reqbyte;

2227

*codeptr = code;

2228

*ptrptr = ptr;

2229

return TRUE;

2230

2231

/* Handle single-character metacharacters. In multiline mode, ^ disables

2232

the setting of any following char as a first character. */

2233

2234

case '^':

2235

if ((options & PCRE_MULTILINE) != 0)

2236

{

2237

if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;

2238

}

2239

previous = NULL;

2240

*code++ = OP_CIRC;

2241

break;

2242

2243

case '$':

2244

previous = NULL;

2245

*code++ = OP_DOLL;

2246

break;

2247

2248

/* There can never be a first char if '.' is first, whatever happens about

2249

repeats. The value of reqbyte doesn't change either. */

2250

2251

case '.':

2252

if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;

2253

zerofirstbyte = firstbyte;

2254

zeroreqbyte = reqbyte;

2255

previous = code;

2256

*code++ = OP_ANY;

2257

break;

2258

2259

/* Character classes. If the included characters are all < 256 in value, we
build a 32-byte bitmap of the permitted characters, except in the special
case where there is only one such character. For negated classes, we build
the map as usual, then invert it at the end. However, we use a different
opcode so that data characters > 255 can be handled correctly.

If the class contains characters outside the 0-255 range, a different
opcode is compiled. It may optionally have a bit map for characters < 256,
but those above are explicitly listed afterwards. A flag byte tells
whether the bitmap is present, and whether this is a negated class or not.
*/

2269

2270

2271

case '[':

2272

previous = code;

2273

2274

/* PCRE supports POSIX class stuff inside a class. Perl gives an error if

2275

they are encountered at the top level, so we'll do that too. */

2276

2277

if ((ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&

2278

check_posix_syntax(ptr, &tempptr, cd))

2279

{

2280

*errorptr = (ptr[1] == ':')? ERR13 : ERR31;

2281

goto FAILED;

2282

}

2283

2284

/* If the first character is '^', set the negation flag and skip it. */

2285

2286

if ((c = *(++ptr)) == '^')

2287

{

2288

negate_class = TRUE;

2289

c = *(++ptr);

2290

}

2291

else

2292

{

2293

negate_class = FALSE;

2294

}

2295

2296

/* Keep a count of chars with values < 256 so that we can optimize the case

2297

of just a single character (as long as it's < 256). For higher valued UTF-8

2298

characters, we don't yet do any optimization. */

2299

2300

class_charcount = 0;

2301

class_lastchar = -1;

2302

2303

#ifdef SUPPORT_UTF8

2304

class_utf8 = FALSE; /* No chars >= 256 */

2305

class_utf8data = code + LINK_SIZE + 34; /* For UTF-8 items */

2306

#endif

2307

2308

/* Initialize the 32-char bit map to all zeros. We have to build the

2309

map in a temporary bit of store, in case the class contains only 1

2310

character (< 256), because in that case the compiled code doesn't use the

2311

bit map. */

2312

2313

memset(classbits, 0, 32 * sizeof(uschar));

2314

2315

/* Process characters until ] is reached. By writing this as a "do" it

2316

means that an initial ] is taken as a data character. The first pass

2317

through the regex checked the overall syntax, so we don't need to be very

2318

strict here. At the start of the loop, c contains the first byte of the

2319

character. */

2320

2321

2322

{

2323

#ifdef SUPPORT_UTF8

2324

if (utf8 && c > 127)

2325

{ /* Braces are required because the */

2326

GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */

2327

}

2328

#endif

2329

2330

/* Inside \Q...\E everything is literal except \E */

2331

2332

if (inescq)

2333

{

2334

if (c == '\\' && ptr[1] == 'E')

2335

{

2336

inescq = FALSE;

2337

ptr++;

2338

continue;

2339

}

2340

else goto LONE_SINGLE_CHARACTER;

2341

}

2342

2343

/* Handle POSIX class names. Perl allows a negation extension of the

2344

form [:^name:]. A square bracket that doesn't match the syntax is

2345

treated as a literal. We also recognize the POSIX constructions

2346

[.ch.] and [=ch=] ("collating elements") and fault them, as Perl

2347

5.6 and 5.8 do. */

2348

2349

if (c == '[' &&

2350

(ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&

2351

check_posix_syntax(ptr, &tempptr, cd))

2352

{

2353

BOOL local_negate = FALSE;

2354

int posix_class, i;

2355

2356

2357

if (ptr[1] != ':')

2358

{

2359

*errorptr = ERR31;

2360

goto FAILED;

2361

}

2362

2363

ptr += 2;

2364

if (*ptr == '^')

2365

{

2366

local_negate = TRUE;

2367

ptr++;

2368

}

2369

2370

posix_class = check_posix_name(ptr, tempptr - ptr);

2371

if (posix_class < 0)

2372

{

2373

*errorptr = ERR30;

2374

goto FAILED;

2375

}

2376

2377

/* If matching is caseless, upper and lower are converted to

2378

alpha. This relies on the fact that the class table starts with

2379

alpha, lower, upper as the first 3 entries. */

2380

2381

if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)

2382

posix_class = 0;

2383

2384

/* Or into the map we are building up to 3 of the static class

2385

tables, or their negations. The [:blank:] class sets up the same

2386

chars as the [:space:] class (all white space). We remove the vertical

2387

white space chars afterwards. */

2388

2389

posix_class *= 3;

2390

for (i = 0; i < 3; i++)

2391

{

2392

BOOL blankclass = strncmp((char *)ptr, "blank", 5) == 0;

2393

int taboffset = posix_class_maps[posix_class + i];

2394

if (taboffset < 0) break;

2395

if (local_negate)

2396

{

2397

if (i == 0)

2398

for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+taboffset];

2399

else

2400

for (c = 0; c < 32; c++) classbits[c] &= ~cbits[c+taboffset];

2401

if (blankclass) classbits[1] |= 0x3c;

2402

}

2403

else

2404

{

2405

for (c = 0; c < 32; c++) classbits[c] |= cbits[c+taboffset];

2406

if (blankclass) classbits[1] &= ~0x3c;

2407

}

2408

}

2409

2410

ptr = tempptr + 1;

2411

class_charcount = 10; /* Set > 1; assumes more than 1 per class */

2412

continue; /* End of POSIX syntax handling */

2413

}

2414

2415

/* Backslash may introduce a single character, or it may introduce one

2416

of the specials, which just set a flag. Escaped items are checked for

2417

validity in the pre-compiling pass. The sequence \b is a special case.

2418

Inside a class (and only there) it is treated as backspace. Elsewhere

2419

it marks a word boundary. Other escapes have preset maps ready to

2420

or into the one we are building. We assume they have more than one

2421

character in them, so set class_charcount bigger than one. */

2422

2423

if (c == '\\')

2424

{

2425

c = check_escape(&ptr, errorptr, *brackets, options, TRUE);

2426

2427

if (-c == ESC_b) c = '\b'; /* \b is backslash in a class */

2428

else if (-c == ESC_X) c = 'X'; /* \X is literal X in a class */

2429

else if (-c == ESC_Q) /* Handle start of quoted string */

2430

{

2431

if (ptr[1] == '\\' && ptr[2] == 'E')

2432

{

2433

ptr += 2; /* avoid empty string */

2434

}

2435

else inescq = TRUE;

2436

continue;

2437

}

2438

2439

if (c < 0)

2440

{

2441

2442

class_charcount += 2; /* Greater than 1 is what matters */

2443

switch (-c)

2444

{

2445

case ESC_d:

2446

for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_digit];

2447

continue;

2448

2449

case ESC_D:

2450

for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit];

2451

continue;

2452

2453

case ESC_w:

2454

for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_word];

2455

continue;

2456

2457

case ESC_W:

2458

for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];

2459

continue;

2460

2461

case ESC_s:

2462

for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_space];

2463

classbits[1] &= ~0x08; /* Perl 5.004 onwards omits VT from \s */

2464

continue;

2465

2466

case ESC_S:

2467

for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space];

2468

classbits[1] |= 0x08; /* Perl 5.004 onwards omits VT from \s */

2469

continue;

2470

2471

#ifdef SUPPORT_UCP

2472

case ESC_p:

2473

case ESC_P:

2474

{

2475

BOOL negated;

2476

int property = get_ucp(&ptr, &negated, errorptr);

2477

if (property < 0) goto FAILED;

2478

class_utf8 = TRUE;

2479

*class_utf8data++ = ((-c == ESC_p) != negated)?

2480

XCL_PROP : XCL_NOTPROP;

2481

*class_utf8data++ = property;

2482

class_charcount -= 2; /* Not a < 256 character */

2483

}

2484

continue;

2485

#endif

2486

2487

/* Unrecognized escapes are faulted if PCRE is running in its

2488

strict mode. By default, for compatibility with Perl, they are

2489

treated as literals. */

2490

2491

default:

2492

if ((options & PCRE_EXTRA) != 0)

2493

{

2494

*errorptr = ERR7;

2495

goto FAILED;

2496

}

2497

c = *ptr; /* The final character */

2498

class_charcount -= 2; /* Undo the default count from above */

2499

}

2500

}

2501

2502

/* Fall through if we have a single character (c >= 0). This may be

2503

> 256 in UTF-8 mode. */

2504

2505

} /* End of backslash handling */

2506

2507

/* A single character may be followed by '-' to form a range. However,

2508

Perl does not permit ']' to be the end of the range. A '-' character

2509

here is treated as a literal. */

2510

2511

if (ptr[1] == '-' && ptr[2] != ']')

2512

{

2513

int d;

2514

ptr += 2;

2515

2516

#ifdef SUPPORT_UTF8

2517

if (utf8)

2518

{ /* Braces are required because the */

2519

GETCHARLEN(d, ptr, ptr); /* macro generates multiple statements */

2520

}

2521

else

2522

#endif

2523

d = *ptr; /* Not UTF-8 mode */

2524

2525

/* The second part of a range can be a single-character escape, but

2526

not any of the other escapes. Perl 5.6 treats a hyphen as a literal

2527

in such circumstances. */

2528

2529

if (d == '\\')

2530

{

2531

const uschar *oldptr = ptr;

2532

d = check_escape(&ptr, errorptr, *brackets, options, TRUE);

2533

2534

/* \b is backspace; \X is literal X; any other special means the '-'
was literal */

2536

2537

if (d < 0)

2538

{

2539

if (d == -ESC_b) d = '\b';

2540

else if (d == -ESC_X) d = 'X'; else

2541

{

2542

ptr = oldptr - 2;

2543

goto LONE_SINGLE_CHARACTER; /* A few lines below */

2544

}

2545

}

2546

}

2547

2548

/* The check that the two values are in the correct order happens in

2549

the pre-pass. Optimize one-character ranges */

2550

2551

if (d == c) goto LONE_SINGLE_CHARACTER; /* A few lines below */

2552

2553

/* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless

2554

matching, we have to use an XCLASS with extra data items. Caseless

2555

matching for characters > 127 is available only if UCP support is

2556

available. */

2557

2558

#ifdef SUPPORT_UTF8

2559

if (utf8 && (d > 255 || ((options & PCRE_CASELESS) != 0 && d > 127)))

2560

{

2561

class_utf8 = TRUE;

2562

2563

/* With UCP support, we can find the other case equivalents of

2564

the relevant characters. There may be several ranges. Optimize how

2565

they fit with the basic range. */

2566

2567

#ifdef SUPPORT_UCP

2568

if ((options & PCRE_CASELESS) != 0)

2569

{

2570

int occ, ocd;

2571

int cc = c;

2572

int origd = d;

2573

while (get_othercase_range(&cc, origd, &occ, &ocd))

2574

{

2575

if (occ >= c && ocd <= d) continue; /* Skip embedded ranges */

2576

2577

if (occ < c && ocd >= c - 1) /* Extend the basic range */

2578

{ /* if there is overlap, */

2579

c = occ; /* noting that if occ < c */

2580

continue; /* we can't have ocd > d */

2581

} /* because a subrange is */

2582

if (ocd > d && occ <= d + 1) /* always shorter than */

2583

{ /* the basic range. */

2584

d = ocd;

2585

continue;

2586

}

2587

2588

if (occ == ocd)

2589

{

2590

*class_utf8data++ = XCL_SINGLE;

2591

}

2592

else

2593

{

2594

*class_utf8data++ = XCL_RANGE;

2595

class_utf8data += ord2utf8(occ, class_utf8data);

2596

}

2597

class_utf8data += ord2utf8(ocd, class_utf8data);

2598

}

2599

}

2600

#endif /* SUPPORT_UCP */

2601

2602

/* Now record the original range, possibly modified for UCP caseless

2603

overlapping ranges. */

2604

2605

*class_utf8data++ = XCL_RANGE;

2606

class_utf8data += ord2utf8(c, class_utf8data);

2607

class_utf8data += ord2utf8(d, class_utf8data);

2608

2609

/* With UCP support, we are done. Without UCP support, there is no

2610

caseless matching for UTF-8 characters > 127; we can use the bit map

2611

for the smaller ones. */

2612

2613

#ifdef SUPPORT_UCP

2614

continue; /* With next character in the class */

2615

#else

2616

if ((options & PCRE_CASELESS) == 0 || c > 127) continue;

2617

2618

/* Adjust upper limit and fall through to set up the map */

2619

2620

d = 127;

2621

2622

#endif /* SUPPORT_UCP */

2623

}

2624

#endif /* SUPPORT_UTF8 */

2625

2626

/* We use the bit map for all cases when not in UTF-8 mode; else

2627

ranges that lie entirely within 0-127 when there is UCP support; else

2628

for partial ranges without UCP support. */

2629

2630

for (; c <= d; c++)

2631

{

2632

classbits[c/8] |= (1 << (c&7));

2633

if ((options & PCRE_CASELESS) != 0)

2634

{

2635

int uc = cd->fcc[c]; /* flip case */

2636

classbits[uc/8] |= (1 << (uc&7));

2637

}

2638

class_charcount++; /* in case a one-char range */

2639

class_lastchar = c;

2640

}

2641

2642

continue; /* Go get the next char in the class */

2643

}

2644

2645

/* Handle a lone single character - we can get here for a normal

2646

non-escape char, or after \ that introduces a single character or for an

2647

apparent range that isn't. */

2648

2649

LONE_SINGLE_CHARACTER:

2650

2651

/* Handle a character that cannot go in the bit map */

2652

2653

#ifdef SUPPORT_UTF8

2654

if (utf8 && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127)))

2655

{

2656

class_utf8 = TRUE;

2657

*class_utf8data++ = XCL_SINGLE;

2658

class_utf8data += ord2utf8(c, class_utf8data);

2659

2660

#ifdef SUPPORT_UCP

2661

if ((options & PCRE_CASELESS) != 0)

2662

{

2663

int chartype;

2664

int othercase;

2665

if (ucp_findchar(c, &chartype, &othercase) >= 0 && othercase > 0)

2666

{

2667

*class_utf8data++ = XCL_SINGLE;

2668

class_utf8data += ord2utf8(othercase, class_utf8data);

2669

}

2670

}

2671

#endif /* SUPPORT_UCP */

2672

2673

}

2674

else

2675

#endif /* SUPPORT_UTF8 */

2676

2677

/* Handle a single-byte character */

2678

{

2679

classbits[c/8] |= (1 << (c&7));

2680

if ((options & PCRE_CASELESS) != 0)

2681

{

2682

c = cd->fcc[c]; /* flip case */

2683

classbits[c/8] |= (1 << (c&7));

2684

}

2685

class_charcount++;

2686

class_lastchar = c;

2687

}

2688

}

2689

2690

/* Loop until ']' reached; the check for end of string happens inside the

2691

loop. This "while" is the end of the "do" above. */

2692

2693

while ((c = *(++ptr)) != ']' || inescq);

2694

2695

/* If class_charcount is 1, we saw precisely one character whose value is

2696

less than 256. In non-UTF-8 mode we can always optimize. In UTF-8 mode, we

2697

can optimize the negative case only if there were no characters >= 128

2698

because OP_NOT and the related opcodes like OP_NOTSTAR operate on

2699

single-bytes only. This is an historical hangover. Maybe one day we can

2700

tidy these opcodes to handle multi-byte characters.

2701

2702

The optimization throws away the bit map. We turn the item into a

2703

1-character OP_CHAR[NC] if it's positive, or OP_NOT if it's negative. Note

2704

that OP_NOT does not support multibyte characters. In the positive case, it

2705

can cause firstbyte to be set. Otherwise, there can be no first char if

2706

this item is first, whatever repeat count may follow. In the case of

2707

reqbyte, save the previous value for reinstating. */

2708

2709

#ifdef SUPPORT_UTF8

2710

if (class_charcount == 1 &&

2711

(!utf8 ||

2712

(!class_utf8 && (!negate_class || class_lastchar < 128))))

2713

2714

#else

2715

if (class_charcount == 1)

2716

#endif

2717

{

2718

zeroreqbyte = reqbyte;

2719

2720

/* The OP_NOT opcode works on one-byte characters only. */

2721

2722

if (negate_class)

2723

{

2724

if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;

2725

zerofirstbyte = firstbyte;

2726

*code++ = OP_NOT;

2727

*code++ = class_lastchar;

2728

break;

2729

}

2730

2731

/* For a single, positive character, get the value into mcbuffer, and

2732

then we can handle this with the normal one-character code. */

2733

2734

#ifdef SUPPORT_UTF8

2735

if (utf8 && class_lastchar > 127)

2736

mclength = ord2utf8(class_lastchar, mcbuffer);

2737

else

2738

#endif

2739

{

2740

mcbuffer[0] = class_lastchar;

2741

mclength = 1;

2742

}

2743

goto ONE_CHAR;

2744

} /* End of 1-char optimization */

2745

2746

/* The general case - not the one-char optimization. If this is the first

2747

thing in the branch, there can be no first char setting, whatever the

2748

repeat count. Any reqbyte setting must remain unchanged after any kind of

2749

repeat. */

2750

2751

if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;

2752

zerofirstbyte = firstbyte;

2753

zeroreqbyte = reqbyte;

2754

2755

/* If there are characters with values > 255, we have to compile an

2756

extended class, with its own opcode. If there are no characters < 256,

2757

we can omit the bitmap. */

2758

2759

#ifdef SUPPORT_UTF8

2760

if (class_utf8)

2761

{

2762

*class_utf8data++ = XCL_END; /* Marks the end of extra data */

2763

*code++ = OP_XCLASS;

2764

code += LINK_SIZE;

2765

*code = negate_class? XCL_NOT : 0;

2766

2767

/* If the map is required, install it, and move on to the end of

2768

the extra data */

2769

2770

if (class_charcount > 0)

2771

{

2772

*code++ |= XCL_MAP;

2773

memcpy(code, classbits, 32);

2774

code = class_utf8data;

2775

}

2776

2777

/* If the map is not required, slide down the extra data. */

2778

2779

else

2780

{

2781

int len = class_utf8data - (code + 33);

2782

memmove(code + 1, code + 33, len);

2783

code += len + 1;

2784

}

2785

2786

/* Now fill in the complete length of the item */

2787

2788

PUT(previous, 1, code - previous);

2789

break; /* End of class handling */

2790

}

2791

#endif

2792

2793

/* If there are no characters > 255, negate the 32-byte map if necessary,

2794

and copy it into the code vector. If this is the first thing in the branch,

2795

there can be no first char setting, whatever the repeat count. Any reqbyte

2796

setting must remain unchanged after any kind of repeat. */

2797

2798

if (negate_class)

2799

{

2800

*code++ = OP_NCLASS;

2801

for (c = 0; c < 32; c++) code[c] = ~classbits[c];

2802

}

2803

else

2804

{

2805

*code++ = OP_CLASS;

2806

memcpy(code, classbits, 32);

2807

}

2808

code += 32;

2809

break;

2810

2811

/* Various kinds of repeat; '{' is not necessarily a quantifier, but this

2812

has been tested above. */

2813

2814

case '{':

2815

if (!is_quantifier) goto NORMAL_CHAR;

2816

ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr);

2817

if (*errorptr != NULL) goto FAILED;

2818

goto REPEAT;

2819

2820

case '*':

2821

repeat_min = 0;

2822

repeat_max = -1;

2823

goto REPEAT;

2824

2825

case '+':

2826

repeat_min = 1;

2827

repeat_max = -1;

2828

goto REPEAT;

2829

2830

case '?':

2831

repeat_min = 0;

2832

repeat_max = 1;

2833

2834

REPEAT:

2835

if (previous == NULL)

2836

{

2837

*errorptr = ERR9;

2838

goto FAILED;

2839

}

2840

2841

if (repeat_min == 0)

2842

{

2843

firstbyte = zerofirstbyte; /* Adjust for zero repeat */

2844

reqbyte = zeroreqbyte; /* Ditto */

2845

}

2846

2847

/* Remember whether this is a variable length repeat */

2848

2849

reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY;

2850

2851

op_type = 0; /* Default single-char op codes */

2852

possessive_quantifier = FALSE; /* Default not possessive quantifier */

2853

2854

/* Save start of previous item, in case we have to move it up to make space

2855

for an inserted OP_ONCE for the additional '+' extension. */

2856

2857

tempcode = previous;

2858

2859

/* If the next character is '+', we have a possessive quantifier. This

2860

implies greediness, whatever the setting of the PCRE_UNGREEDY option.

2861

If the next character is '?' this is a minimizing repeat, by default,

2862

but if PCRE_UNGREEDY is set, it works the other way round. We change the

2863

repeat type to the non-default. */

2864

2865

if (ptr[1] == '+')

2866

{

2867

repeat_type = 0; /* Force greedy */

2868

possessive_quantifier = TRUE;

2869

ptr++;

2870

}

2871

else if (ptr[1] == '?')

2872

{

2873

repeat_type = greedy_non_default;

2874

ptr++;

2875

}

2876

else repeat_type = greedy_default;

2877

2878

/* If previous was a recursion, we need to wrap it inside brackets so that

2879

it can be replicated if necessary. */

2880

2881

if (*previous == OP_RECURSE)

2882

{

2883

memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE);

2884

code += 1 + LINK_SIZE;

2885

*previous = OP_BRA;

2886

PUT(previous, 1, code - previous);

2887

*code = OP_KET;

2888

PUT(code, 1, code - previous);

2889

code += 1 + LINK_SIZE;

2890

}

2891

2892

/* If previous was a character match, abolish the item and generate a

2893

repeat item instead. If a char item has a minimum of more than one, ensure

2894

that it is set in reqbyte - it might not be if a sequence such as x{3} is

2895

the first thing in a branch because the x will have gone into firstbyte

2896

instead. */

2897

2898

if (*previous == OP_CHAR || *previous == OP_CHARNC)

2899

{

2900

/* Deal with UTF-8 characters that take up more than one byte. It's

2901

easier to write this out separately than try to macrify it. Use c to

2902

hold the length of the character in bytes, plus 0x80 to flag that it's a

2903

length rather than a small character. */

2904

2905

#ifdef SUPPORT_UTF8

2906

if (utf8 && (code[-1] & 0x80) != 0)

2907

{

2908

uschar *lastchar = code - 1;

2909

while((*lastchar & 0xc0) == 0x80) lastchar--;

2910

c = code - lastchar; /* Length of UTF-8 character */

2911

memcpy(utf8_char, lastchar, c); /* Save the char */

2912

c |= 0x80; /* Flag c as a length */

2913

}

2914

else

2915

#endif

2916

2917

/* Handle the case of a single byte - either with no UTF8 support, or

2918

with UTF-8 disabled, or for a UTF-8 character < 128. */

2919

2920

{

2921

c = code[-1];

2922

if (repeat_min > 1) reqbyte = c | req_caseopt | cd->req_varyopt;

2923

}

2924

2925

goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */

2926

}

2927

2928

/* If previous was a single negated character ([^a] or similar), we use

2929

one of the special opcodes, replacing it. The code is shared with single-

2930

character repeats by setting op_type to add a suitable offset into

2931

repeat_type. OP_NOT is currently used only for single-byte chars. */

2932

2933

else if (*previous == OP_NOT)

2934

{

2935

op_type = OP_NOTSTAR - OP_STAR; /* Use "not" opcodes */

2936

c = previous[1];

2937

goto OUTPUT_SINGLE_REPEAT;

2938

}

2939

2940

/* If previous was a character type match (\d or similar), abolish it and

2941

create a suitable repeat item. The code is shared with single-character

2942

repeats by setting op_type to add a suitable offset into repeat_type. Note

2943

that the Unicode property types will be present only when SUPPORT_UCP is

2944

defined, but we don't wrap the little bits of code here because it just

2945

makes it horribly messy. */

2946

2947

else if (*previous < OP_EODN)

2948

{

2949

uschar *oldcode;

2950

int prop_type;

2951

op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */

2952

c = *previous;

2953

2954

OUTPUT_SINGLE_REPEAT:

2955

prop_type = (*previous == OP_PROP || *previous == OP_NOTPROP)?

2956

previous[1] : -1;

2957

2958

oldcode = code;

2959

code = previous; /* Usually overwrite previous item */

2960

2961

/* If the maximum is zero then the minimum must also be zero; Perl allows

2962

this case, so we do too - by simply omitting the item altogether. */

2963

2964

if (repeat_max == 0) goto END_REPEAT;

2965

2966

/* All real repeats make it impossible to handle partial matching (maybe

2967

one day we will be able to remove this restriction). */

2968

2969

if (repeat_max != 1) cd->nopartial = TRUE;

2970

2971

/* Combine the op_type with the repeat_type */

2972

2973

repeat_type += op_type;

2974

2975

/* A minimum of zero is handled either as the special case * or ?, or as

2976

an UPTO, with the maximum given. */

2977

2978

if (repeat_min == 0)

2979

{

2980

if (repeat_max == -1) *code++ = OP_STAR + repeat_type;

2981

else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type;

2982

else

2983

{

2984

*code++ = OP_UPTO + repeat_type;

2985

PUT2INC(code, 0, repeat_max);

2986

}

2987

}

2988

2989

/* A repeat minimum of 1 is optimized into some special cases. If the

2990

maximum is unlimited, we use OP_PLUS. Otherwise, the original item is

2991

left in place and, if the maximum is greater than 1, we use OP_UPTO with

2992

one less than the maximum. */

2993

2994

else if (repeat_min == 1)

2995

{

2996

if (repeat_max == -1)

2997

*code++ = OP_PLUS + repeat_type;

2998

else

2999

{

3000

code = oldcode; /* leave previous item in place */

3001

if (repeat_max == 1) goto END_REPEAT;

3002

*code++ = OP_UPTO + repeat_type;

3003

PUT2INC(code, 0, repeat_max - 1);

3004

}

3005

}

3006

3007

/* The case {n,n} is just an EXACT, while the general case {n,m} is

3008

handled as an EXACT followed by an UPTO. */

3009

3010

else

3011

{

3012

*code++ = OP_EXACT + op_type; /* NB EXACT doesn't have repeat_type */

3013

PUT2INC(code, 0, repeat_min);

3014

3015

/* If the maximum is unlimited, insert an OP_STAR. Before doing so,

3016

we have to insert the character for the previous code. For a repeated

3017

Unicode property match, there is an extra byte that defines the

3018

required property. In UTF-8 mode, long characters have their length in

3019

c, with the 0x80 bit as a flag. */

3020

3021

if (repeat_max < 0)

3022

{

3023

#ifdef SUPPORT_UTF8

3024

if (utf8 && c >= 128)

3025

{

3026

memcpy(code, utf8_char, c & 7);

3027

code += c & 7;

3028

}

3029

else

3030

#endif

3031

{

3032

*code++ = c;

3033

if (prop_type >= 0) *code++ = prop_type;

3034

}

3035

*code++ = OP_STAR + repeat_type;

3036

}

3037

3038

/* Else insert an UPTO if the max is greater than the min, again

3039

preceded by the character, for the previously inserted code. */

3040

3041

else if (repeat_max != repeat_min)

3042

{

3043

#ifdef SUPPORT_UTF8

3044

if (utf8 && c >= 128)

3045

{

3046

memcpy(code, utf8_char, c & 7);

3047

code += c & 7;

3048

}

3049

else

3050

#endif

3051

*code++ = c;

3052

if (prop_type >= 0) *code++ = prop_type;

3053

repeat_max -= repeat_min;

3054

*code++ = OP_UPTO + repeat_type;

3055

PUT2INC(code, 0, repeat_max);

3056

}

3057

}

3058

3059

/* The character or character type itself comes last in all cases. */

3060

3061

#ifdef SUPPORT_UTF8

3062

if (utf8 && c >= 128)

3063

{

3064

memcpy(code, utf8_char, c & 7);

3065

code += c & 7;

3066

}

3067

else

3068

#endif

3069

*code++ = c;

3070

3071

/* For a repeated Unicode property match, there is an extra byte that

3072

defines the required property. */

3073

3074

#ifdef SUPPORT_UCP

3075

if (prop_type >= 0) *code++ = prop_type;

3076

#endif

3077

}

3078

3079

/* If previous was a character class or a back reference, we put the repeat

3080

stuff after it, but just skip the item if the repeat was {0,0}. */

3081

3082

else if (*previous == OP_CLASS ||

3083

*previous == OP_NCLASS ||

3084

#ifdef SUPPORT_UTF8

3085

*previous == OP_XCLASS ||

3086

#endif

3087

*previous == OP_REF)

3088

{

3089

if (repeat_max == 0)

3090

{

3091

code = previous;

3092

goto END_REPEAT;

3093

}

3094

3095

/* All real repeats make it impossible to handle partial matching (maybe

3096

one day we will be able to remove this restriction). */

3097

3098

if (repeat_max != 1) cd->nopartial = TRUE;

3099

3100

if (repeat_min == 0 && repeat_max == -1)

3101

*code++ = OP_CRSTAR + repeat_type;

3102

else if (repeat_min == 1 && repeat_max == -1)

3103

*code++ = OP_CRPLUS + repeat_type;

3104

else if (repeat_min == 0 && repeat_max == 1)

3105

*code++ = OP_CRQUERY + repeat_type;

3106

else

3107

{

3108

*code++ = OP_CRRANGE + repeat_type;

3109

PUT2INC(code, 0, repeat_min);

3110

if (repeat_max == -1) repeat_max = 0; /* 2-byte encoding for max */

3111

PUT2INC(code, 0, repeat_max);

3112

}

3113

}

3114

3115

/* If previous was a bracket group, we may have to replicate it in certain

3116

cases. */

3117

3118

else if (*previous >= OP_BRA || *previous == OP_ONCE ||

3119

*previous == OP_COND)

3120

{

3121

3122

int ketoffset = 0;

3123

int len = code - previous;

3124

uschar *bralink = NULL;

3125

3126

/* If the maximum repeat count is unlimited, find the end of the bracket

3127

by scanning through from the start, and compute the offset back to it

3128

from the current code pointer. There may be an OP_OPT setting following

3129

the final KET, so we can't find the end just by going back from the code

3130

pointer. */

3131

3132

if (repeat_max == -1)

3133

{

3134

3135

do ket += GET(ket, 1); while (*ket != OP_KET);

3136

ketoffset = code - ket;

3137

}

3138

3139

/* The case of a zero minimum is special because of the need to stick

3140

OP_BRAZERO in front of it, and because the group appears once in the

3141

data, whereas in other cases it appears the minimum number of times. For

3142

this reason, it is simplest to treat this case separately, as otherwise

3143

the code gets far too messy. There are several special subcases when the

3144

minimum is zero. */

3145

3146

if (repeat_min == 0)

3147

{

3148

/* If the maximum is also zero, we just omit the group from the output

3149

altogether. */

3150

3151

if (repeat_max == 0)

3152

{

3153

code = previous;

3154

goto END_REPEAT;

3155

}

3156

3157

/* If the maximum is 1 or unlimited, we just have to stick in the

3158

BRAZERO and do no more at this point. However, we do need to adjust

3159

any OP_RECURSE calls inside the group that refer to the group itself or

3160

any internal group, because the offset is from the start of the whole

3161

regex. Temporarily terminate the pattern while doing this. */

3162

3163

if (repeat_max <= 1)

3164

{

3165

*code = OP_END;

3166

adjust_recurse(previous, 1, utf8, cd);

3167

memmove(previous+1, previous, len);

3168

code++;

3169

*previous++ = OP_BRAZERO + repeat_type;

3170

}

3171

3172

/* If the maximum is greater than 1 and limited, we have to replicate

3173

in a nested fashion, sticking OP_BRAZERO before each set of brackets.

3174

The first one has to be handled carefully because it's the original

3175

copy, which has to be moved up. The remainder can be handled by code

3176

that is common with the non-zero minimum case below. We have to

3177

adjust the value of repeat_max, since one less copy is required. Once

3178

again, we may have to adjust any OP_RECURSE calls inside the group. */

3179

3180

else

3181

{

3182

int offset;

3183

*code = OP_END;

3184

adjust_recurse(previous, 2 + LINK_SIZE, utf8, cd);

3185

memmove(previous + 2 + LINK_SIZE, previous, len);

3186

code += 2 + LINK_SIZE;

3187

*previous++ = OP_BRAZERO + repeat_type;

3188

*previous++ = OP_BRA;

3189

3190

/* We chain together the bracket offset fields that have to be

3191

filled in later when the ends of the brackets are reached. */

3192

3193

offset = (bralink == NULL)? 0 : previous - bralink;

3194

bralink = previous;

3195

PUTINC(previous, 0, offset);

3196

}

3197

3198

repeat_max--;

3199

}

3200

3201

/* If the minimum is greater than zero, replicate the group as many

3202

times as necessary, and adjust the maximum to the number of subsequent

3203

copies that we need. If we set a first char from the group, and didn't

3204

set a required char, copy the latter from the former. */

3205

3206

else

3207

{

3208

if (repeat_min > 1)

3209

{

3210

if (groupsetfirstbyte && reqbyte < 0) reqbyte = firstbyte;

3211

for (i = 1; i < repeat_min; i++)

3212

{

3213

memcpy(code, previous, len);

3214

code += len;

3215

}

3216

}

3217

if (repeat_max > 0) repeat_max -= repeat_min;

3218

}

3219

3220

/* This code is common to both the zero and non-zero minimum cases. If

3221

the maximum is limited, it replicates the group in a nested fashion,

3222

remembering the bracket starts on a stack. In the case of a zero minimum,

3223

the first one was set up above. In all cases the repeat_max now specifies

3224

the number of additional copies needed. */

3225

3226

if (repeat_max >= 0)

3227

{

3228

for (i = repeat_max - 1; i >= 0; i--)

3229

{

3230

*code++ = OP_BRAZERO + repeat_type;

3231

3232

/* All but the final copy start a new nesting, maintaining the

3233

chain of brackets outstanding. */

3234

3235

if (i != 0)

3236

{

3237

int offset;

3238

*code++ = OP_BRA;

3239

offset = (bralink == NULL)? 0 : code - bralink;

3240

bralink = code;

3241

PUTINC(code, 0, offset);

3242

}

3243

3244

memcpy(code, previous, len);

3245

code += len;

3246

}

3247

3248

/* Now chain through the pending brackets, and fill in their length

3249

fields (which are holding the chain links pro tem). */

3250

3251

while (bralink != NULL)

3252

{

3253

int oldlinkoffset;

3254

int offset = code - bralink + 1;

3255

uschar *bra = code - offset;

3256

oldlinkoffset = GET(bra, 1);

3257

bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;

3258

*code++ = OP_KET;

3259

PUTINC(code, 0, offset);

3260

PUT(bra, 1, offset);

3261

}

3262

}

3263

3264

/* If the maximum is unlimited, set a repeater in the final copy. We

3265

can't just offset backwards from the current code point, because we

3266

don't know if there's been an options resetting after the ket. The

3267

correct offset was computed above. */

3268

3269

else code[-ketoffset] = OP_KETRMAX + repeat_type;

3270

}

3271

3272

/* Else there's some kind of shambles */

3273

3274

else

3275

{

3276

*errorptr = ERR11;

3277

goto FAILED;

3278

}

3279

3280

/* If the character following a repeat is '+', we wrap the entire repeated

3281

item inside OP_ONCE brackets. This is just syntactic sugar, taken from

3282

Sun's Java package. The repeated item starts at tempcode, not at previous,

3283

which might be the first part of a string whose (former) last char we

3284

repeated. However, we don't support '+' after a greediness '?'. */

3285

3286

if (possessive_quantifier)

3287

{

3288

int len = code - tempcode;

3289

memmove(tempcode + 1+LINK_SIZE, tempcode, len);

3290

code += 1 + LINK_SIZE;

3291

len += 1 + LINK_SIZE;

3292

tempcode[0] = OP_ONCE;

3293

*code++ = OP_KET;

3294

PUTINC(code, 0, len);

3295

PUT(tempcode, 1, len);

3296

}

3297

3298

/* In all cases we no longer have a previous item. We also set the

3299

"follows varying string" flag for subsequently encountered reqbytes if

3300

it isn't already set and we have just passed a varying length item. */

3301

3302

END_REPEAT:

3303

previous = NULL;

3304

cd->req_varyopt |= reqvary;

3305

break;

3306

3307

3308

/* Start of nested bracket sub-expression, or comment or lookahead or

3309

lookbehind or option setting or condition. First deal with special things

3310

that can come after a bracket; all are introduced by ?, and the appearance

3311

of any of them means that this is not a referencing group. They were

3312

checked for validity in the first pass over the string, so we don't have to

3313

check for syntax errors here. */

3314

3315

case '(':

3316

newoptions = options;

3317

skipbytes = 0;

3318

3319

if (*(++ptr) == '?')

3320

{

3321

int set, unset;

3322

int *optset;

3323

3324

switch (*(++ptr))

3325

{

3326

case '#': /* Comment; skip to ket */

3327

ptr++;

3328

while (*ptr != ')') ptr++;

3329

continue;

3330

3331

case ':': /* Non-extracting bracket */

3332

bravalue = OP_BRA;

3333

ptr++;

3334

break;

3335

3336

case '(':

3337

bravalue = OP_COND; /* Conditional group */

3338

3339

/* Condition to test for recursion */

3340

3341

if (ptr[1] == 'R')

3342

{

3343

code[1+LINK_SIZE] = OP_CREF;

3344

PUT2(code, 2+LINK_SIZE, CREF_RECURSE);

3345

skipbytes = 3;

3346

ptr += 3;

3347

}

3348

3349

/* Condition to test for a numbered subpattern match. We know that

3350

if a digit follows ( then there will just be digits until ) because

3351

the syntax was checked in the first pass. */

3352

3353

else if ((digitab[ptr[1]] && ctype_digit) != 0)

3354

{

3355

int condref; /* Don't amalgamate; some compilers */

3356

condref = *(++ptr) - '0'; /* grumble at autoincrement in declaration */

3357

while (*(++ptr) != ')') condref = condref*10 + *ptr - '0';

3358

if (condref == 0)

3359

{

3360

*errorptr = ERR35;

3361

goto FAILED;

3362

}

3363

ptr++;

3364

code[1+LINK_SIZE] = OP_CREF;

3365

PUT2(code, 2+LINK_SIZE, condref);

3366

skipbytes = 3;

3367

}

3368

/* For conditions that are assertions, we just fall through, having

3369

set bravalue above. */

3370

break;

3371

3372

case '=': /* Positive lookahead */

3373

bravalue = OP_ASSERT;

3374

ptr++;

3375

break;

3376

3377

case '!': /* Negative lookahead */

3378

bravalue = OP_ASSERT_NOT;

3379

ptr++;

3380

break;

3381

3382

case '<': /* Lookbehinds */

3383

switch (*(++ptr))

3384

{

3385

case '=': /* Positive lookbehind */

3386

bravalue = OP_ASSERTBACK;

3387

ptr++;

3388

break;

3389

3390

case '!': /* Negative lookbehind */

3391

bravalue = OP_ASSERTBACK_NOT;

3392

ptr++;

3393

break;

3394

}

3395

break;

3396

3397

case '>': /* One-time brackets */

3398

bravalue = OP_ONCE;

3399

ptr++;

3400

break;

3401

3402

case 'C': /* Callout - may be followed by digits; */

3403

previous_callout = code; /* Save for later completion */

3404

after_manual_callout = 1; /* Skip one item before completing */

3405

*code++ = OP_CALLOUT; /* Already checked that the terminating */

3406

{ /* closing parenthesis is present. */

3407

int n = 0;

3408

while ((digitab[*(++ptr)] & ctype_digit) != 0)

3409

n = n * 10 + *ptr - '0';

3410

if (n > 255)

3411

{

3412

*errorptr = ERR38;

3413

goto FAILED;

3414

}

3415

*code++ = n;

3416

PUT(code, 0, ptr - cd->start_pattern + 1); /* Pattern offset */

3417

PUT(code, LINK_SIZE, 0); /* Default length */

3418

code += 2 * LINK_SIZE;

3419

}

3420

previous = NULL;

3421

continue;

3422

3423

case 'P': /* Named subpattern handling */

3424

if (*(++ptr) == '<') /* Definition */

3425

{

3426

int i, namelen;

3427

uschar *slot = cd->name_table;

3428

const uschar *name; /* Don't amalgamate; some compilers */

3429

name = ++ptr; /* grumble at autoincrement in declaration */

3430

3431

while (*ptr++ != '>');

3432

namelen = ptr - name - 1;

3433

3434

for (i = 0; i < cd->names_found; i++)

3435

{

3436

int crc = memcmp(name, slot+2, namelen);

3437

if (crc == 0)

3438

{

3439

if (slot[2+namelen] == 0)

3440

{

3441

*errorptr = ERR43;

3442

goto FAILED;

3443

}

3444

crc = -1; /* Current name is substring */

3445

}

3446

if (crc < 0)

3447

{

3448

memmove(slot + cd->name_entry_size, slot,

3449

(cd->names_found - i) * cd->name_entry_size);

3450

break;

3451

}

3452

slot += cd->name_entry_size;

3453

}

3454

3455

PUT2(slot, 0, *brackets + 1);

3456

memcpy(slot + 2, name, namelen);

3457

slot[2+namelen] = 0;

3458

cd->names_found++;

3459

goto NUMBERED_GROUP;

3460

}

3461

3462

if (*ptr == '=' || *ptr == '>') /* Reference or recursion */

3463

{

3464

int i, namelen;

3465

int type = *ptr++;

3466

const uschar *name = ptr;

3467

uschar *slot = cd->name_table;

3468

3469

while (*ptr != ')') ptr++;

3470

namelen = ptr - name;

3471

3472

for (i = 0; i < cd->names_found; i++)

3473

{

3474

if (strncmp((char *)name, (char *)slot+2, namelen) == 0) break;

3475

slot += cd->name_entry_size;

3476

}

3477

if (i >= cd->names_found)

3478

{

3479

*errorptr = ERR15;

3480

goto FAILED;

3481

}

3482

3483

recno = GET2(slot, 0);

3484

3485

if (type == '>') goto HANDLE_RECURSION; /* A few lines below */

3486

3487

/* Back reference */

3488

3489

previous = code;

3490

*code++ = OP_REF;

3491

PUT2INC(code, 0, recno);

3492

cd->backref_map |= (recno < 32)? (1 << recno) : 1;

3493

if (recno > cd->top_backref) cd->top_backref = recno;

3494

continue;

3495

}

3496

3497

/* Should never happen */

3498

break;

3499

3500

case 'R': /* Pattern recursion */

3501

ptr++; /* Same as (?0) */

3502

/* Fall through */

3503

3504

/* Recursion or "subroutine" call */

3505

3506

case '0': case '1': case '2': case '3': case '4':

3507

case '5': case '6': case '7': case '8': case '9':

3508

{

3509

const uschar *called;

3510

recno = 0;

3511

while((digitab[*ptr] & ctype_digit) != 0)

3512

recno = recno * 10 + *ptr++ - '0';

3513

3514

/* Come here from code above that handles a named recursion */

3515

3516

HANDLE_RECURSION:

3517

3518

previous = code;

3519

3520

/* Find the bracket that is being referenced. Temporarily end the

3521

regex in case it doesn't exist. */

3522

3523

*code = OP_END;

3524

called = (recno == 0)?

3525

cd->start_code : find_bracket(cd->start_code, utf8, recno);

3526

3527

if (called == NULL)

3528

{

3529

*errorptr = ERR15;

3530

goto FAILED;

3531

}

3532

3533

/* If the subpattern is still open, this is a recursive call. We

3534

check to see if this is a left recursion that could loop for ever,

3535

and diagnose that case. */

3536

3537

if (GET(called, 1) == 0 && could_be_empty(called, code, bcptr, utf8))

3538

{

3539

*errorptr = ERR40;

3540

goto FAILED;

3541

}

3542

3543

/* Insert the recursion/subroutine item */

3544

3545

*code = OP_RECURSE;

3546

PUT(code, 1, called - cd->start_code);

3547

code += 1 + LINK_SIZE;

3548

}

3549

continue;

3550

3551

/* Character after (? not specially recognized */

3552

3553

default: /* Option setting */

3554

set = unset = 0;

3555

optset = &set;

3556

3557

while (*ptr != ')' && *ptr != ':')

3558

{

3559

switch (*ptr++)

3560

{

3561

case '-': optset = &unset; break;

3562

3563

case 'i': *optset |= PCRE_CASELESS; break;

3564

case 'm': *optset |= PCRE_MULTILINE; break;

3565

case 's': *optset |= PCRE_DOTALL; break;

3566

case 'x': *optset |= PCRE_EXTENDED; break;

3567

case 'U': *optset |= PCRE_UNGREEDY; break;

3568

case 'X': *optset |= PCRE_EXTRA; break;

3569

}

3570

}

3571

3572

/* Set up the changed option bits, but don't change anything yet. */

3573

3574

newoptions = (options | set) & (~unset);

3575

3576

/* If the options ended with ')' this is not the start of a nested

3577

group with option changes, so the options change at this level. Compile

3578

code to change the ims options if this setting actually changes any of

3579

them. We also pass the new setting back so that it can be put at the

3580

start of any following branches, and when this group ends (if we are in

3581

a group), a resetting item can be compiled.

3582

3583

Note that if this item is right at the start of the pattern, the

3584

options will have been abstracted and made global, so there will be no

3585

change to compile. */

3586

3587

if (*ptr == ')')

3588

{

3589

if ((options & PCRE_IMS) != (newoptions & PCRE_IMS))

3590

{

3591

*code++ = OP_OPT;

3592

*code++ = newoptions & PCRE_IMS;

3593

}

3594

3595

/* Change options at this level, and pass them back for use

3596

in subsequent branches. Reset the greedy defaults and the case

3597

value for firstbyte and reqbyte. */

3598

3599

*optionsptr = options = newoptions;

3600

greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);

3601

greedy_non_default = greedy_default ^ 1;

3602

req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;

3603

3604

previous = NULL; /* This item can't be repeated */

3605

continue; /* It is complete */

3606

}

3607

3608

/* If the options ended with ':' we are heading into a nested group

3609

with possible change of options. Such groups are non-capturing and are

3610

not assertions of any kind. All we need to do is skip over the ':';

3611

the newoptions value is handled below. */

3612

3613

bravalue = OP_BRA;

3614

ptr++;

3615

}

3616

}

3617

3618

/* If PCRE_NO_AUTO_CAPTURE is set, all unadorned brackets become

3619

non-capturing and behave like (?:...) brackets */

3620

3621

else if ((options & PCRE_NO_AUTO_CAPTURE) != 0)

3622

{

3623

bravalue = OP_BRA;

3624

}

3625

3626

/* Else we have a referencing group; adjust the opcode. If the bracket

3627

number is greater than EXTRACT_BASIC_MAX, we set the opcode one higher, and

3628

arrange for the true number to follow later, in an OP_BRANUMBER item. */

3629

3630

else

3631

{

3632

NUMBERED_GROUP:

3633

if (++(*brackets) > EXTRACT_BASIC_MAX)

3634

{

3635

bravalue = OP_BRA + EXTRACT_BASIC_MAX + 1;

3636

code[1+LINK_SIZE] = OP_BRANUMBER;

3637

PUT2(code, 2+LINK_SIZE, *brackets);

3638

skipbytes = 3;

3639

}

3640

else bravalue = OP_BRA + *brackets;

3641

}

3642

3643

/* Process nested bracketed re. Assertions may not be repeated, but other

3644

kinds can be. We copy code into a non-register variable in order to be able

3645

to pass its address because some compilers complain otherwise. Pass in a

3646

new setting for the ims options if they have changed. */

3647

3648

previous = (bravalue >= OP_ONCE)? code : NULL;

3649

*code = bravalue;

3650

tempcode = code;

3651

tempreqvary = cd->req_varyopt; /* Save value before bracket */

3652

3653

if (!compile_regex(

3654

newoptions, /* The complete new option state */

3655

options & PCRE_IMS, /* The previous ims option state */

3656

brackets, /* Extracting bracket count */

3657

&tempcode, /* Where to put code (updated) */

3658

&ptr, /* Input pointer (updated) */

3659

errorptr, /* Where to put an error message */

3660

(bravalue == OP_ASSERTBACK ||

3661

bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */

3662

skipbytes, /* Skip over OP_COND/OP_BRANUMBER */

3663

&subfirstbyte, /* For possible first char */

3664

&subreqbyte, /* For possible last char */

3665

bcptr, /* Current branch chain */

3666

cd)) /* Tables block */

3667

goto FAILED;

3668

3669

/* At the end of compiling, code is still pointing to the start of the

3670

group, while tempcode has been updated to point past the end of the group

3671

and any option resetting that may follow it. The pattern pointer (ptr)

3672

is on the bracket. */

3673

3674

/* If this is a conditional bracket, check that there are no more than

3675

two branches in the group. */

3676

3677

else if (bravalue == OP_COND)

3678

{

3679

uschar *tc = code;

3680

condcount = 0;

3681

3682

do {

3683

condcount++;

3684

tc += GET(tc,1);

3685

}

3686

while (*tc != OP_KET);

3687

3688

if (condcount > 2)

3689

{

3690

*errorptr = ERR27;

3691

goto FAILED;

3692

}

3693

3694

/* If there is just one branch, we must not make use of its firstbyte or

3695

reqbyte, because this is equivalent to an empty second branch. */

3696

3697

if (condcount == 1) subfirstbyte = subreqbyte = REQ_NONE;

3698

}

3699

3700

/* Handle updating of the required and first characters. Update for normal

3701

brackets of all kinds, and conditions with two branches (see code above).

3702

If the bracket is followed by a quantifier with zero repeat, we have to

3703

back off. Hence the definition of zeroreqbyte and zerofirstbyte outside the

3704

main loop so that they can be accessed for the back off. */

3705

3706

zeroreqbyte = reqbyte;

3707

zerofirstbyte = firstbyte;

3708

groupsetfirstbyte = FALSE;

3709

3710

if (bravalue >= OP_BRA || bravalue == OP_ONCE || bravalue == OP_COND)

3711

{

3712

/* If we have not yet set a firstbyte in this branch, take it from the

3713

subpattern, remembering that it was set here so that a repeat of more

3714

than one can replicate it as reqbyte if necessary. If the subpattern has

3715

no firstbyte, set "none" for the whole branch. In both cases, a zero

3716

repeat forces firstbyte to "none". */

3717

3718

if (firstbyte == REQ_UNSET)

3719

{

3720

if (subfirstbyte >= 0)

3721

{

3722

firstbyte = subfirstbyte;

3723

groupsetfirstbyte = TRUE;

3724

}

3725

else firstbyte = REQ_NONE;

3726

zerofirstbyte = REQ_NONE;

3727

}

3728

3729

/* If firstbyte was previously set, convert the subpattern's firstbyte

3730

into reqbyte if there wasn't one, using the vary flag that was in

3731

existence beforehand. */

3732

3733

else if (subfirstbyte >= 0 && subreqbyte < 0)

3734

subreqbyte = subfirstbyte | tempreqvary;

3735

3736

/* If the subpattern set a required byte (or set a first byte that isn't

3737

really the first byte - see above), set it. */

3738

3739

if (subreqbyte >= 0) reqbyte = subreqbyte;

3740

}

3741

3742

/* For a forward assertion, we take the reqbyte, if set. This can be

3743

helpful if the pattern that follows the assertion doesn't set a different

3744

char. For example, it's useful for /(?=abcde).+/. We can't set firstbyte

3745

for an assertion, however, because it leads to an incorrect effect for patterns

3746

such as /(?=a)a.+/ when the "real" "a" would then become a reqbyte instead

3747

of a firstbyte. This is overcome by a scan at the end if there's no

3748

firstbyte, looking for an asserted first char. */

3749

3750

else if (bravalue == OP_ASSERT && subreqbyte >= 0) reqbyte = subreqbyte;

3751

3752

/* Now update the main code pointer to the end of the group. */

3753

3754

code = tempcode;

3755

3756

/* Error if hit end of pattern */

3757

3758

if (*ptr != ')')

3759

{

3760

*errorptr = ERR14;

3761

goto FAILED;

3762

}

3763

break;

3764

3765

/* Check \ for being a real metacharacter; if not, fall through and handle

3766

it as a data character at the start of a string. Escape items are checked

3767

for validity in the pre-compiling pass. */

3768

3769

case '\\':

3770

tempptr = ptr;

3771

c = check_escape(&ptr, errorptr, *brackets, options, FALSE);

3772

3773

/* Handle metacharacters introduced by \. For ones like \d, the ESC_ values

3774

are arranged to be the negation of the corresponding OP_values. For the

3775

back references, the values are ESC_REF plus the reference number. Only

3776

back references and those types that consume a character may be repeated.

3777

We can test for values between ESC_b and ESC_Z for the latter; this may

3778

have to change if any new ones are ever created. */

3779

3780

if (c < 0)

3781

{

3782

if (-c == ESC_Q) /* Handle start of quoted string */

3783

{

3784

if (ptr[1] == '\\' && ptr[2] == 'E') ptr += 2; /* avoid empty string */

3785

else inescq = TRUE;

3786

continue;

3787

}

3788

3789

/* For metasequences that actually match a character, we disable the

3790

setting of a first character if it hasn't already been set. */

3791

3792

if (firstbyte == REQ_UNSET && -c > ESC_b && -c < ESC_Z)

3793

firstbyte = REQ_NONE;

3794

3795

/* Set values to reset to if this is followed by a zero repeat. */

3796

3797

zerofirstbyte = firstbyte;

3798

zeroreqbyte = reqbyte;

3799

3800

/* Back references are handled specially */

3801

3802

if (-c >= ESC_REF)

3803

{

3804

int number = -c - ESC_REF;

3805

previous = code;

3806

*code++ = OP_REF;

3807

PUT2INC(code, 0, number);

3808

}

3809

3810

/* So are Unicode property matches, if supported. We know that get_ucp

3811

won't fail because it was tested in the pre-pass. */

3812

3813

#ifdef SUPPORT_UCP

3814

else if (-c == ESC_P || -c == ESC_p)

3815

{

3816

BOOL negated;

3817

int value = get_ucp(&ptr, &negated, errorptr);

3818

previous = code;

3819

*code++ = ((-c == ESC_p) != negated)? OP_PROP : OP_NOTPROP;

3820

*code++ = value;

3821

}

3822

#endif

3823

3824

/* For the rest, we can obtain the OP value by negating the escape

3825

value */

3826

3827

else

3828

{

3829

previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;

3830

*code++ = -c;

3831

}

3832

continue;

3833

}

3834

3835

/* We have a data character whose value is in c. In UTF-8 mode it may have

3836

a value > 127. We set its representation in the length/buffer, and then

3837

handle it as a data character. */

3838

3839

#ifdef SUPPORT_UTF8

3840

if (utf8 && c > 127)

3841

mclength = ord2utf8(c, mcbuffer);

3842

else

3843

#endif

3844

3845

{

3846

mcbuffer[0] = c;

3847

mclength = 1;

3848

}

3849

3850

goto ONE_CHAR;

3851

3852

/* Handle a literal character. It is guaranteed not to be whitespace or #

3853

when the extended flag is set. If we are in UTF-8 mode, it may be a

3854

multi-byte literal character. */

3855

3856

default:

3857

NORMAL_CHAR:

3858

mclength = 1;

3859

mcbuffer[0] = c;

3860

3861

#ifdef SUPPORT_UTF8

3862

if (utf8 && (c & 0xc0) == 0xc0)

3863

{

3864

while ((ptr[1] & 0xc0) == 0x80)

3865

mcbuffer[mclength++] = *(++ptr);

3866

}

3867

#endif

3868

3869

/* At this point we have the character's bytes in mcbuffer, and the length

3870

in mclength. When not in UTF-8 mode, the length is always 1. */

3871

3872

ONE_CHAR:

3873

previous = code;

3874

*code++ = ((options & PCRE_CASELESS) != 0)? OP_CHARNC : OP_CHAR;

3875

for (c = 0; c < mclength; c++) *code++ = mcbuffer[c];

3876

3877

/* Set the first and required bytes appropriately. If no previous first

3878

byte, set it from this character, but revert to none on a zero repeat.

3879

Otherwise, leave the firstbyte value alone, and don't change it on a zero

3880

repeat. */

3881

3882

if (firstbyte == REQ_UNSET)

3883

{

3884

zerofirstbyte = REQ_NONE;

3885

zeroreqbyte = reqbyte;

3886

3887

/* If the character is more than one byte long, we can set firstbyte

3888

only if it is not to be matched caselessly. */

3889

3890

if (mclength == 1 || req_caseopt == 0)

3891

{

3892

firstbyte = mcbuffer[0] | req_caseopt;

3893

if (mclength != 1) reqbyte = code[-1] | cd->req_varyopt;

3894

}

3895

else firstbyte = reqbyte = REQ_NONE;

3896

}

3897

3898

/* firstbyte was previously set; we can set reqbyte only the length is

3899

1 or the matching is caseful. */

3900

3901

else

3902

{

3903

zerofirstbyte = firstbyte;

3904

zeroreqbyte = reqbyte;

3905

if (mclength == 1 || req_caseopt == 0)

3906

reqbyte = code[-1] | req_caseopt | cd->req_varyopt;

3907

}

3908

3909

break; /* End of literal character handling */

3910

}

3911

} /* end of big loop */

3912

3913

/* Control never reaches here by falling through, only by a goto for all the

3914

error states. Pass back the position in the pattern so that it can be displayed

3915

to the user for diagnosing the error. */

3916

3917

FAILED:

3918

*ptrptr = ptr;

3919

return FALSE;

3920

}

3921

3922

3923

3924

3925

/*************************************************

3926

* Compile sequence of alternatives *

3927

*************************************************/

3928

3929

/* On entry, ptr is pointing past the bracket character, but on return

3930

it points to the closing bracket, or vertical bar, or end of string.

3931

The code variable is pointing at the byte into which the BRA operator has been

3932

stored. If the ims options are changed at the start (for a (?ims: group) or

3933

during any branch, we need to insert an OP_OPT item at the start of every

3934

following branch to ensure they get set correctly at run time, and also pass

3935

the new options into every subsequent branch compile.

3936

3937

Argument:

3938

options option bits, including any changes for this subpattern

3939

oldims previous settings of ims option bits

3940

brackets -> int containing the number of extracting brackets used

3941

codeptr -> the address of the current code pointer

3942

ptrptr -> the address of the current pattern pointer

3943

errorptr -> pointer to error message

3944

lookbehind TRUE if this is a lookbehind assertion

3945

skipbytes skip this many bytes at start (for OP_COND, OP_BRANUMBER)

3946

firstbyteptr place to put the first required character, or a negative number

3947

reqbyteptr place to put the last required character, or a negative number

3948

bcptr pointer to the chain of currently open branches

3949

cd points to the data block with tables pointers etc.

3950

3951

Returns: TRUE on success

3952

3953

3954

static BOOL

3955

compile_regex(int options, int oldims, int *brackets, uschar **codeptr,

3956

const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int skipbytes,

3957

int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr, compile_data *cd)

3958

{

3959

const uschar *ptr = *ptrptr;

3960

uschar *code = *codeptr;

3961

uschar *last_branch = code;

3962

uschar *start_bracket = code;

3963

uschar *reverse_count = NULL;

3964

int firstbyte, reqbyte;

3965

int branchfirstbyte, branchreqbyte;

3966

branch_chain bc;

3967

3968

bc.outer = bcptr;

3969

bc.current = code;

3970

3971

firstbyte = reqbyte = REQ_UNSET;

3972

3973

/* Offset is set zero to mark that this bracket is still open */

3974

3975

PUT(code, 1, 0);

3976

code += 1 + LINK_SIZE + skipbytes;

3977

3978

/* Loop for each alternative branch */

3979

3980

for (;;)

3981

{

3982

/* Handle a change of ims options at the start of the branch */

3983

3984

if ((options & PCRE_IMS) != oldims)

3985

{

3986

*code++ = OP_OPT;

3987

*code++ = options & PCRE_IMS;

3988

}

3989

3990

/* Set up dummy OP_REVERSE if lookbehind assertion */

3991

3992

if (lookbehind)

3993

{

3994

*code++ = OP_REVERSE;

3995

reverse_count = code;

3996

PUTINC(code, 0, 0);

3997

}

3998

3999

/* Now compile the branch */

4000

4001

if (!compile_branch(&options, brackets, &code, &ptr, errorptr,

4002

&branchfirstbyte, &branchreqbyte, &bc, cd))

4003

{

4004

*ptrptr = ptr;

4005

return FALSE;

4006

}

4007

4008

/* If this is the first branch, the firstbyte and reqbyte values for the

4009

branch become the values for the regex. */

4010

4011

if (*last_branch != OP_ALT)

4012

{

4013

firstbyte = branchfirstbyte;

4014

reqbyte = branchreqbyte;

4015

}

4016

4017

/* If this is not the first branch, the first char and reqbyte have to

4018

match the values from all the previous branches, except that if the previous

4019

value for reqbyte didn't have REQ_VARY set, it can still match, and we set

4020

REQ_VARY for the regex. */

4021

4022

else

4023

{

4024

/* If we previously had a firstbyte, but it doesn't match the new branch,

4025

we have to abandon the firstbyte for the regex, but if there was previously

4026

no reqbyte, it takes on the value of the old firstbyte. */

4027

4028

if (firstbyte >= 0 && firstbyte != branchfirstbyte)

4029

{

4030

if (reqbyte < 0) reqbyte = firstbyte;

4031

firstbyte = REQ_NONE;

4032

}

4033

4034

/* If we (now or from before) have no firstbyte, a firstbyte from the

4035

branch becomes a reqbyte if there isn't a branch reqbyte. */

4036

4037

if (firstbyte < 0 && branchfirstbyte >= 0 && branchreqbyte < 0)

4038

branchreqbyte = branchfirstbyte;

4039

4040

/* Now ensure that the reqbytes match */

4041

4042

if ((reqbyte & ~REQ_VARY) != (branchreqbyte & ~REQ_VARY))

4043

reqbyte = REQ_NONE;

4044

else reqbyte |= branchreqbyte; /* To "or" REQ_VARY */

4045

}

4046

4047

/* If lookbehind, check that this branch matches a fixed-length string,

4048

and put the length into the OP_REVERSE item. Temporarily mark the end of

4049

the branch with OP_END. */

4050

4051

if (lookbehind)

4052

{

4053

int length;

4054

*code = OP_END;

4055

length = find_fixedlength(last_branch, options);

4056

DPRINTF(("fixed length = %d\n", length));

4057

if (length < 0)

4058

{

4059

*errorptr = (length == -2)? ERR36 : ERR25;

4060

*ptrptr = ptr;

4061

return FALSE;

4062

}

4063

PUT(reverse_count, 0, length);

4064

}

4065

4066

/* Reached end of expression, either ')' or end of pattern. Go back through

4067

the alternative branches and reverse the chain of offsets, with the field in

4068

the BRA item now becoming an offset to the first alternative. If there are

4069

no alternatives, it points to the end of the group. The length in the

4070

terminating ket is always the length of the whole bracketed item. If any of

4071

the ims options were changed inside the group, compile a resetting op-code

4072

following, except at the very end of the pattern. Return leaving the pointer

4073

at the terminating char. */

4074

4075

if (*ptr != '|')

4076

{

4077

int length = code - last_branch;

4078

4079

{

4080

int prev_length = GET(last_branch, 1);

4081

PUT(last_branch, 1, length);

4082

length = prev_length;

4083

last_branch -= length;

4084

}

4085

while (length > 0);

4086

4087

/* Fill in the ket */

4088

4089

*code = OP_KET;

4090

PUT(code, 1, code - start_bracket);

4091

code += 1 + LINK_SIZE;

4092

4093

/* Resetting option if needed */

4094

4095

if ((options & PCRE_IMS) != oldims && *ptr == ')')

4096

{

4097

*code++ = OP_OPT;

4098

*code++ = oldims;

4099

}

4100

4101

/* Set values to pass back */

4102

4103

*codeptr = code;

4104

*ptrptr = ptr;

4105

*firstbyteptr = firstbyte;

4106

*reqbyteptr = reqbyte;

4107

return TRUE;

4108

}

4109

4110

/* Another branch follows; insert an "or" node. Its length field points back

4111

to the previous branch while the bracket remains open. At the end the chain

4112

is reversed. It's done like this so that the start of the bracket has a

4113

zero offset until it is closed, making it possible to detect recursion. */

4114

4115

*code = OP_ALT;

4116

PUT(code, 1, code - last_branch);

4117

bc.current = last_branch = code;

4118

code += 1 + LINK_SIZE;

4119

ptr++;

4120

}

4121

/* Control never reaches here */

4122

}

4123

4124

4125

4126

4127

/*************************************************

4128

* Check for anchored expression *

4129

*************************************************/

4130

4131

/* Try to find out if this is an anchored regular expression. Consider each

4132

alternative branch. If they all start with OP_SOD or OP_CIRC, or with a bracket

4133

all of whose alternatives start with OP_SOD or OP_CIRC (recurse ad lib), then

4134

it's anchored. However, if this is a multiline pattern, then only OP_SOD

4135

counts, since OP_CIRC can match in the middle.

4136

4137

We can also consider a regex to be anchored if OP_SOM starts all its branches.

4138

This is the code for \G, which means "match at start of match position, taking

4139

into account the match offset".

4140

4141

A branch is also implicitly anchored if it starts with .* and DOTALL is set,

4142

because that will try the rest of the pattern at all possible matching points,

4143

so there is no point trying again.... er ....

4144

4145

.... except when the .* appears inside capturing parentheses, and there is a

4146

subsequent back reference to those parentheses. We haven't enough information

4147

to catch that case precisely.

4148

4149

At first, the best we could do was to detect when .* was in capturing brackets

4150

and the highest back reference was greater than or equal to that level.

4151

However, by keeping a bitmap of the first 31 back references, we can catch some

4152

of the more common cases more precisely.

4153

4154

Arguments:

4155

code points to start of expression (the bracket)

4156

options points to the options setting

4157

bracket_map a bitmap of which brackets we are inside while testing; this

4158

handles up to substring 31; after that we just have to take

4159

the less precise approach

4160

backref_map the back reference bitmap

4161

4162

Returns: TRUE or FALSE

4163

4164

4165

static BOOL

4166

is_anchored(register const uschar *code, int *options, unsigned int bracket_map,

4167

unsigned int backref_map)

4168

{

4169

do {

4170

const uschar *scode =

4171

first_significant_code(code + 1+LINK_SIZE, options, PCRE_MULTILINE, FALSE);

4172

4173

4174

/* Capturing brackets */

4175

4176

if (op > OP_BRA)

4177

{

4178

int new_map;

4179

op -= OP_BRA;

4180

if (op > EXTRACT_BASIC_MAX) op = GET2(scode, 2+LINK_SIZE);

4181

new_map = bracket_map | ((op < 32)? (1 << op) : 1);

4182

if (!is_anchored(scode, options, new_map, backref_map)) return FALSE;

4183

}

4184

4185

/* Other brackets */

4186

4187

else if (op == OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)

4188

{

4189

if (!is_anchored(scode, options, bracket_map, backref_map)) return FALSE;

4190

}

4191

4192

/* .* is not anchored unless DOTALL is set and it isn't in brackets that

4193

are or may be referenced. */

4194

4195

else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR) &&

4196

(*options & PCRE_DOTALL) != 0)

4197

{

4198

if (scode[1] != OP_ANY || (bracket_map & backref_map) != 0) return FALSE;

4199

}

4200

4201

/* Check for explicit anchoring */

4202

4203

else if (op != OP_SOD && op != OP_SOM &&

4204

((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC))

4205

return FALSE;

4206

code += GET(code, 1);

4207

}

4208

while (*code == OP_ALT); /* Loop for each alternative */

4209

return TRUE;

4210

}

4211

4212

4213

4214

/*************************************************

4215

* Check for starting with ^ or .* *

4216

*************************************************/

4217

4218

/* This is called to find out if every branch starts with ^ or .* so that

4219

"first char" processing can be done to speed things up in multiline

4220

matching and for non-DOTALL patterns that start with .* (which must start at

4221

the beginning or after \n). As in the case of is_anchored() (see above), we

4222

have to take account of back references to capturing brackets that contain .*

4223

because in that case we can't make the assumption.

4224

4225

Arguments:

4226

code points to start of expression (the bracket)

4227

bracket_map a bitmap of which brackets we are inside while testing; this

4228

handles up to substring 31; after that we just have to take

4229

the less precise approach

4230

backref_map the back reference bitmap

4231

4232

Returns: TRUE or FALSE

4233

4234

4235

static BOOL

4236

is_startline(const uschar *code, unsigned int bracket_map,

4237

unsigned int backref_map)

4238

{

4239

do {

4240

const uschar *scode = first_significant_code(code + 1+LINK_SIZE, NULL, 0,

4241

FALSE);

4242

4243

4244

/* Capturing brackets */

4245

4246

if (op > OP_BRA)

4247

{

4248

int new_map;

4249

op -= OP_BRA;

4250

if (op > EXTRACT_BASIC_MAX) op = GET2(scode, 2+LINK_SIZE);

4251

new_map = bracket_map | ((op < 32)? (1 << op) : 1);

4252

if (!is_startline(scode, new_map, backref_map)) return FALSE;

4253

}

4254

4255

/* Other brackets */

4256

4257

else if (op == OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)

4258

{ if (!is_startline(scode, bracket_map, backref_map)) return FALSE; }

4259

4260

/* .* means "start at start or after \n" if it isn't in brackets that

4261

may be referenced. */

4262

4263

else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)

4264

{

4265

if (scode[1] != OP_ANY || (bracket_map & backref_map) != 0) return FALSE;

4266

}

4267

4268

/* Check for explicit circumflex */

4269

4270

else if (op != OP_CIRC) return FALSE;

4271

4272

/* Move on to the next alternative */

4273

4274

code += GET(code, 1);

4275

}

4276

while (*code == OP_ALT); /* Loop for each alternative */

4277

return TRUE;

4278

}

4279

4280

4281

4282

/*************************************************

4283

* Check for asserted fixed first char *

4284

*************************************************/

4285

4286

/* During compilation, the "first char" settings from forward assertions are

4287

discarded, because they can cause conflicts with actual literals that follow.

4288

However, if we end up without a first char setting for an unanchored pattern,

4289

it is worth scanning the regex to see if there is an initial asserted first

4290

char. If all branches start with the same asserted char, or with a bracket all

4291

of whose alternatives start with the same asserted char (recurse ad lib), then

4292

we return that char, otherwise -1.

4293

4294

Arguments:

4295

code points to start of expression (the bracket)

4296

options pointer to the options (used to check casing changes)

4297

inassert TRUE if in an assertion

4298

4299

Returns: -1 or the fixed first char

4300

4301

4302

static int

4303

find_firstassertedchar(const uschar *code, int *options, BOOL inassert)

4304

{

4305

4306

do {

4307

int d;

4308

const uschar *scode =

4309

first_significant_code(code + 1+LINK_SIZE, options, PCRE_CASELESS, TRUE);

4310

4311

4312

if (op >= OP_BRA) op = OP_BRA;

4313

4314

switch(op)

4315

{

4316

default:

4317

return -1;

4318

4319

case OP_BRA:

4320

case OP_ASSERT:

4321

case OP_ONCE:

4322

case OP_COND:

4323

if ((d = find_firstassertedchar(scode, options, op == OP_ASSERT)) < 0)

4324

return -1;

4325

if (c < 0) c = d; else if (c != d) return -1;

4326

break;

4327

4328

case OP_EXACT: /* Fall through */

4329

scode += 2;

4330

4331

case OP_CHAR:

4332

case OP_CHARNC:

4333

case OP_PLUS:

4334

case OP_MINPLUS:

4335

if (!inassert) return -1;

4336

if (c < 0)

4337

{

4338

c = scode[1];

4339

if ((*options & PCRE_CASELESS) != 0) c |= REQ_CASELESS;

4340

}

4341

else if (c != scode[1]) return -1;

4342

break;

4343

}

4344

4345

code += GET(code, 1);

4346

}

4347

while (*code == OP_ALT);

4348

return c;

4349

}

4350

4351

4352

4353

4354

#ifdef SUPPORT_UTF8
/*************************************************
*         Validate a UTF-8 string                *
*************************************************/

/* This function is called (optionally) at the start of compile or match, to
validate that a supposed UTF-8 string is actually valid. The early check means
that subsequent code can assume it is dealing with a valid string. The check
can be turned off for maximum performance, but then consequences of supplying
an invalid string are then undefined.

Arguments:
  string       points to the string
  length       length of string, or -1 if the string is zero-terminated

Returns:       < 0    if the string is a valid UTF-8 string
               >= 0   otherwise; the value is the offset of the bad byte
*/

static int
valid_utf8(const uschar *string, int length)
{
register const uschar *p;

/* A negative length means a zero-terminated string: measure it first */

if (length < 0)
  {
  for (p = string; *p != 0; p++);
  length = p - string;
  }

for (p = string; length-- > 0; p++)
  {
  register int ab;
  register int c = *p;
  if (c < 128) continue;                     /* Single-byte character */
  if ((c & 0xc0) != 0xc0) return p - string; /* Not a valid lead byte */
  ab = utf8_table4[c & 0x3f];     /* Number of additional bytes */
  if (length < ab) return p - string;        /* Sequence is truncated */
  length -= ab;

  /* Check top bits in the second byte */
  if ((*(++p) & 0xc0) != 0x80) return p - string;

  /* Check for overlong sequences for each different length */
  switch (ab)
    {
    /* Check for xx00 000x */
    case 1:
    if ((c & 0x3e) == 0) return p - string;
    continue;   /* We know there aren't any more bytes to check */

    /* Check for 1110 0000, xx0x xxxx */
    case 2:
    if (c == 0xe0 && (*p & 0x20) == 0) return p - string;
    break;

    /* Check for 1111 0000, xx00 xxxx */
    case 3:
    if (c == 0xf0 && (*p & 0x30) == 0) return p - string;
    break;

    /* Check for 1111 1000, xx00 0xxx */
    case 4:
    if (c == 0xf8 && (*p & 0x38) == 0) return p - string;
    break;

    /* Check for leading 0xfe or 0xff, and then for 1111 1100, xx00 00xx */
    case 5:
    if (c == 0xfe || c == 0xff ||
       (c == 0xfc && (*p & 0x3c) == 0)) return p - string;
    break;
    }

  /* Check for valid bytes after the 2nd, if any; all must start 10 */
  while (--ab > 0)
    {
    if ((*(++p) & 0xc0) != 0x80) return p - string;
    }
  }

return -1;
}
#endif

4437

4438

4439

4440

/*************************************************
*        Compile a Regular Expression            *
*************************************************/

/* This function takes a string and returns a pointer to a block of store
holding a compiled version of the expression.

Arguments:
  pattern      the regular expression
  options      various option bits
  errorptr     pointer to pointer to error text
  erroroffset  ptr offset in pattern where error was detected
  tables       pointer to character tables or NULL

Returns:       pointer to compiled data block, or NULL on error,
               with errorptr and erroroffset set
*/

4457

4458

EXPORT pcre *

4459

pcre_compile(const char *pattern, int options, const char **errorptr,

4460

int *erroroffset, const unsigned char *tables)

4461

{

4462

real_pcre *re;

4463

int length = 1 + LINK_SIZE; /* For initial BRA plus length */

4464

//int runlength;

4465

int c, firstbyte, reqbyte;

4466

int bracount = 0;

4467

int branch_extra = 0;

4468

int branch_newextra;

4469

int item_count = -1;

4470

int name_count = 0;

4471

int max_name_size = 0;

4472

int lastitemlength = 0;

4473

#ifdef SUPPORT_UTF8

4474

BOOL utf8;

4475

BOOL class_utf8;

4476

#endif

4477

BOOL inescq = FALSE;

4478

unsigned int brastackptr = 0;

4479

size_t size;

4480

uschar *code;

4481

const uschar *codestart;

4482

const uschar *ptr;

4483

compile_data compile_block;

4484

int brastack[BRASTACK_SIZE];

4485

uschar bralenstack[BRASTACK_SIZE];

4486

4487

/* We can't pass back an error message if errorptr is NULL; I guess the best we

4488

can do is just return NULL. */

4489

4490

if (errorptr == NULL) return NULL;

4491

*errorptr = NULL;

4492

4493

/* However, we can give a message for this error */

4494

4495

if (erroroffset == NULL)

4496

{

4497

*errorptr = ERR16;

4498

return NULL;

4499

}

4500

*erroroffset = 0;

4501

4502

/* Can't support UTF8 unless PCRE has been compiled to include the code. */

4503

4504

#ifdef SUPPORT_UTF8

4505

utf8 = (options & PCRE_UTF8) != 0;

4506

if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&

4507

(*erroroffset = valid_utf8((uschar *)pattern, -1)) >= 0)

4508

{

4509

*errorptr = ERR44;

4510

return NULL;

4511

}

4512

#else

4513

if ((options & PCRE_UTF8) != 0)

4514

{

4515

*errorptr = ERR32;

4516

return NULL;

4517

}

4518

#endif

4519

4520

if ((options & ~PUBLIC_OPTIONS) != 0)

4521

{

4522

*errorptr = ERR17;

4523

return NULL;

4524

}

4525

4526

/* Set up pointers to the individual character tables */

4527

4528

if (tables == NULL) tables = pcre_default_tables;

4529

compile_block.lcc = tables + lcc_offset;

4530

compile_block.fcc = tables + fcc_offset;

4531

compile_block.cbits = tables + cbits_offset;

4532

compile_block.ctypes = tables + ctypes_offset;

4533

4534

/* Maximum back reference and backref bitmap. This is updated for numeric

4535

references during the first pass, but for named references during the actual

4536

compile pass. The bitmap records up to 31 back references to help in deciding

4537

whether (.*) can be treated as anchored or not. */

4538

4539

compile_block.top_backref = 0;

4540

compile_block.backref_map = 0;

4541

4542

/* Reflect pattern for debugging output */

4543

4544

DPRINTF(("------------------------------------------------------------------\n"));

4545

DPRINTF(("%s\n", pattern));

4546

4547

/* The first thing to do is to make a pass over the pattern to compute the

4548

amount of store required to hold the compiled code. This does not have to be

4549

perfect as long as errors are overestimates. At the same time we can detect any

4550

flag settings right at the start, and extract them. Make an attempt to correct

4551

for any counted white space if an "extended" flag setting appears late in the

4552

pattern. We can't be so clever for #-comments. */

4553

4554

ptr = (const uschar *)(pattern - 1);

4555

while ((c = *(++ptr)) != 0)

4556

{

4557

int min, max;

4558

int class_optcount;

4559

int bracket_length;

4560

int duplength;

4561

4562

/* If we are inside a \Q...\E sequence, all chars are literal */

4563

4564

if (inescq)

4565

{

4566

if ((options & PCRE_AUTO_CALLOUT) != 0) length += 2 + 2*LINK_SIZE;

4567

goto NORMAL_CHAR;

4568

}

4569

4570

/* Otherwise, first check for ignored whitespace and comments */

4571

4572

if ((options & PCRE_EXTENDED) != 0)

4573

{

4574

if ((compile_block.ctypes[c] & ctype_space) != 0) continue;

4575

if (c == '#')

4576

{

4577

/* The space before the ; is to avoid a warning on a silly compiler

4578

on the Macintosh. */

4579

while ((c = *(++ptr)) != 0 && c != NEWLINE) ;

4580

if (c == 0) break;

4581

continue;

4582

}

4583

}

4584

4585

item_count++; /* Is zero for the first non-comment item */

4586

4587

/* Allow space for auto callout before every item except quantifiers. */

4588

4589

if ((options & PCRE_AUTO_CALLOUT) != 0 &&

4590

c != '*' && c != '+' && c != '?' &&

4591

(c != '{' || !is_counted_repeat(ptr + 1)))

4592

length += 2 + 2*LINK_SIZE;

4593

4594

switch(c)

4595

{

4596

/* A backslashed item may be an escaped data character or it may be a

4597

character type. */

4598

4599

case '\\':

4600

c = check_escape(&ptr, errorptr, bracount, options, FALSE);

4601

if (*errorptr != NULL) goto PCRE_ERROR_RETURN;

4602

4603

lastitemlength = 1; /* Default length of last item for repeats */

4604

4605

if (c >= 0) /* Data character */

4606

{

4607

length += 2; /* For a one-byte character */

4608

4609

#ifdef SUPPORT_UTF8

4610

if (utf8 && c > 127)

4611

{

4612

int i;

4613

for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)

4614

if (c <= utf8_table1[i]) break;

4615

length += i;

4616

lastitemlength += i;

4617

}

4618

#endif

4619

4620

continue;

4621

}

4622

4623

/* If \Q, enter "literal" mode */

4624

4625

if (-c == ESC_Q)

4626

{

4627

inescq = TRUE;

4628

continue;

4629

}

4630

4631

/* \X is supported only if Unicode property support is compiled */

4632

4633

#ifndef SUPPORT_UCP

4634

if (-c == ESC_X)

4635

{

4636

*errorptr = ERR45;

4637

goto PCRE_ERROR_RETURN;

4638

}

4639

#endif

4640

4641

/* \P and \p are for Unicode properties, but only when the support has

4642

been compiled. Each item needs 2 bytes. */

4643

4644

else if (-c == ESC_P || -c == ESC_p)

4645

{

4646

#ifdef SUPPORT_UCP

4647

BOOL negated;

4648

length += 2;

4649

lastitemlength = 2;

4650

if (get_ucp(&ptr, &negated, errorptr) < 0) goto PCRE_ERROR_RETURN;

4651

continue;

4652

#else

4653

*errorptr = ERR45;

4654

goto PCRE_ERROR_RETURN;

4655

#endif

4656

}

4657

4658

/* Other escapes need one byte */

4659

4660

length++;

4661

4662

/* A back reference needs an additional 2 bytes, plus either one or 5

4663

bytes for a repeat. We also need to keep the value of the highest

4664

back reference. */

4665

4666

if (c <= -ESC_REF)

4667

{

4668

int refnum = -c - ESC_REF;

4669

compile_block.backref_map |= (refnum < 32)? (1 << refnum) : 1;

4670

if (refnum > compile_block.top_backref)

4671

compile_block.top_backref = refnum;

4672

length += 2; /* For single back reference */

4673

if (ptr[1] == '{' && is_counted_repeat(ptr+2))

4674

{

4675

ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);

4676

if (*errorptr != NULL) goto PCRE_ERROR_RETURN;

4677

if ((min == 0 && (max == 1 || max == -1)) ||

4678

(min == 1 && max == -1))

4679

length++;

4680

else length += 5;

4681

if (ptr[1] == '?') ptr++;

4682

}

4683

}

4684

continue;

4685

4686

case '^': /* Single-byte metacharacters */

4687

case '.':

4688

case '$':

4689

length++;

4690

lastitemlength = 1;

4691

continue;

4692

4693

case '*': /* These repeats won't be after brackets; */

4694

case '+': /* those are handled separately */

4695

case '?':

4696

length++;

4697

goto POSESSIVE; /* A few lines below */

4698

4699

/* This covers the cases of braced repeats after a single char, metachar,

4700

class, or back reference. */

4701

4702

case '{':

4703

if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;

4704

ptr = read_repeat_counts(ptr+1, &min, &max, errorptr);

4705

if (*errorptr != NULL) goto PCRE_ERROR_RETURN;

4706

4707

/* These special cases just insert one extra opcode */

4708

4709

if ((min == 0 && (max == 1 || max == -1)) ||

4710

(min == 1 && max == -1))

4711

length++;

4712

4713

/* These cases might insert additional copies of a preceding character. */

4714

4715

else

4716

{

4717

if (min != 1)

4718

{

4719

length -= lastitemlength; /* Uncount the original char or metachar */

4720

if (min > 0) length += 3 + lastitemlength;

4721

}

4722

length += lastitemlength + ((max > 0)? 3 : 1);

4723

}

4724

4725

if (ptr[1] == '?') ptr++; /* Needs no extra length */

4726

4727

POSESSIVE: /* Test for possessive quantifier */

4728

if (ptr[1] == '+')

4729

{

4730

ptr++;

4731

length += 2 + 2*LINK_SIZE; /* Allow for atomic brackets */

4732

}

4733

continue;

4734

4735

/* An alternation contains an offset to the next branch or ket. If any ims

4736

options changed in the previous branch(es), and/or if we are in a

4737

lookbehind assertion, extra space will be needed at the start of the

4738

branch. This is handled by branch_extra. */

4739

4740

case '|':

4741

length += 1 + LINK_SIZE + branch_extra;

4742

continue;

4743

4744

/* A character class uses 33 characters provided that all the character

4745

values are less than 256. Otherwise, it uses a bit map for low valued

4746

characters, and individual items for others. Don't worry about character

4747

types that aren't allowed in classes - they'll get picked up during the

4748

compile. A character class that contains only one single-byte character

4749

uses 2 or 3 bytes, depending on whether it is negated or not. Notice this

4750

where we can. (In UTF-8 mode we can do this only for chars < 128.) */

4751

4752

case '[':

4753

if (*(++ptr) == '^')

4754

{

4755

class_optcount = 10; /* Greater than one */

4756

ptr++;

4757

}

4758

else class_optcount = 0;

4759

4760

#ifdef SUPPORT_UTF8

4761

class_utf8 = FALSE;

4762

#endif

4763

4764

/* Written as a "do" so that an initial ']' is taken as data */

4765

4766

if (*ptr != 0) do

4767

{

4768

/* Inside \Q...\E everything is literal except \E */

4769

4770

if (inescq)

4771

{

4772

if (*ptr != '\\' || ptr[1] != 'E') goto GET_ONE_CHARACTER;

4773

inescq = FALSE;

4774

ptr += 1;

4775

continue;

4776

}

4777

4778

/* Outside \Q...\E, check for escapes */

4779

4780

if (*ptr == '\\')

4781

{

4782

c = check_escape(&ptr, errorptr, bracount, options, TRUE);

4783

if (*errorptr != NULL) goto PCRE_ERROR_RETURN;

4784

4785

/* \b is backspace inside a class; \X is literal */

4786

4787

if (-c == ESC_b) c = '\b';

4788

else if (-c == ESC_X) c = 'X';

4789

4790

/* \Q enters quoting mode */

4791

4792

else if (-c == ESC_Q)

4793

{

4794

inescq = TRUE;

4795

continue;

4796

}

4797

4798

/* Handle escapes that turn into characters */

4799

4800

if (c >= 0) goto NON_SPECIAL_CHARACTER;

4801

4802

/* Escapes that are meta-things. The normal ones just affect the

4803

bit map, but Unicode properties require an XCLASS extended item. */

4804

4805

else

4806

{

4807

class_optcount = 10; /* \d, \s etc; make sure > 1 */

4808

#ifdef SUPPORT_UTF8

4809

if (-c == ESC_p || -c == ESC_P)

4810

{

4811

if (!class_utf8)

4812

{

4813

class_utf8 = TRUE;

4814

length += LINK_SIZE + 2;

4815

}

4816

length += 2;

4817

}

4818

#endif

4819

}

4820

}

4821

4822

/* Check the syntax for POSIX stuff. The bits we actually handle are

4823

checked during the real compile phase. */

4824

4825

else if (*ptr == '[' && check_posix_syntax(ptr, &ptr, &compile_block))

4826

{

4827

ptr++;

4828

class_optcount = 10; /* Make sure > 1 */

4829

}

4830

4831

/* Anything else increments the possible optimization count. We have to

4832

detect ranges here so that we can compute the number of extra ranges for

4833

caseless wide characters when UCP support is available. If there are wide

4834

characters, we are going to have to use an XCLASS, even for single

4835

characters. */

4836

4837

else

4838

{

4839

int d;

4840

4841

GET_ONE_CHARACTER:

4842

4843

#ifdef SUPPORT_UTF8

4844

if (utf8)

4845

{

4846

int extra = 0;

4847

GETCHARLEN(c, ptr, extra);

4848

ptr += extra;

4849

}

4850

else c = *ptr;

4851

#else

4852

c = *ptr;

4853

#endif

4854

4855

/* Come here from handling \ above when it escapes to a char value */

4856

4857

NON_SPECIAL_CHARACTER:

4858

class_optcount++;

4859

4860

d = -1;

4861

if (ptr[1] == '-')

4862

{

4863

uschar const *hyptr = ptr++;

4864

if (ptr[1] == '\\')

4865

{

4866

ptr++;

4867

d = check_escape(&ptr, errorptr, bracount, options, TRUE);

4868

if (*errorptr != NULL) goto PCRE_ERROR_RETURN;

4869

if (-d == ESC_b) d = '\b'; /* backspace */

4870

else if (-d == ESC_X) d = 'X'; /* literal X in a class */

4871

}

4872

else if (ptr[1] != 0 && ptr[1] != ']')

4873

{

4874

ptr++;

4875

#ifdef SUPPORT_UTF8

4876

if (utf8)

4877

{

4878

int extra = 0;

4879

GETCHARLEN(d, ptr, extra);

4880

ptr += extra;

4881

}

4882

else

4883

#endif

4884

d = *ptr;

4885

}

4886

if (d < 0) ptr = hyptr; /* go back to hyphen as data */

4887

}

4888

4889

/* If d >= 0 we have a range. In UTF-8 mode, if the end is > 255, or >

4890

127 for caseless matching, we will need to use an XCLASS. */

4891

4892

if (d >= 0)

4893

{

4894

class_optcount = 10; /* Ensure > 1 */

4895

if (d < c)

4896

{

4897

*errorptr = ERR8;

4898

goto PCRE_ERROR_RETURN;

4899

}

4900

4901

#ifdef SUPPORT_UTF8

4902

if (utf8 && (d > 255 || ((options & PCRE_CASELESS) != 0 && d > 127)))

4903

{

4904

uschar buffer[6];

4905

if (!class_utf8) /* Allow for XCLASS overhead */

4906

{

4907

class_utf8 = TRUE;

4908

length += LINK_SIZE + 2;

4909

}

4910

4911

#ifdef SUPPORT_UCP

4912

/* If we have UCP support, find out how many extra ranges are

4913

needed to map the other case of characters within this range. We

4914

have to mimic the range optimization here, because extending the

4915

range upwards might push d over a boundary that makes is use

4916

another byte in the UTF-8 representation. */

4917

4918

if ((options & PCRE_CASELESS) != 0)

4919

{

4920

int occ, ocd;

4921

int cc = c;

4922

int origd = d;

4923

while (get_othercase_range(&cc, origd, &occ, &ocd))

4924

{

4925

if (occ >= c && ocd <= d) continue; /* Skip embedded */

4926

4927

if (occ < c && ocd >= c - 1) /* Extend the basic range */

4928

{ /* if there is overlap, */

4929

c = occ; /* noting that if occ < c */

4930

continue; /* we can't have ocd > d */

4931

} /* because a subrange is */

4932

if (ocd > d && occ <= d + 1) /* always shorter than */

4933

{ /* the basic range. */

4934

d = ocd;

4935

continue;

4936

}

4937

4938

/* An extra item is needed */

4939

4940

length += 1 + ord2utf8(occ, buffer) +

4941

((occ == ocd)? 0 : ord2utf8(ocd, buffer));

4942

}

4943

}

4944

#endif /* SUPPORT_UCP */

4945

4946

/* The length of the (possibly extended) range */

4947

4948

length += 1 + ord2utf8(c, buffer) + ord2utf8(d, buffer);

4949

}

4950

#endif /* SUPPORT_UTF8 */

4951

4952

}

4953

4954

/* We have a single character. There is nothing to be done unless we

4955

are in UTF-8 mode. If the char is > 255, or 127 when caseless, we must

4956

allow for an XCL_SINGLE item, doubled for caselessness if there is UCP

4957

support. */

4958

4959

else

4960

{

4961

#ifdef SUPPORT_UTF8

4962

if (utf8 && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127)))

4963

{

4964

uschar buffer[6];

4965

class_optcount = 10; /* Ensure > 1 */

4966

if (!class_utf8) /* Allow for XCLASS overhead */

4967

{

4968

class_utf8 = TRUE;

4969

length += LINK_SIZE + 2;

4970

}

4971

#ifdef SUPPORT_UCP

4972

length += (((options & PCRE_CASELESS) != 0)? 2 : 1) *

4973

(1 + ord2utf8(c, buffer));

4974

#else /* SUPPORT_UCP */

4975

length += 1 + ord2utf8(c, buffer);

4976

#endif /* SUPPORT_UCP */

4977

}

4978

#endif /* SUPPORT_UTF8 */

4979

}

4980

}

4981

}

4982

while (*(++ptr) != 0 && (inescq || *ptr != ']')); /* Concludes "do" above */

4983

4984

if (*ptr == 0) /* Missing terminating ']' */

4985

{

4986

*errorptr = ERR6;

4987

goto PCRE_ERROR_RETURN;

4988

}

4989

4990

/* We can optimize when there was only one optimizable character. Repeats

4991

for positive and negated single one-byte chars are handled by the general

4992

code. Here, we handle repeats for the class opcodes. */

4993

4994

if (class_optcount == 1) length += 3; else

4995

{

4996

length += 33;

4997

4998

/* A repeat needs either 1 or 5 bytes. If it is a possessive quantifier,

4999

we also need extra for wrapping the whole thing in a sub-pattern. */

5000

5001

if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2))

5002

{

5003

ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);

5004

if (*errorptr != NULL) goto PCRE_ERROR_RETURN;

5005

if ((min == 0 && (max == 1 || max == -1)) ||

5006

(min == 1 && max == -1))

5007

length++;

5008

else length += 5;

5009

if (ptr[1] == '+')

5010

{

5011

ptr++;

5012

length += 2 + 2*LINK_SIZE;

5013

}

5014

else if (ptr[1] == '?') ptr++;

5015

}

5016

}

5017

continue;

5018

5019

/* Brackets may be genuine groups or special things */

5020

5021

case '(':

5022

branch_newextra = 0;

5023

bracket_length = 1 + LINK_SIZE;

5024

5025

/* Handle special forms of bracket, which all start (? */

5026

5027

if (ptr[1] == '?')

5028

{

5029

int set, unset;

5030

int *optset;

5031

5032

switch (c = ptr[2])

5033

{

5034

/* Skip over comments entirely */

5035

case '#':

5036

ptr += 3;

5037

while (*ptr != 0 && *ptr != ')') ptr++;

5038

if (*ptr == 0)

5039

{

5040

*errorptr = ERR18;

5041

goto PCRE_ERROR_RETURN;

5042

}

5043

continue;

5044

5045

/* Non-referencing groups and lookaheads just move the pointer on, and

5046

then behave like a non-special bracket, except that they don't increment

5047

the count of extracting brackets. Ditto for the "once only" bracket,

5048

which is in Perl from version 5.005. */

5049

5050

case ':':

5051

case '=':

5052

case '!':

5053

case '>':

5054

ptr += 2;

5055

break;

5056

5057

/* (?R) specifies a recursive call to the regex, which is an extension

5058

to provide the facility which can be obtained by (?p{perl-code}) in

5059

Perl 5.6. In Perl 5.8 this has become (??{perl-code}).

5060

5061

From PCRE 4.00, items such as (?3) specify subroutine-like "calls" to

5062

the appropriate numbered brackets. This includes both recursive and

5063

non-recursive calls. (?R) is now synonymous with (?0). */

5064

5065

case 'R':

5066

ptr++;

5067

5068

case '0': case '1': case '2': case '3': case '4':

5069

case '5': case '6': case '7': case '8': case '9':

5070

ptr += 2;

5071

if (c != 'R')

5072

while ((digitab[*(++ptr)] & ctype_digit) != 0);

5073

if (*ptr != ')')

5074

{

5075

*errorptr = ERR29;

5076

goto PCRE_ERROR_RETURN;

5077

}

5078

length += 1 + LINK_SIZE;

5079

5080

/* If this item is quantified, it will get wrapped inside brackets so

5081

as to use the code for quantified brackets. We jump down and use the

5082

code that handles this for real brackets. */

5083

5084

if (ptr[1] == '+' || ptr[1] == '*' || ptr[1] == '?' || ptr[1] == '{')

5085

{

5086

length += 2 + 2 * LINK_SIZE; /* to make bracketed */

5087

duplength = 5 + 3 * LINK_SIZE;

5088

goto HANDLE_QUANTIFIED_BRACKETS;

5089

}

5090

continue;

5091

5092

/* (?C) is an extension which provides "callout" - to provide a bit of

5093

the functionality of the Perl (?{...}) feature. An optional number may

5094

follow (default is zero). */

5095

5096

case 'C':

5097

ptr += 2;

5098

while ((digitab[*(++ptr)] & ctype_digit) != 0);

5099

if (*ptr != ')')

5100

{

5101

*errorptr = ERR39;

5102

goto PCRE_ERROR_RETURN;

5103

}

5104

length += 2 + 2*LINK_SIZE;

5105

continue;

5106

5107

/* Named subpatterns are an extension copied from Python */

5108

5109

case 'P':

5110

ptr += 3;

5111

if (*ptr == '<')

5112

{

5113

const uschar *p; /* Don't amalgamate; some compilers */

5114

p = ++ptr; /* grumble at autoincrement in declaration */

5115

while ((compile_block.ctypes[*ptr] & ctype_word) != 0) ptr++;

5116

if (*ptr != '>')

5117

{

5118

*errorptr = ERR42;

5119

goto PCRE_ERROR_RETURN;

5120

}

5121

name_count++;

5122

if (ptr - p > max_name_size) max_name_size = (ptr - p);

5123

break;

5124

}

5125

5126

if (*ptr == '=' || *ptr == '>')

5127

{

5128

while ((compile_block.ctypes[*(++ptr)] & ctype_word) != 0);

5129

if (*ptr != ')')

5130

{

5131

*errorptr = ERR42;

5132

goto PCRE_ERROR_RETURN;

5133

}

5134

break;

5135

}

5136

5137

/* Unknown character after (?P */

5138

5139

*errorptr = ERR41;

5140

goto PCRE_ERROR_RETURN;

5141

5142

/* Lookbehinds are in Perl from version 5.005 */

5143

5144

case '<':

5145

ptr += 3;

5146

if (*ptr == '=' || *ptr == '!')

5147

{

5148

branch_newextra = 1 + LINK_SIZE;

5149

length += 1 + LINK_SIZE; /* For the first branch */

5150

break;

5151

}

5152

*errorptr = ERR24;

5153

goto PCRE_ERROR_RETURN;

5154

5155

/* Conditionals are in Perl from version 5.005. The bracket must either

5156

be followed by a number (for bracket reference) or by an assertion

5157

group, or (a PCRE extension) by 'R' for a recursion test. */

5158

5159

case '(':

5160

if (ptr[3] == 'R' && ptr[4] == ')')

5161

{

5162

ptr += 4;

5163

length += 3;

5164

}

5165

else if ((digitab[ptr[3]] & ctype_digit) != 0)

5166

{

5167

ptr += 4;

5168

length += 3;

5169

while ((digitab[*ptr] & ctype_digit) != 0) ptr++;

5170

if (*ptr != ')')

5171

{

5172

*errorptr = ERR26;

5173

goto PCRE_ERROR_RETURN;

5174

}

5175

}

5176

else /* An assertion must follow */

5177

{

5178

ptr++; /* Can treat like ':' as far as spacing is concerned */

5179

if (ptr[2] != '?' ||

5180

(ptr[3] != '=' && ptr[3] != '!' && ptr[3] != '<') )

5181

{

5182

ptr += 2; /* To get right offset in message */

5183

*errorptr = ERR28;

5184

goto PCRE_ERROR_RETURN;

5185

}

5186

}

5187

break;

5188

5189

/* Else loop checking valid options until ) is met. Anything else is an

5190

error. If we are without any brackets, i.e. at top level, the settings

5191

act as if specified in the options, so massage the options immediately.

5192

This is for backward compatibility with Perl 5.004. */

5193

5194

default:

5195

set = unset = 0;

5196

optset = &set;

5197

ptr += 2;

5198

5199

for (;; ptr++)

5200

{

5201

c = *ptr;

5202

switch (c)

5203

{

5204

case 'i':

5205

*optset |= PCRE_CASELESS;

5206

continue;

5207

5208

case 'm':

5209

*optset |= PCRE_MULTILINE;

5210

continue;

5211

5212

case 's':

5213

*optset |= PCRE_DOTALL;

5214

continue;

5215

5216

case 'x':

5217

*optset |= PCRE_EXTENDED;

5218

continue;

5219

5220

case 'X':

5221

*optset |= PCRE_EXTRA;

5222

continue;

5223

5224

case 'U':

5225

*optset |= PCRE_UNGREEDY;

5226

continue;

5227

5228

case '-':

5229

optset = &unset;

5230

continue;

5231

5232

/* A termination by ')' indicates an options-setting-only item; if

5233

this is at the very start of the pattern (indicated by item_count

5234

being zero), we use it to set the global options. This is helpful

5235

when analyzing the pattern for first characters, etc. Otherwise

5236

nothing is done here and it is handled during the compiling

5237

process.

5238

5239

[Historical note: Up to Perl 5.8, options settings at top level

5240

were always global settings, wherever they appeared in the pattern.

5241

That is, they were equivalent to an external setting. From 5.8

5242

onwards, they apply only to what follows (which is what you might

5243

expect).] */

5244

5245

case ')':

5246

if (item_count == 0)

5247

{

5248

options = (options | set) & (~unset);

5249

set = unset = 0; /* To save length */

5250

item_count--; /* To allow for several */

5251

}

5252

5253

/* Fall through */

5254

5255

/* A termination by ':' indicates the start of a nested group with

5256

the given options set. This is again handled at compile time, but

5257

we must allow for compiled space if any of the ims options are

5258

set. We also have to allow for resetting space at the end of

5259

the group, which is why 4 is added to the length and not just 2.

5260

If there are several changes of options within the same group, this

5261

will lead to an over-estimate on the length, but this shouldn't

5262

matter very much. We also have to allow for resetting options at

5263

the start of any alternations, which we do by setting

5264

branch_newextra to 2. Finally, we record whether the case-dependent

5265

flag ever changes within the regex. This is used by the "required

5266

character" code. */

5267

5268

case ':':

5269

if (((set|unset) & PCRE_IMS) != 0)

5270

{

5271

length += 4;

5272

branch_newextra = 2;

5273

if (((set|unset) & PCRE_CASELESS) != 0) options |= PCRE_ICHANGED;

5274

}

5275

goto END_OPTIONS;

5276

5277

/* Unrecognized option character */

5278

5279

default:

5280

*errorptr = ERR12;

5281

goto PCRE_ERROR_RETURN;

5282

}

5283

}

5284

5285

/* If we hit a closing bracket, that's it - this is a freestanding

5286

option-setting. We need to ensure that branch_extra is updated if

5287

necessary. The only values branch_newextra can have here are 0 or 2.

5288

If the value is 2, then branch_extra must either be 2 or 5, depending

5289

on whether this is a lookbehind group or not. */

5290

5291

END_OPTIONS:

5292

if (c == ')')

5293

{

5294

if (branch_newextra == 2 &&

5295

(branch_extra == 0 || branch_extra == 1+LINK_SIZE))

5296

branch_extra += branch_newextra;

5297

continue;

5298

}

5299

5300

/* If options were terminated by ':' control comes here. Fall through

5301

to handle the group below. */

5302

}

5303

}

5304

5305

/* Extracting brackets must be counted so we can process escapes in a

5306

Perlish way. If the number exceeds EXTRACT_BASIC_MAX we are going to

5307

need an additional 3 bytes of store per extracting bracket. However, if

5308

PCRE_NO_AUTO)CAPTURE is set, unadorned brackets become non-capturing, so we

5309

must leave the count alone (it will aways be zero). */

5310

5311

else if ((options & PCRE_NO_AUTO_CAPTURE) == 0)

5312

{

5313

bracount++;

5314

if (bracount > EXTRACT_BASIC_MAX) bracket_length += 3;

5315

}

5316

5317

/* Save length for computing whole length at end if there's a repeat that

5318

requires duplication of the group. Also save the current value of

5319

branch_extra, and start the new group with the new value. If non-zero, this

5320

will either be 2 for a (?imsx: group, or 3 for a lookbehind assertion. */

5321

5322

if (brastackptr >= sizeof(brastack)/sizeof(int))

5323

{

5324

*errorptr = ERR19;

5325

goto PCRE_ERROR_RETURN;

5326

}

5327

5328

bralenstack[brastackptr] = branch_extra;

5329

branch_extra = branch_newextra;

5330

5331

brastack[brastackptr++] = length;

5332

length += bracket_length;

5333

continue;

5334

5335

/* Handle ket. Look for subsequent max/min; for certain sets of values we

5336

have to replicate this bracket up to that many times. If brastackptr is

5337

0 this is an unmatched bracket which will generate an error, but take care

5338

not to try to access brastack[-1] when computing the length and restoring

5339

the branch_extra value. */

5340

5341

case ')':

5342

length += 1 + LINK_SIZE;

5343

if (brastackptr > 0)

5344

{

5345

duplength = length - brastack[--brastackptr];

5346

branch_extra = bralenstack[brastackptr];

5347

}

5348

else duplength = 0;

5349

5350

/* The following code is also used when a recursion such as (?3) is

5351

followed by a quantifier, because in that case, it has to be wrapped inside

5352

brackets so that the quantifier works. The value of duplength must be

5353

set before arrival. */

5354

5355

HANDLE_QUANTIFIED_BRACKETS:

5356

5357

/* Leave ptr at the final char; for read_repeat_counts this happens

5358

automatically; for the others we need an increment. */

5359

5360

if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))

5361

{

5362

ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);

5363

if (*errorptr != NULL) goto PCRE_ERROR_RETURN;

5364

}

5365

else if (c == '*') { min = 0; max = -1; ptr++; }

5366

else if (c == '+') { min = 1; max = -1; ptr++; }

5367

else if (c == '?') { min = 0; max = 1; ptr++; }

5368

else { min = 1; max = 1; }

5369

5370

/* If the minimum is zero, we have to allow for an OP_BRAZERO before the

5371

group, and if the maximum is greater than zero, we have to replicate

5372

maxval-1 times; each replication acquires an OP_BRAZERO plus a nesting

5373

bracket set. */

5374

5375

if (min == 0)

5376

{

5377

length++;

5378

if (max > 0) length += (max - 1) * (duplength + 3 + 2*LINK_SIZE);

5379

}

5380

5381

/* When the minimum is greater than zero, we have to replicate up to

5382

minval-1 times, with no additions required in the copies. Then, if there

5383

is a limited maximum we have to replicate up to maxval-1 times allowing

5384

for a BRAZERO item before each optional copy and nesting brackets for all

5385

but one of the optional copies. */

5386

5387

else

5388

{

5389

length += (min - 1) * duplength;

5390

if (max > min) /* Need this test as max=-1 means no limit */

5391

length += (max - min) * (duplength + 3 + 2*LINK_SIZE)

5392

- (2 + 2*LINK_SIZE);

5393

}

5394

5395

/* Allow space for once brackets for "possessive quantifier" */

5396

5397

if (ptr[1] == '+')

5398

{

5399

ptr++;

5400

length += 2 + 2*LINK_SIZE;

5401

}

5402

continue;

5403

5404

/* Non-special character. It won't be space or # in extended mode, so it is

5405

always a genuine character. If we are in a \Q...\E sequence, check for the

5406

end; if not, we have a literal. */

5407

5408

default:

5409

NORMAL_CHAR:

5410

5411

if (inescq && c == '\\' && ptr[1] == 'E')

5412

{

5413

inescq = FALSE;

5414

ptr++;

5415

continue;

5416

}

5417

5418

length += 2; /* For a one-byte character */

5419

lastitemlength = 1; /* Default length of last item for repeats */

5420

5421

/* In UTF-8 mode, check for additional bytes. */

5422

5423

#ifdef SUPPORT_UTF8

5424

if (utf8 && (c & 0xc0) == 0xc0)

5425

{

5426

while ((ptr[1] & 0xc0) == 0x80) /* Can't flow over the end */

5427

{ /* because the end is marked */

5428

lastitemlength++; /* by a zero byte. */

5429

length++;

5430

ptr++;

5431

}

5432

}

5433

#endif

5434

5435

continue;

5436

}

5437

}

5438

5439

length += 2 + LINK_SIZE; /* For final KET and END */

5440

5441

if ((options & PCRE_AUTO_CALLOUT) != 0)

5442

length += 2 + 2*LINK_SIZE; /* For final callout */

5443

5444

if (length > MAX_PATTERN_SIZE)

5445

{

5446

*errorptr = ERR20;

5447

return NULL;

5448

}

5449

5450

/* Compute the size of data block needed and get it, either from malloc or

5451

externally provided function. */

5452

5453

size = length + sizeof(real_pcre) + name_count * (max_name_size + 3);

5454

re = (real_pcre *)(pcre_malloc)(size);

5455

5456

if (re == NULL)

5457

{

5458

*errorptr = ERR21;

5459

return NULL;

5460

}

5461

5462

/* Put in the magic number, and save the sizes, options, and character table

5463

pointer. NULL is used for the default character tables. The nullpad field is at

5464

the end; it's there to help in the case when a regex compiled on a system with

5465

4-byte pointers is run on another with 8-byte pointers. */

5466

5467

re->magic_number = MAGIC_NUMBER;

5468

re->size = size;

5469

re->options = options;

5470

re->dummy1 = re->dummy2 = 0;

5471

re->name_table_offset = sizeof(real_pcre);

5472

re->name_entry_size = max_name_size + 3;

5473

re->name_count = name_count;

5474

re->tables = (tables == pcre_default_tables)? NULL : tables;

5475

re->nullpad = NULL;

5476

5477

/* The starting points of the name/number translation table and of the code are

5478

passed around in the compile data block. */

5479

5480

compile_block.names_found = 0;

5481

compile_block.name_entry_size = max_name_size + 3;

5482

compile_block.name_table = (uschar *)re + re->name_table_offset;

5483

codestart = compile_block.name_table + re->name_entry_size * re->name_count;

5484

compile_block.start_code = codestart;

5485

compile_block.start_pattern = (const uschar *)pattern;

5486

compile_block.req_varyopt = 0;

5487

compile_block.nopartial = FALSE;

5488

5489

/* Set up a starting, non-extracting bracket, then compile the expression. On

5490

error, *errorptr will be set non-NULL, so we don't need to look at the result

5491

of the function here. */

5492

5493

ptr = (const uschar *)pattern;

5494

code = (uschar *)codestart;

5495

*code = OP_BRA;

5496

bracount = 0;

5497

(void)compile_regex(options, options & PCRE_IMS, &bracount, &code, &ptr,

5498

errorptr, FALSE, 0, &firstbyte, &reqbyte, NULL, &compile_block);

5499

re->top_bracket = bracount;

5500

re->top_backref = compile_block.top_backref;

5501

5502

if (compile_block.nopartial) re->options |= PCRE_NOPARTIAL;

5503

5504

/* If not reached end of pattern on success, there's an excess bracket. */

5505

5506

if (*errorptr == NULL && *ptr != 0) *errorptr = ERR22;

5507

5508

/* Fill in the terminating state and check for disastrous overflow, but

5509

if debugging, leave the test till after things are printed out. */

5510

5511

*code++ = OP_END;

5512

5513

#ifndef DEBUG

5514

if (code - codestart > length) *errorptr = ERR23;

5515

#endif

5516

5517

/* Give an error if there's back reference to a non-existent capturing

5518

subpattern. */

5519

5520

if (re->top_backref > re->top_bracket) *errorptr = ERR15;

5521

5522

/* Failed to compile, or error while post-processing */

5523

5524

if (*errorptr != NULL)

5525

{

5526

(pcre_free)(re);

5527

PCRE_ERROR_RETURN:

5528

*erroroffset = ptr - (const uschar *)pattern;

5529

return NULL;

5530

}

5531

5532

/* If the anchored option was not passed, set the flag if we can determine that

5533

the pattern is anchored by virtue of ^ characters or \A or anything else (such

5534

as starting with .* when DOTALL is set).

5535

5536

Otherwise, if we know what the first character has to be, save it, because that

5537

speeds up unanchored matches no end. If not, see if we can set the

5538

PCRE_STARTLINE flag. This is helpful for multiline matches when all branches

5539

start with ^. and also when all branches start with .* for non-DOTALL matches.

5540

5541

5542

if ((options & PCRE_ANCHORED) == 0)

5543

{

5544

int temp_options = options;

5545

if (is_anchored(codestart, &temp_options, 0, compile_block.backref_map))

5546

re->options |= PCRE_ANCHORED;

5547

else

5548

{

5549

if (firstbyte < 0)

5550

firstbyte = find_firstassertedchar(codestart, &temp_options, FALSE);

5551

if (firstbyte >= 0) /* Remove caseless flag for non-caseable chars */

5552

{

5553

int ch = firstbyte & 255;

5554

re->first_byte = ((firstbyte & REQ_CASELESS) != 0 &&

5555

compile_block.fcc[ch] == ch)? ch : firstbyte;

5556

re->options |= PCRE_FIRSTSET;

5557

}

5558

else if (is_startline(codestart, 0, compile_block.backref_map))

5559

re->options |= PCRE_STARTLINE;

5560

}

5561

}

5562

5563

/* For an anchored pattern, we use the "required byte" only if it follows a

5564

variable length item in the regex. Remove the caseless flag for non-caseable

5565

bytes. */

5566

5567

if (reqbyte >= 0 &&

5568

((re->options & PCRE_ANCHORED) == 0 || (reqbyte & REQ_VARY) != 0))

5569

{

5570

int ch = reqbyte & 255;

5571

re->req_byte = ((reqbyte & REQ_CASELESS) != 0 &&

5572

compile_block.fcc[ch] == ch)? (reqbyte & ~REQ_CASELESS) : reqbyte;

5573

re->options |= PCRE_REQCHSET;

5574

}

5575

5576

/* Print out the compiled data for debugging */

5577

5578

#ifdef DEBUG

5579

5580

printf("Length = %d top_bracket = %d top_backref = %d\n",

5581

length, re->top_bracket, re->top_backref);

5582

5583

if (re->options != 0)

5584

{

5585

printf("%s%s%s%s%s%s%s%s%s%s\n",

5586

((re->options & PCRE_NOPARTIAL) != 0)? "nopartial " : "",

5587

((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",

5588

((re->options & PCRE_CASELESS) != 0)? "caseless " : "",

5589

((re->options & PCRE_ICHANGED) != 0)? "case state changed " : "",

5590

((re->options & PCRE_EXTENDED) != 0)? "extended " : "",

5591

((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",

5592

((re->options & PCRE_DOTALL) != 0)? "dotall " : "",

5593

((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",

5594

((re->options & PCRE_EXTRA) != 0)? "extra " : "",

5595

((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : "");

5596

}

5597

5598

if ((re->options & PCRE_FIRSTSET) != 0)

5599

{

5600

int ch = re->first_byte & 255;

5601

const char *caseless = ((re->first_byte & REQ_CASELESS) == 0)? "" : " (caseless)";

5602

if (isprint(ch)) printf("First char = %c%s\n", ch, caseless);

5603

else printf("First char = \\x%02x%s\n", ch, caseless);

5604

}

5605

5606

if ((re->options & PCRE_REQCHSET) != 0)

5607

{

5608

int ch = re->req_byte & 255;

5609

const char *caseless = ((re->req_byte & REQ_CASELESS) == 0)? "" : " (caseless)";

5610

if (isprint(ch)) printf("Req char = %c%s\n", ch, caseless);

5611

else printf("Req char = \\x%02x%s\n", ch, caseless);

5612

}

5613

5614

print_internals(re, stdout);

5615

5616

/* This check is done here in the debugging case so that the code that

5617

was compiled can be seen. */

5618

5619

if (code - codestart > length)

5620

{

5621

*errorptr = ERR23;

5622

(pcre_free)(re);

5623

*erroroffset = ptr - (uschar *)pattern;

5624

return NULL;

5625

}

5626

#endif

5627

5628

return (pcre *)re;

5629

}

5630

5631

5632

5633

/*************************************************

5634

* Match a back-reference *

5635

*************************************************/

5636

5637

/* If a back reference hasn't been set, the length that is passed is greater

5638

than the number of characters left in the string, so the match fails.

5639

5640

Arguments:

5641

offset index into the offset vector

5642

eptr points into the subject

5643

length length to be matched

5644

md points to match data block

5645

ims the ims flags

5646

5647

Returns: TRUE if matched
*/

5648

5649

5650

static BOOL

5651

match_ref(int offset, register const uschar *eptr, int length, match_data *md,

5652

unsigned long int ims)

5653

{

5654

const uschar *p = md->start_subject + md->offset_vector[offset];

5655

5656

#ifdef DEBUG

5657

if (eptr >= md->end_subject)

5658

printf("matching subject <null>");

5659

else

5660

{

5661

printf("matching subject ");

5662

pchars(eptr, length, TRUE, md);

5663

}

5664

printf(" against backref ");

5665

pchars(p, length, FALSE, md);

5666

printf("\n");

5667

#endif

5668

5669

/* Always fail if not enough characters left */

5670

5671

if (length > md->end_subject - eptr) return FALSE;

5672

5673

/* Separate the caselesss case for speed */

5674

5675

if ((ims & PCRE_CASELESS) != 0)

5676

{

5677

while (length-- > 0)

5678

if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;

5679

}

5680

else

5681

{ while (length-- > 0) if (*p++ != *eptr++) return FALSE; }

5682

5683

return TRUE;

5684

}

5685

5686

5687

#ifdef SUPPORT_UTF8

5688

/*************************************************

5689

* Match character against an XCLASS *

5690

*************************************************/

5691

5692

/* This function is called from within the XCLASS code below, to match a

5693

character against an extended class which might match values > 255.

5694

5695

Arguments:

5696

c the character

5697

data points to the flag byte of the XCLASS data

5698

5699

Returns: TRUE if character matches, else FALSE
*/

5700

5701

5702

static BOOL

5703

match_xclass(int c, const uschar *data)

5704

{

5705

int t;

5706

BOOL negated = (*data & XCL_NOT) != 0;

5707

5708

/* Character values < 256 are matched against a bitmap, if one is present. If

5709

not, we still carry on, because there may be ranges that start below 256 in the

5710

additional data. */

5711

5712

if (c < 256)

5713

{

5714

if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0)

5715

return !negated; /* char found */

5716

}

5717

5718

/* First skip the bit map if present. Then match against the list of Unicode

5719

properties or large chars or ranges that end with a large char. We won't ever

5720

encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */

5721

5722

if ((*data++ & XCL_MAP) != 0) data += 32;

5723

5724

while ((t = *data++) != XCL_END)

5725

{

5726

int x, y;

5727

if (t == XCL_SINGLE)

5728

{

5729

GETCHARINC(x, data);

5730

if (c == x) return !negated;

5731

}

5732

else if (t == XCL_RANGE)

5733

{

5734

GETCHARINC(x, data);

5735

GETCHARINC(y, data);

5736

if (c >= x && c <= y) return !negated;

5737

}

5738

5739

#ifdef SUPPORT_UCP

5740

else /* XCL_PROP & XCL_NOTPROP */

5741

{

5742

int chartype, othercase;

5743

int rqdtype = *data++;

5744

int category = ucp_findchar(c, &chartype, &othercase);

5745

if (rqdtype >= 128)

5746

{

5747

if ((rqdtype - 128 == category) == (t == XCL_PROP)) return !negated;

5748

}

5749

else

5750

{

5751

if ((rqdtype == chartype) == (t == XCL_PROP)) return !negated;

5752

}

5753

}

5754

#endif /* SUPPORT_UCP */

5755

}

5756

5757

return negated; /* char did not match */

5758

}

5759

#endif

5760

5761

5762

/***************************************************************************

5763

****************************************************************************

5764

RECURSION IN THE match() FUNCTION

5765

5766

The match() function is highly recursive. Some regular expressions can cause

5767

it to recurse thousands of times. I was writing for Unix, so I just let it

5768

call itself recursively. This uses the stack for saving everything that has

5769

to be saved for a recursive call. On Unix, the stack can be large, and this

5770

works fine.

5771

5772

It turns out that on non-Unix systems there are problems with programs that

5773

use a lot of stack. (This despite the fact that every last chip has oodles

5774

of memory these days, and techniques for extending the stack have been known

5775

for decades.) So....

5776

5777

There is a fudge, triggered by defining NO_RECURSE, which avoids recursive

5778

calls by keeping local variables that need to be preserved in blocks of memory

5779

obtained from malloc instead of on the stack. Macros are used to

5780

achieve this so that the actual code doesn't look very different to what it

5781

always used to.

5782

****************************************************************************

5783

***************************************************************************/

5784

5785

5786

/* These versions of the macros use the stack, as normal */

5787

5788

#ifndef NO_RECURSE

5789

/* Recursive build: REGISTER expands to the C "register" keyword. */
#define REGISTER register

5790

/* Recursive build: store in rx the result of calling match() directly with
the remaining seven arguments. */
#define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) rx = match(ra,rb,rc,rd,re,rf,rg)

5791

/* Recursive build: an ordinary return of ra from the enclosing function. */
#define RRETURN(ra) return ra

5792

#else

5793

5794

5795

/* These versions of the macros manage a private stack on the heap. Note

5796

that the rd argument of RMATCH isn't actually used. It's the md argument of

5797

match(), which never changes. */

5798

5799

#define REGISTER

/* Heap-frame form of RMATCH. The whole expansion is a single braced block:

  . a new heapframe is obtained with pcre_stack_malloc;
  . setjmp() records the resume point in the CURRENT frame's Xwhere;
  . on the direct path (setjmp returns 0) the argument slots of the new frame
    are filled in from ra, rb, rc, re, rf and rg, the new frame is chained to
    the current one through Xprevframe and made current, and control jumps to
    the HEAP_RECURSE label in match();
  . on the longjmp path (setjmp returns non-zero) the current frame is
    reloaded from md->thisframe and its Xresult is copied into rx.

The rd argument is not referenced by the expansion. Both arms of the if/else
must be braced, because each contains several statements.

NOTE(review): the pointer returned by pcre_stack_malloc is used without a NULL
check - confirm whether allocation failure needs handling here. */

#define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\
  {\
  heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\
  if (setjmp(frame->Xwhere) == 0)\
    {\
    newframe->Xeptr = ra;\
    newframe->Xecode = rb;\
    newframe->Xoffset_top = rc;\
    newframe->Xims = re;\
    newframe->Xeptrb = rf;\
    newframe->Xflags = rg;\
    newframe->Xprevframe = frame;\
    frame = newframe;\
    DPRINTF(("restarting from line %d\n", __LINE__));\
    goto HEAP_RECURSE;\
    }\
  else\
    {\
    DPRINTF(("longjumped back to line %d\n", __LINE__));\
    frame = md->thisframe;\
    rx = frame->Xresult;\
    }\
  }

5824

5825

/* Heap-frame form of RRETURN. The expansion is a single braced block that
pops the current frame and hands ra back to whoever "called":

  . the current frame is unlinked (frame becomes its Xprevframe) and released
    with pcre_stack_free;
  . if a previous frame exists, ra is stored in its Xresult, the frame is
    published in md->thisframe, and longjmp() resumes at the setjmp() point
    saved in its Xwhere;
  . if there is no previous frame (Xprevframe was NULL), ra is returned from
    the enclosing function with a real "return".

The three statements under "if (frame != NULL)" must stay inside one braced
group; otherwise longjmp() would also be executed when frame is NULL. */

#define RRETURN(ra)\
  {\
  heapframe *newframe = frame;\
  frame = newframe->Xprevframe;\
  (pcre_stack_free)(newframe);\
  if (frame != NULL)\
    {\
    frame->Xresult = ra;\
    md->thisframe = frame;\
    longjmp(frame->Xwhere, 1);\
    }\
  return ra;\
  }

5838

5839

5840

/* Structure for remembering the local variables in a private frame */

5841

5842

typedef struct heapframe {

5843

struct heapframe *Xprevframe;

5844

5845

/* Function arguments that may change */

5846

5847

const uschar *Xeptr;

5848

const uschar *Xecode;

5849

int Xoffset_top;

5850

long int Xims;

5851

eptrblock *Xeptrb;

5852

int Xflags;

5853

5854

/* Function local variables */

5855

5856

const uschar *Xcallpat;

5857

const uschar *Xcharptr;

5858

const uschar *Xdata;

5859

const uschar *Xnext;

5860

const uschar *Xpp;

5861

const uschar *Xprev;

5862

const uschar *Xsaved_eptr;

5863

5864

recursion_info Xnew_recursive;

5865

5866

BOOL Xcur_is_word;

5867

BOOL Xcondition;

5868

BOOL Xminimize;

5869

BOOL Xprev_is_word;

5870

5871

unsigned long int Xoriginal_ims;

5872

5873

#ifdef SUPPORT_UCP

5874

int Xprop_type;

5875

int Xprop_fail_result;

5876

int Xprop_category;

5877

int Xprop_chartype;

5878

int Xprop_othercase;

5879

int Xprop_test_against;

5880

int *Xprop_test_variable;

5881

#endif

5882

5883

int Xctype;

5884

int Xfc;

5885

int Xfi;

5886

int Xlength;

5887

int Xmax;

5888

int Xmin;

5889

int Xnumber;

5890

int Xoffset;

5891

int Xop;

5892

int Xsave_capture_last;

5893

int Xsave_offset1, Xsave_offset2, Xsave_offset3;

5894

int Xstacksave[REC_STACK_SAVE_MAX];

5895

5896

eptrblock Xnewptrb;

5897

5898

/* Place to pass back result, and where to jump back to */

5899

5900

int Xresult;

5901

jmp_buf Xwhere;

5902

5903

} heapframe;

5904

5905

#endif

5906

5907

5908

/***************************************************************************

5909

***************************************************************************/

5910

5911

5912

5913

/*************************************************

5914

* Match from current position *

5915

*************************************************/

5916

5917

/* On entry ecode points to the first opcode, and eptr to the first character

5918

in the subject string, while eptrb holds the value of eptr at the start of the

5919

last bracketed group - used for breaking infinite loops matching zero-length

5920

strings. This function is called recursively in many circumstances. Whenever it

5921

returns a negative (error) response, the outer incarnation must also return the

5922

same response.

5923

5924

Performance note: It might be tempting to extract commonly used fields from the

5925

md structure (e.g. utf8, end_subject) into individual variables to improve

5926

performance. Tests using gcc on a SPARC disproved this; in the first case, it

5927

made performance worse.

5928

5929

Arguments:

5930

eptr pointer in subject

5931

ecode position in code

5932

offset_top current top pointer

5933

md pointer to "static" info for the match

5934

ims current /i, /m, and /s options

5935

eptrb pointer to chain of blocks containing eptr at start of

5936

brackets - for testing for empty matches

5937

flags can contain

5938

match_condassert - this is an assertion condition

5939

match_isgroup - this is the start of a bracketed group

5940

5941

Returns: MATCH_MATCH if matched ) these values are >= 0

5942

MATCH_NOMATCH if failed to match )

5943

a negative PCRE_ERROR_xxx value if aborted by an error condition

5944

(e.g. stopped by recursion limit)
*/

5945

5946

5947

static int

5948

match(REGISTER const uschar *eptr, REGISTER const uschar *ecode,

5949

int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,

5950

int flags)

5951

{

5952

/* These variables do not need to be preserved over recursion in this function,

5953

so they can be ordinary variables in all cases. Mark them with "register"

5954

because they are used a lot in loops. */

5955

5956

5957

5958

5959

5960

/* When recursion is not being used, all "local" variables that have to be

5961

preserved over calls to RMATCH() are part of a "frame" which is obtained from

5962

heap storage. Set up the top-level frame here; others are obtained from the

5963

heap whenever RMATCH() does a "recursion". See the macro definitions above. */

5964

5965

#ifdef NO_RECURSE

5966

heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe));

5967

frame->Xprevframe = NULL; /* Marks the top level */

5968

5969

/* Copy in the original argument variables */

5970

5971

frame->Xeptr = eptr;

5972

frame->Xecode = ecode;

5973

frame->Xoffset_top = offset_top;

5974

frame->Xims = ims;

5975

frame->Xeptrb = eptrb;

5976

frame->Xflags = flags;

5977

5978

/* This is where control jumps back to to effect "recursion" */

5979

5980

HEAP_RECURSE:

5981

5982

/* Macros make the argument variables come from the current frame */

5983

5984

#define eptr frame->Xeptr

5985

#define ecode frame->Xecode

5986

#define offset_top frame->Xoffset_top

5987

#define ims frame->Xims

5988

#define eptrb frame->Xeptrb

5989

#define flags frame->Xflags

5990

5991

/* Ditto for the local variables */

5992

5993

#ifdef SUPPORT_UTF8

5994

#define charptr frame->Xcharptr

5995

#endif

5996

#define callpat frame->Xcallpat

5997

#define data frame->Xdata

5998

#define next frame->Xnext

5999

#define pp frame->Xpp

6000

#define prev frame->Xprev

6001

#define saved_eptr frame->Xsaved_eptr

6002

6003

#define new_recursive frame->Xnew_recursive

6004

6005

#define cur_is_word frame->Xcur_is_word

6006

#define condition frame->Xcondition

6007

#define minimize frame->Xminimize

6008

#define prev_is_word frame->Xprev_is_word

6009

6010

#define original_ims frame->Xoriginal_ims

6011

6012

#ifdef SUPPORT_UCP

6013

#define prop_type frame->Xprop_type

6014

#define prop_fail_result frame->Xprop_fail_result

6015

#define prop_category frame->Xprop_category

6016

#define prop_chartype frame->Xprop_chartype

6017

#define prop_othercase frame->Xprop_othercase

6018

#define prop_test_against frame->Xprop_test_against

6019

#define prop_test_variable frame->Xprop_test_variable

6020

#endif

6021

6022

#define ctype frame->Xctype

6023

#define fc frame->Xfc

6024

#define fi frame->Xfi

6025

#define length frame->Xlength

6026

#define max frame->Xmax

6027

#define min frame->Xmin

6028

#define number frame->Xnumber

6029

#define offset frame->Xoffset

6030

#define op frame->Xop

6031

#define save_capture_last frame->Xsave_capture_last

6032

#define save_offset1 frame->Xsave_offset1

6033

#define save_offset2 frame->Xsave_offset2

6034

#define save_offset3 frame->Xsave_offset3

6035

#define stacksave frame->Xstacksave

6036

6037

#define newptrb frame->Xnewptrb

6038

6039

/* When recursion is being used, local variables are allocated on the stack and

6040

get preserved during recursion in the normal way. In this environment, fi and

6041

i, and fc and c, can be the same variables. */

6042

6043

#else

6044

#define fi i

6045

#define fc c

6046

6047

6048

#ifdef SUPPORT_UTF8 /* Many of these variables are used only */

6049

const uschar *charptr; /* small blocks of the code. My normal */

6050

#endif /* style of coding would have declared */

6051

const uschar *callpat; /* them within each of those blocks. */

6052

const uschar *data; /* However, in order to accommodate the */

6053

const uschar *next; /* version of this code that uses an */

6054

const uschar *pp; /* external "stack" implemented on the */

6055

const uschar *prev; /* heap, it is easier to declare them */

6056

const uschar *saved_eptr; /* all here, so the declarations can */

6057

/* be cut out in a block. The only */

6058

recursion_info new_recursive; /* declarations within blocks below are */

6059

/* for variables that do not have to */

6060

BOOL cur_is_word; /* be preserved over a recursive call */

6061

BOOL condition; /* to RMATCH(). */

6062

BOOL minimize;

6063

BOOL prev_is_word;

6064

6065

unsigned long int original_ims;

6066

6067

#ifdef SUPPORT_UCP

6068

int prop_type;

6069

int prop_fail_result;

6070

int prop_category;

6071

int prop_chartype;

6072

int prop_othercase;

6073

int prop_test_against;

6074

int *prop_test_variable;

6075

#endif

6076

6077

int ctype;

6078

int length;

6079

int max;

6080

int min;

6081

int number;

6082

int offset;

6083

int op;

6084

int save_capture_last;

6085

int save_offset1, save_offset2, save_offset3;

6086

int stacksave[REC_STACK_SAVE_MAX];

6087

6088

eptrblock newptrb;

6089

#endif

6090

6091

/* These statements are here to stop the compiler complaining about uninitialized

6092

variables. */

6093

6094

#ifdef SUPPORT_UCP

6095

prop_fail_result = 0;

6096

prop_test_against = 0;

6097

prop_test_variable = NULL;

6098

#endif

6099

6100

/* OK, now we can get on with the real code of the function. Recursion is

6101

specified by the macros RMATCH and RRETURN. When NO_RECURSE is *not* defined,

6102

these just turn into a recursive call to match() and a "return", respectively.

6103

However, RMATCH isn't like a function call because it's quite a complicated

6104

macro. It has to be used in one particular way. This shouldn't, however, impact

6105

performance when true recursion is being used. */

6106

6107

if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);

6108

6109

original_ims = ims; /* Save for resetting on ')' */

6110

6111

/* At the start of a bracketed group, add the current subject pointer to the

6112

stack of such pointers, to be re-instated at the end of the group when we hit

6113

the closing ket. When match() is called in other circumstances, we don't add to

6114

this stack. */

6115

6116

if ((flags & match_isgroup) != 0)

6117

{

6118

newptrb.epb_prev = eptrb;

6119

newptrb.epb_saved_eptr = eptr;

6120

eptrb = &newptrb;

6121

}

6122

6123

/* Now start processing the operations. */

6124

6125

for (;;)

6126

{

6127

op = *ecode;

6128

minimize = FALSE;

6129

6130

/* For partial matching, remember if we ever hit the end of the subject after

6131

matching at least one subject character. */

6132

6133

if (md->partial &&

6134

eptr >= md->end_subject &&

6135

eptr > md->start_match)

6136

md->hitend = TRUE;

6137

6138

/* Opening capturing bracket. If there is space in the offset vector, save

6139

the current subject position in the working slot at the top of the vector. We

6140

mustn't change the current values of the data slot, because they may be set

6141

from a previous iteration of this group, and be referred to by a reference

6142

inside the group.

6143

6144

If the bracket fails to match, we need to restore this value and also the

6145

values of the final offsets, in case they were set by a previous iteration of

6146

the same bracket.

6147

6148

If there isn't enough space in the offset vector, treat this as if it were a

6149

non-capturing bracket. Don't worry about setting the flag for the error case

6150

here; that is handled in the code for KET. */

6151

6152

if (op > OP_BRA)

6153

{

6154

number = op - OP_BRA;

6155

6156

/* For extended extraction brackets (large number), we have to fish out the

6157

number from a dummy opcode at the start. */

6158

6159

if (number > EXTRACT_BASIC_MAX)

6160

number = GET2(ecode, 2+LINK_SIZE);

6161

offset = number << 1;

6162

6163

#ifdef DEBUG

6164

printf("start bracket %d subject=", number);

6165

pchars(eptr, 16, TRUE, md);

6166

printf("\n");

6167

#endif

6168

6169

if (offset < md->offset_max)

6170

{

6171

save_offset1 = md->offset_vector[offset];

6172

save_offset2 = md->offset_vector[offset+1];

6173

save_offset3 = md->offset_vector[md->offset_end - number];

6174

save_capture_last = md->capture_last;

6175

6176

DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));

6177

md->offset_vector[md->offset_end - number] = eptr - md->start_subject;

6178

6179

6180

{

6181

RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,

6182

match_isgroup);

6183

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6184

md->capture_last = save_capture_last;

6185

ecode += GET(ecode, 1);

6186

}

6187

while (*ecode == OP_ALT);

6188

6189

DPRINTF(("bracket %d failed\n", number));

6190

6191

md->offset_vector[offset] = save_offset1;

6192

md->offset_vector[offset+1] = save_offset2;

6193

md->offset_vector[md->offset_end - number] = save_offset3;

6194

6195

RRETURN(MATCH_NOMATCH);

6196

}

6197

6198

/* Insufficient room for saving captured contents */

6199

6200

else op = OP_BRA;

6201

}

6202

6203

/* Other types of node can be handled by a switch */

6204

6205

switch(op)

6206

{

6207

case OP_BRA: /* Non-capturing bracket: optimized */

6208

DPRINTF(("start bracket 0\n"));

6209

6210

{

6211

RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,

6212

match_isgroup);

6213

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6214

ecode += GET(ecode, 1);

6215

}

6216

while (*ecode == OP_ALT);

6217

DPRINTF(("bracket 0 failed\n"));

6218

RRETURN(MATCH_NOMATCH);

6219

6220

/* Conditional group: compilation checked that there are no more than

6221

two branches. If the condition is false, skipping the first branch takes us

6222

past the end if there is only one branch, but that's OK because that is

6223

exactly what going to the ket would do. */

6224

6225

case OP_COND:

6226

if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */

6227

{

6228

offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */

6229

condition = (offset == CREF_RECURSE * 2)?

6230

(md->recursive != NULL) :

6231

(offset < offset_top && md->offset_vector[offset] >= 0);

6232

RMATCH(rrc, eptr, ecode + (condition?

6233

(LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))),

6234

offset_top, md, ims, eptrb, match_isgroup);

6235

RRETURN(rrc);

6236

}

6237

6238

/* The condition is an assertion. Call match() to evaluate it - setting

6239

the final argument TRUE causes it to stop at the end of an assertion. */

6240

6241

else

6242

{

6243

RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,

6244

match_condassert | match_isgroup);

6245

if (rrc == MATCH_MATCH)

6246

{

6247

ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2);

6248

while (*ecode == OP_ALT) ecode += GET(ecode, 1);

6249

}

6250

else if (rrc != MATCH_NOMATCH)

6251

{

6252

RRETURN(rrc); /* Need braces because of following else */

6253

}

6254

else ecode += GET(ecode, 1);

6255

RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb,

6256

match_isgroup);

6257

RRETURN(rrc);

6258

}

6259

/* Control never reaches here */

6260

6261

/* Skip over conditional reference or large extraction number data if

6262

encountered. */

6263

6264

case OP_CREF:

6265

case OP_BRANUMBER:

6266

ecode += 3;

6267

break;

6268

6269

/* End of the pattern. If we are in a recursion, we should restore the

6270

offsets appropriately and continue from after the call. */

6271

6272

case OP_END:

6273

if (md->recursive != NULL && md->recursive->group_num == 0)

6274

{

6275

recursion_info *rec = md->recursive;

6276

DPRINTF(("Hit the end in a (?0) recursion\n"));

6277

md->recursive = rec->prevrec;

6278

memmove(md->offset_vector, rec->offset_save,

6279

rec->saved_max * sizeof(int));

6280

md->start_match = rec->save_start;

6281

ims = original_ims;

6282

ecode = rec->after_call;

6283

break;

6284

}

6285

6286

/* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty

6287

string - backtracking will then try other alternatives, if any. */

6288

6289

if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH);

6290

md->end_match_ptr = eptr; /* Record where we ended */

6291

md->end_offset_top = offset_top; /* and how many extracts were taken */

6292

RRETURN(MATCH_MATCH);

6293

6294

/* Change option settings */

6295

6296

case OP_OPT:

6297

ims = ecode[1];

6298

ecode += 2;

6299

DPRINTF(("ims set to %02lx\n", ims));

6300

break;

6301

6302

/* Assertion brackets. Check the alternative branches in turn - the

6303

matching won't pass the KET for an assertion. If any one branch matches,

6304

the assertion is true. Lookbehind assertions have an OP_REVERSE item at the

6305

start of each branch to move the current point backwards, so the code at

6306

this level is identical to the lookahead case. */

6307

6308

case OP_ASSERT:

6309

case OP_ASSERTBACK:

6310

6311

{

6312

RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,

6313

match_isgroup);

6314

if (rrc == MATCH_MATCH) break;

6315

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6316

ecode += GET(ecode, 1);

6317

}

6318

while (*ecode == OP_ALT);

6319

if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);

6320

6321

/* If checking an assertion for a condition, return MATCH_MATCH. */

6322

6323

if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);

6324

6325

/* Continue from after the assertion, updating the offsets high water

6326

mark, since extracts may have been taken during the assertion. */

6327

6328

do ecode += GET(ecode,1); while (*ecode == OP_ALT);

6329

ecode += 1 + LINK_SIZE;

6330

offset_top = md->end_offset_top;

6331

continue;

6332

6333

/* Negative assertion: all branches must fail to match */

6334

6335

case OP_ASSERT_NOT:

6336

case OP_ASSERTBACK_NOT:

6337

6338

{

6339

RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL,

6340

match_isgroup);

6341

if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);

6342

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6343

ecode += GET(ecode,1);

6344

}

6345

while (*ecode == OP_ALT);

6346

6347

if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH);

6348

6349

ecode += 1 + LINK_SIZE;

6350

continue;

6351

6352

/* Move the subject pointer back. This occurs only at the start of

6353

each branch of a lookbehind assertion. If we are too close to the start to

6354

move back, this match function fails. When working with UTF-8 we move

6355

back a number of characters, not bytes. */

6356

6357

case OP_REVERSE:

6358

#ifdef SUPPORT_UTF8

6359

if (md->utf8)

6360

{

6361

c = GET(ecode,1);

6362

for (i = 0; i < c; i++)

6363

{

6364

eptr--;

6365

if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);

6366

BACKCHAR(eptr)

6367

}

6368

}

6369

else

6370

#endif

6371

6372

/* No UTF-8 support, or not in UTF-8 mode: count is byte count */

6373

6374

{

6375

eptr -= GET(ecode,1);

6376

if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);

6377

}

6378

6379

/* Skip to next op code */

6380

6381

ecode += 1 + LINK_SIZE;

6382

break;

6383

6384

/* The callout item calls an external function, if one is provided, passing

6385

details of the match so far. This is mainly for debugging, though the

6386

function is able to force a failure. */

6387

6388

case OP_CALLOUT:

6389

if (pcre_callout != NULL)

6390

{

6391

pcre_callout_block cb;

6392

cb.version = 1; /* Version 1 of the callout block */

6393

cb.callout_number = ecode[1];

6394

cb.offset_vector = md->offset_vector;

6395

cb.subject = (const char *)md->start_subject;

6396

cb.subject_length = md->end_subject - md->start_subject;

6397

cb.start_match = md->start_match - md->start_subject;

6398

cb.current_position = eptr - md->start_subject;

6399

cb.pattern_position = GET(ecode, 2);

6400

cb.next_item_length = GET(ecode, 2 + LINK_SIZE);

6401

cb.capture_top = offset_top/2;

6402

cb.capture_last = md->capture_last;

6403

cb.callout_data = md->callout_data;

6404

if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);

6405

if (rrc < 0) RRETURN(rrc);

6406

}

6407

ecode += 2 + 2*LINK_SIZE;

6408

break;

6409

6410

/* Recursion either matches the current regex, or some subexpression. The

6411

offset data is the offset to the starting bracket from the start of the

6412

whole pattern. (This is so that it works from duplicated subpatterns.)

6413

6414

If there are any capturing brackets started but not finished, we have to

6415

save their starting points and reinstate them after the recursion. However,

6416

we don't know how many such there are (offset_top records the completed

6417

total) so we just have to save all the potential data. There may be up to

6418

65535 such values, which is too large to put on the stack, but using malloc

6419

for small numbers seems expensive. As a compromise, the stack is used when

6420

there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc

6421

is used. A problem is what to do if the malloc fails ... there is no way of

6422

returning to the top level with an error. Save the top REC_STACK_SAVE_MAX

6423

values on the stack, and accept that the rest may be wrong.

6424

6425

There are also other values that have to be saved. We use a chained

6426

sequence of blocks that actually live on the stack. Thanks to Robin Houston

6427

for the original version of this logic. */

6428

6429

case OP_RECURSE:

6430

{

6431

callpat = md->start_code + GET(ecode, 1);

6432

new_recursive.group_num = *callpat - OP_BRA;

6433

6434

/* For extended extraction brackets (large number), we have to fish out

6435

the number from a dummy opcode at the start. */

6436

6437

if (new_recursive.group_num > EXTRACT_BASIC_MAX)

6438

new_recursive.group_num = GET2(callpat, 2+LINK_SIZE);

6439

6440

/* Add to "recursing stack" */

6441

6442

new_recursive.prevrec = md->recursive;

6443

md->recursive = &new_recursive;

6444

6445

/* Find where to continue from afterwards */

6446

6447

ecode += 1 + LINK_SIZE;

6448

new_recursive.after_call = ecode;

6449

6450

/* Now save the offset data. */

6451

6452

new_recursive.saved_max = md->offset_end;

6453

if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)

6454

new_recursive.offset_save = stacksave;

6455

else

6456

{

6457

new_recursive.offset_save =

6458

(int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int));

6459

if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);

6460

}

6461

6462

memcpy(new_recursive.offset_save, md->offset_vector,

6463

new_recursive.saved_max * sizeof(int));

6464

new_recursive.save_start = md->start_match;

6465

md->start_match = eptr;

6466

6467

/* OK, now we can do the recursion. For each top-level alternative we

6468

restore the offset and recursion data. */

6469

6470

DPRINTF(("Recursing into group %d\n", new_recursive.group_num));

6471

6472

{

6473

RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims,

6474

eptrb, match_isgroup);

6475

if (rrc == MATCH_MATCH)

6476

{

6477

md->recursive = new_recursive.prevrec;

6478

if (new_recursive.offset_save != stacksave)

6479

(pcre_free)(new_recursive.offset_save);

6480

RRETURN(MATCH_MATCH);

6481

}

6482

else if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6483

6484

md->recursive = &new_recursive;

6485

memcpy(md->offset_vector, new_recursive.offset_save,

6486

new_recursive.saved_max * sizeof(int));

6487

callpat += GET(callpat, 1);

6488

}

6489

while (*callpat == OP_ALT);

6490

6491

DPRINTF(("Recursion didn't match\n"));

6492

md->recursive = new_recursive.prevrec;

6493

if (new_recursive.offset_save != stacksave)

6494

(pcre_free)(new_recursive.offset_save);

6495

RRETURN(MATCH_NOMATCH);

6496

}

6497

/* Control never reaches here */

6498

6499

/* "Once" brackets are like assertion brackets except that after a match,

6500

the point in the subject string is not moved back. Thus there can never be

6501

a move back into the brackets. Friedl calls these "atomic" subpatterns.

6502

Check the alternative branches in turn - the matching won't pass the KET

6503

for this kind of subpattern. If any one branch matches, we carry on as at

6504

the end of a normal bracket, leaving the subject pointer. */

6505

6506

case OP_ONCE:

6507

{

6508

prev = ecode;

6509

saved_eptr = eptr;

6510

6511

6512

{

6513

RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims,

6514

eptrb, match_isgroup);

6515

if (rrc == MATCH_MATCH) break;

6516

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6517

ecode += GET(ecode,1);

6518

}

6519

while (*ecode == OP_ALT);

6520

6521

/* If hit the end of the group (which could be repeated), fail */

6522

6523

if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);

6524

6525

/* Continue as from after the assertion, updating the offsets high water

6526

mark, since extracts may have been taken. */

6527

6528

do ecode += GET(ecode,1); while (*ecode == OP_ALT);

6529

6530

offset_top = md->end_offset_top;

6531

eptr = md->end_match_ptr;

6532

6533

/* For a non-repeating ket, just continue at this level. This also

6534

happens for a repeating ket if no characters were matched in the group.

6535

This is the forcible breaking of infinite loops as implemented in Perl

6536

5.005. If there is an options reset, it will get obeyed in the normal

6537

course of events. */

6538

6539

if (*ecode == OP_KET || eptr == saved_eptr)

6540

{

6541

ecode += 1+LINK_SIZE;

6542

break;

6543

}

6544

6545

/* The repeating kets try the rest of the pattern or restart from the

6546

preceding bracket, in the appropriate order. We need to reset any options

6547

that changed within the bracket before re-running it, so check the next

6548

opcode. */

6549

6550

if (ecode[1+LINK_SIZE] == OP_OPT)

6551

{

6552

ims = (ims & ~PCRE_IMS) | ecode[4];

6553

DPRINTF(("ims set to %02lx at group repeat\n", ims));

6554

}

6555

6556

if (*ecode == OP_KETRMIN)

6557

{

6558

RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0);

6559

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6560

RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);

6561

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6562

}

6563

else /* OP_KETRMAX */

6564

{

6565

RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);

6566

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6567

RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);

6568

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6569

}

6570

}

6571

RRETURN(MATCH_NOMATCH);

6572

6573

/* An alternation is the end of a branch; scan along to find the end of the

6574

bracketed group and go to there. */

6575

6576

case OP_ALT:

6577

do ecode += GET(ecode,1); while (*ecode == OP_ALT);

6578

break;

6579

6580

/* BRAZERO and BRAMINZERO occur just before a bracket group, indicating

6581

that it may occur zero times. It may repeat infinitely, or not at all -

6582

i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper

6583

repeat limits are compiled as a number of copies, with the optional ones

6584

preceded by BRAZERO or BRAMINZERO. */

6585

6586

case OP_BRAZERO:

6587

{

6588

next = ecode+1;

6589

RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup);

6590

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6591

do next += GET(next,1); while (*next == OP_ALT);

6592

ecode = next + 1+LINK_SIZE;

6593

}

6594

break;

6595

6596

case OP_BRAMINZERO:

6597

{

6598

next = ecode+1;

6599

do next += GET(next,1); while (*next == OP_ALT);

6600

RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb,

6601

match_isgroup);

6602

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6603

ecode++;

6604

}

6605

break;

6606

6607

/* End of a group, repeated or non-repeating. If we are at the end of

6608

an assertion "group", stop matching and return MATCH_MATCH, but record the

6609

current high water mark for use by positive assertions. Do this also

6610

for the "once" (atomic) groups. */

6611

6612

case OP_KET:

6613

case OP_KETRMIN:

6614

case OP_KETRMAX:

6615

{

6616

prev = ecode - GET(ecode, 1);

6617

saved_eptr = eptrb->epb_saved_eptr;

6618

6619

/* Back up the stack of bracket start pointers. */

6620

6621

eptrb = eptrb->epb_prev;

6622

6623

if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||

6624

*prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||

6625

*prev == OP_ONCE)

6626

{

6627

md->end_match_ptr = eptr; /* For ONCE */

6628

md->end_offset_top = offset_top;

6629

RRETURN(MATCH_MATCH);

6630

}

6631

6632

/* In all other cases except a conditional group we have to check the

6633

group number back at the start and if necessary complete handling an

6634

extraction by setting the offsets and bumping the high water mark. */

6635

6636

if (*prev != OP_COND)

6637

{

6638

number = *prev - OP_BRA;

6639

6640

/* For extended extraction brackets (large number), we have to fish out

6641

the number from a dummy opcode at the start. */

6642

6643

if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE);

6644

offset = number << 1;

6645

6646

#ifdef DEBUG

6647

printf("end bracket %d", number);

6648

printf("\n");

6649

#endif

6650

6651

/* Test for a numbered group. This includes groups called as a result

6652

of recursion. Note that whole-pattern recursion is coded as a recurse

6653

into group 0, so it won't be picked up here. Instead, we catch it when

6654

the OP_END is reached. */

6655

6656

if (number > 0)

6657

{

6658

md->capture_last = number;

6659

if (offset >= md->offset_max) md->offset_overflow = TRUE; else

6660

{

6661

md->offset_vector[offset] =

6662

md->offset_vector[md->offset_end - number];

6663

md->offset_vector[offset+1] = eptr - md->start_subject;

6664

if (offset_top <= offset) offset_top = offset + 2;

6665

}

6666

6667

/* Handle a recursively called group. Restore the offsets

6668

appropriately and continue from after the call. */

6669

6670

if (md->recursive != NULL && md->recursive->group_num == number)

6671

{

6672

recursion_info *rec = md->recursive;

6673

DPRINTF(("Recursion (%d) succeeded - continuing\n", number));

6674

md->recursive = rec->prevrec;

6675

md->start_match = rec->save_start;

6676

memcpy(md->offset_vector, rec->offset_save,

6677

rec->saved_max * sizeof(int));

6678

ecode = rec->after_call;

6679

ims = original_ims;

6680

break;

6681

}

6682

}

6683

}

6684

6685

/* Reset the value of the ims flags, in case they got changed during

6686

the group. */

6687

6688

ims = original_ims;

6689

DPRINTF(("ims reset to %02lx\n", ims));

6690

6691

/* For a non-repeating ket, just continue at this level. This also

6692

happens for a repeating ket if no characters were matched in the group.

6693

This is the forcible breaking of infinite loops as implemented in Perl

6694

5.005. If there is an options reset, it will get obeyed in the normal

6695

course of events. */

6696

6697

if (*ecode == OP_KET || eptr == saved_eptr)

6698

{

6699

ecode += 1 + LINK_SIZE;

6700

break;

6701

}

6702

6703

/* The repeating kets try the rest of the pattern or restart from the

6704

preceding bracket, in the appropriate order. */

6705

6706

if (*ecode == OP_KETRMIN)

6707

{

6708

RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);

6709

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6710

RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);

6711

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6712

}

6713

else /* OP_KETRMAX */

6714

{

6715

RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup);

6716

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6717

RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0);

6718

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

6719

}

6720

}

6721

6722

RRETURN(MATCH_NOMATCH);

6723

6724

/* Start of subject unless notbol, or after internal newline if multiline */

6725

6726

case OP_CIRC:

6727

if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);

6728

if ((ims & PCRE_MULTILINE) != 0)

6729

{

6730

if (eptr != md->start_subject && eptr[-1] != NEWLINE)

6731

RRETURN(MATCH_NOMATCH);

6732

ecode++;

6733

break;

6734

}

6735

/* ... else fall through */

6736

6737

/* Start of subject assertion */

6738

6739

case OP_SOD:

6740

if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);

6741

ecode++;

6742

break;

6743

6744

/* Start of match assertion */

6745

6746

case OP_SOM:

6747

if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);

6748

ecode++;

6749

break;

6750

6751

/* Assert before internal newline if multiline, or before a terminating

6752

newline unless endonly is set, else end of subject unless noteol is set. */

6753

6754

case OP_DOLL:

6755

if ((ims & PCRE_MULTILINE) != 0)

6756

{

6757

if (eptr < md->end_subject)

6758

{ if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); }

6759

else

6760

{ if (md->noteol) RRETURN(MATCH_NOMATCH); }

6761

ecode++;

6762

break;

6763

}

6764

else

6765

{

6766

if (md->noteol) RRETURN(MATCH_NOMATCH);

6767

if (!md->endonly)

6768

{

6769

if (eptr < md->end_subject - 1 ||

6770

(eptr == md->end_subject - 1 && *eptr != NEWLINE))

6771

RRETURN(MATCH_NOMATCH);

6772

ecode++;

6773

break;

6774

}

6775

}

6776

/* ... else fall through */

6777

6778

/* End of subject assertion (\z) */

6779

6780

case OP_EOD:

6781

if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);

6782

ecode++;

6783

break;

6784

6785

/* End of subject or ending \n assertion (\Z) */

6786

6787

case OP_EODN:

6788

if (eptr < md->end_subject - 1 ||

6789

(eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH);

6790

ecode++;

6791

break;

6792

6793

/* Word boundary assertions */

6794

6795

case OP_NOT_WORD_BOUNDARY:

6796

case OP_WORD_BOUNDARY:

6797

{

6798

6799

/* Find out if the previous and current characters are "word" characters.

6800

It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to

6801

be "non-word" characters. */

6802

6803

#ifdef SUPPORT_UTF8

6804

if (md->utf8)

6805

{

6806

if (eptr == md->start_subject) prev_is_word = FALSE; else

6807

{

6808

const uschar *lastptr = eptr - 1;

6809

while((*lastptr & 0xc0) == 0x80) lastptr--;

6810

GETCHAR(c, lastptr);

6811

prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;

6812

}

6813

if (eptr >= md->end_subject) cur_is_word = FALSE; else

6814

{

6815

GETCHAR(c, eptr);

6816

cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;

6817

}

6818

}

6819

else

6820

#endif

6821

6822

/* More streamlined when not in UTF-8 mode */

6823

6824

{

6825

prev_is_word = (eptr != md->start_subject) &&

6826

((md->ctypes[eptr[-1]] & ctype_word) != 0);

6827

cur_is_word = (eptr < md->end_subject) &&

6828

((md->ctypes[*eptr] & ctype_word) != 0);

6829

}

6830

6831

/* Now see if the situation is what we want */

6832

6833

if ((*ecode++ == OP_WORD_BOUNDARY)?

6834

cur_is_word == prev_is_word : cur_is_word != prev_is_word)

6835

RRETURN(MATCH_NOMATCH);

6836

}

6837

break;

6838

6839

/* Match a single character type; inline for speed */

6840

6841

case OP_ANY:

6842

if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE)

6843

RRETURN(MATCH_NOMATCH);

6844

if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);

6845

#ifdef SUPPORT_UTF8

6846

if (md->utf8)

6847

while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;

6848

#endif

6849

ecode++;

6850

break;

6851

6852

/* Match a single byte, even in UTF-8 mode. This opcode really does match

6853

any byte, even newline, independent of the setting of PCRE_DOTALL. */

6854

6855

case OP_ANYBYTE:

6856

if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);

6857

ecode++;

6858

break;

6859

6860

case OP_NOT_DIGIT:

6861

if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

6862

GETCHARINCTEST(c, eptr);

6863

if (

6864

#ifdef SUPPORT_UTF8

6865

c < 256 &&

6866

#endif

6867

(md->ctypes[c] & ctype_digit) != 0

6868

)

6869

RRETURN(MATCH_NOMATCH);

6870

ecode++;

6871

break;

6872

6873

case OP_DIGIT:

6874

if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

6875

GETCHARINCTEST(c, eptr);

6876

if (

6877

#ifdef SUPPORT_UTF8

6878

c >= 256 ||

6879

#endif

6880

(md->ctypes[c] & ctype_digit) == 0

6881

)

6882

RRETURN(MATCH_NOMATCH);

6883

ecode++;

6884

break;

6885

6886

case OP_NOT_WHITESPACE:

6887

if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

6888

GETCHARINCTEST(c, eptr);

6889

if (

6890

#ifdef SUPPORT_UTF8

6891

c < 256 &&

6892

#endif

6893

(md->ctypes[c] & ctype_space) != 0

6894

)

6895

RRETURN(MATCH_NOMATCH);

6896

ecode++;

6897

break;

6898

6899

case OP_WHITESPACE:

6900

if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

6901

GETCHARINCTEST(c, eptr);

6902

if (

6903

#ifdef SUPPORT_UTF8

6904

c >= 256 ||

6905

#endif

6906

(md->ctypes[c] & ctype_space) == 0

6907

)

6908

RRETURN(MATCH_NOMATCH);

6909

ecode++;

6910

break;

6911

6912

case OP_NOT_WORDCHAR:

6913

if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

6914

GETCHARINCTEST(c, eptr);

6915

if (

6916

#ifdef SUPPORT_UTF8

6917

c < 256 &&

6918

#endif

6919

(md->ctypes[c] & ctype_word) != 0

6920

)

6921

RRETURN(MATCH_NOMATCH);

6922

ecode++;

6923

break;

6924

6925

case OP_WORDCHAR:

6926

if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

6927

GETCHARINCTEST(c, eptr);

6928

if (

6929

#ifdef SUPPORT_UTF8

6930

c >= 256 ||

6931

#endif

6932

(md->ctypes[c] & ctype_word) == 0

6933

)

6934

RRETURN(MATCH_NOMATCH);

6935

ecode++;

6936

break;

6937

6938

#ifdef SUPPORT_UCP

6939

/* Check the next character by Unicode property. We will get here only

6940

if the support is in the binary; otherwise a compile-time error occurs. */

6941

6942

case OP_PROP:

6943

case OP_NOTPROP:

6944

if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

6945

GETCHARINCTEST(c, eptr);

6946

{

6947

int chartype, rqdtype;

6948

int othercase;

6949

int category = ucp_findchar(c, &chartype, &othercase);

6950

6951

rqdtype = *(++ecode);

6952

ecode++;

6953

6954

if (rqdtype >= 128)

6955

{

6956

if ((rqdtype - 128 != category) == (op == OP_PROP))

6957

RRETURN(MATCH_NOMATCH);

6958

}

6959

else

6960

{

6961

if ((rqdtype != chartype) == (op == OP_PROP))

6962

RRETURN(MATCH_NOMATCH);

6963

}

6964

}

6965

break;

6966

6967

/* Match an extended Unicode sequence. We will get here only if the support

6968

is in the binary; otherwise a compile-time error occurs. */

6969

6970

case OP_EXTUNI:

6971

if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

6972

GETCHARINCTEST(c, eptr);

6973

{

6974

int chartype;

6975

int othercase;

6976

int category = ucp_findchar(c, &chartype, &othercase);

6977

if (category == ucp_M) RRETURN(MATCH_NOMATCH);

6978

while (eptr < md->end_subject)

6979

{

6980

int len = 1;

6981

if (!md->utf8) c = *eptr; else

6982

{

6983

GETCHARLEN(c, eptr, len);

6984

}

6985

category = ucp_findchar(c, &chartype, &othercase);

6986

if (category != ucp_M) break;

6987

eptr += len;

6988

}

6989

}

6990

ecode++;

6991

break;

6992

#endif

6993

6994

6995

/* Match a back reference, possibly repeatedly. Look past the end of the

6996

item to see if there is repeat information following. The code is similar

6997

to that for character classes, but repeated for efficiency. Then obey

6998

similar code to character type repeats - written out again for speed.

6999

However, if the referenced string is the empty string, always treat

7000

it as matched, any number of times (otherwise there could be infinite

7001

loops). */

7002

7003

case OP_REF:

7004

{

7005

offset = GET2(ecode, 1) << 1; /* Doubled ref number */

7006

ecode += 3; /* Advance past item */

7007

7008

/* If the reference is unset, set the length to be longer than the amount

7009

of subject left; this ensures that every attempt at a match fails. We

7010

can't just fail here, because of the possibility of quantifiers with zero

7011

minima. */

7012

7013

length = (offset >= offset_top || md->offset_vector[offset] < 0)?

7014

md->end_subject - eptr + 1 :

7015

md->offset_vector[offset+1] - md->offset_vector[offset];

7016

7017

/* Set up for repetition, or handle the non-repeated case */

7018

7019

switch (*ecode)

7020

{

7021

case OP_CRSTAR:

7022

case OP_CRMINSTAR:

7023

case OP_CRPLUS:

7024

case OP_CRMINPLUS:

7025

case OP_CRQUERY:

7026

case OP_CRMINQUERY:

7027

c = *ecode++ - OP_CRSTAR;

7028

minimize = (c & 1) != 0;

7029

min = rep_min[c]; /* Pick up values from tables; */

7030

max = rep_max[c]; /* zero for max => infinity */

7031

if (max == 0) max = INT_MAX;

7032

break;

7033

7034

case OP_CRRANGE:

7035

case OP_CRMINRANGE:

7036

minimize = (*ecode == OP_CRMINRANGE);

7037

min = GET2(ecode, 1);

7038

max = GET2(ecode, 3);

7039

if (max == 0) max = INT_MAX;

7040

ecode += 5;

7041

break;

7042

7043

default: /* No repeat follows */

7044

if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);

7045

eptr += length;

7046

continue; /* With the main loop */

7047

}

7048

7049

/* If the length of the reference is zero, just continue with the

7050

main loop. */

7051

7052

if (length == 0) continue;

7053

7054

/* First, ensure the minimum number of matches are present. We get back

7055

the length of the reference string explicitly rather than passing the

7056

address of eptr, so that eptr can be a register variable. */

7057

7058

for (i = 1; i <= min; i++)

7059

{

7060

if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);

7061

eptr += length;

7062

}

7063

7064

/* If min = max, continue at the same level without recursion.

7065

They are not both allowed to be zero. */

7066

7067

if (min == max) continue;

7068

7069

/* If minimizing, keep trying and advancing the pointer */

7070

7071

if (minimize)

7072

{

7073

for (fi = min;; fi++)

7074

{

7075

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7076

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7077

if (fi >= max || !match_ref(offset, eptr, length, md, ims))

7078

RRETURN(MATCH_NOMATCH);

7079

eptr += length;

7080

}

7081

/* Control never gets here */

7082

}

7083

7084

/* If maximizing, find the longest string and work backwards */

7085

7086

else

7087

{

7088

pp = eptr;

7089

for (i = min; i < max; i++)

7090

{

7091

if (!match_ref(offset, eptr, length, md, ims)) break;

7092

eptr += length;

7093

}

7094

while (eptr >= pp)

7095

{

7096

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7097

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7098

eptr -= length;

7099

}

7100

RRETURN(MATCH_NOMATCH);

7101

}

7102

}

7103

/* Control never gets here */

7104

7105

7106

7107

/* Match a bit-mapped character class, possibly repeatedly. This op code is

7108

used when all the characters in the class have values in the range 0-255,

7109

and either the matching is caseful, or the characters are in the range

7110

0-127 when UTF-8 processing is enabled. The only difference between

7111

OP_CLASS and OP_NCLASS occurs when a data character outside the range is

7112

encountered.

7113

7114

First, look past the end of the item to see if there is repeat information

7115

following. Then obey similar code to character type repeats - written out

7116

again for speed. */

7117

7118

case OP_NCLASS:

7119

case OP_CLASS:

7120

{

7121

data = ecode + 1; /* Save for matching */

7122

ecode += 33; /* Advance past the item */

7123

7124

switch (*ecode)

7125

{

7126

case OP_CRSTAR:

7127

case OP_CRMINSTAR:

7128

case OP_CRPLUS:

7129

case OP_CRMINPLUS:

7130

case OP_CRQUERY:

7131

case OP_CRMINQUERY:

7132

c = *ecode++ - OP_CRSTAR;

7133

minimize = (c & 1) != 0;

7134

min = rep_min[c]; /* Pick up values from tables; */

7135

max = rep_max[c]; /* zero for max => infinity */

7136

if (max == 0) max = INT_MAX;

7137

break;

7138

7139

case OP_CRRANGE:

7140

case OP_CRMINRANGE:

7141

minimize = (*ecode == OP_CRMINRANGE);

7142

min = GET2(ecode, 1);

7143

max = GET2(ecode, 3);

7144

if (max == 0) max = INT_MAX;

7145

ecode += 5;

7146

break;

7147

7148

default: /* No repeat follows */

7149

min = max = 1;

7150

break;

7151

}

7152

7153

/* First, ensure the minimum number of matches are present. */

7154

7155

#ifdef SUPPORT_UTF8

7156

/* UTF-8 mode */

7157

if (md->utf8)

7158

{

7159

for (i = 1; i <= min; i++)

7160

{

7161

if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

7162

GETCHARINC(c, eptr);

7163

if (c > 255)

7164

{

7165

if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);

7166

}

7167

else

7168

{

7169

if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);

7170

}

7171

}

7172

}

7173

else

7174

#endif

7175

/* Not UTF-8 mode */

7176

{

7177

for (i = 1; i <= min; i++)

7178

{

7179

if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

7180

c = *eptr++;

7181

if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);

7182

}

7183

}

7184

7185

/* If max == min we can continue with the main loop without the

7186

need to recurse. */

7187

7188

if (min == max) continue;

7189

7190

/* If minimizing, keep testing the rest of the expression and advancing

7191

the pointer while it matches the class. */

7192

7193

if (minimize)

7194

{

7195

#ifdef SUPPORT_UTF8

7196

/* UTF-8 mode */

7197

if (md->utf8)

7198

{

7199

for (fi = min;; fi++)

7200

{

7201

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7202

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7203

if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

7204

GETCHARINC(c, eptr);

7205

if (c > 255)

7206

{

7207

if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);

7208

}

7209

else

7210

{

7211

if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);

7212

}

7213

}

7214

}

7215

else

7216

#endif

7217

/* Not UTF-8 mode */

7218

{

7219

for (fi = min;; fi++)

7220

{

7221

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7222

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7223

if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

7224

c = *eptr++;

7225

if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);

7226

}

7227

}

7228

/* Control never gets here */

7229

}

7230

7231

/* If maximizing, find the longest possible run, then work backwards. */

7232

7233

else

7234

{

7235

pp = eptr;

7236

7237

#ifdef SUPPORT_UTF8

7238

/* UTF-8 mode */

7239

if (md->utf8)

7240

{

7241

for (i = min; i < max; i++)

7242

{

7243

int len = 1;

7244

if (eptr >= md->end_subject) break;

7245

GETCHARLEN(c, eptr, len);

7246

if (c > 255)

7247

{

7248

if (op == OP_CLASS) break;

7249

}

7250

else

7251

{

7252

if ((data[c/8] & (1 << (c&7))) == 0) break;

7253

}

7254

eptr += len;

7255

}

7256

for (;;)

7257

{

7258

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7259

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7260

if (eptr-- == pp) break; /* Stop if tried at original pos */

7261

BACKCHAR(eptr);

7262

}

7263

}

7264

else

7265

#endif

7266

/* Not UTF-8 mode */

7267

{

7268

for (i = min; i < max; i++)

7269

{

7270

if (eptr >= md->end_subject) break;

7271

c = *eptr;

7272

if ((data[c/8] & (1 << (c&7))) == 0) break;

7273

eptr++;

7274

}

7275

while (eptr >= pp)

7276

{

7277

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7278

eptr--;

7279

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7280

}

7281

}

7282

7283

RRETURN(MATCH_NOMATCH);

7284

}

7285

}

7286

/* Control never gets here */

7287

7288

7289

/* Match an extended character class. This opcode is encountered only

7290

in UTF-8 mode, because that's the only time it is compiled. */

7291

7292

#ifdef SUPPORT_UTF8

7293

case OP_XCLASS:

7294

{

7295

data = ecode + 1 + LINK_SIZE; /* Save for matching */

7296

ecode += GET(ecode, 1); /* Advance past the item */

7297

7298

switch (*ecode)

7299

{

7300

case OP_CRSTAR:

7301

case OP_CRMINSTAR:

7302

case OP_CRPLUS:

7303

case OP_CRMINPLUS:

7304

case OP_CRQUERY:

7305

case OP_CRMINQUERY:

7306

c = *ecode++ - OP_CRSTAR;

7307

minimize = (c & 1) != 0;

7308

min = rep_min[c]; /* Pick up values from tables; */

7309

max = rep_max[c]; /* zero for max => infinity */

7310

if (max == 0) max = INT_MAX;

7311

break;

7312

7313

case OP_CRRANGE:

7314

case OP_CRMINRANGE:

7315

minimize = (*ecode == OP_CRMINRANGE);

7316

min = GET2(ecode, 1);

7317

max = GET2(ecode, 3);

7318

if (max == 0) max = INT_MAX;

7319

ecode += 5;

7320

break;

7321

7322

default: /* No repeat follows */

7323

min = max = 1;

7324

break;

7325

}

7326

7327

/* First, ensure the minimum number of matches are present. */

7328

7329

for (i = 1; i <= min; i++)

7330

{

7331

if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

7332

GETCHARINC(c, eptr);

7333

if (!match_xclass(c, data)) RRETURN(MATCH_NOMATCH);

7334

}

7335

7336

/* If max == min we can continue with the main loop without the

7337

need to recurse. */

7338

7339

if (min == max) continue;

7340

7341

/* If minimizing, keep testing the rest of the expression and advancing

7342

the pointer while it matches the class. */

7343

7344

if (minimize)

7345

{

7346

for (fi = min;; fi++)

7347

{

7348

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7349

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7350

if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

7351

GETCHARINC(c, eptr);

7352

if (!match_xclass(c, data)) RRETURN(MATCH_NOMATCH);

7353

}

7354

/* Control never gets here */

7355

}

7356

7357

/* If maximizing, find the longest possible run, then work backwards. */

7358

7359

else

7360

{

7361

pp = eptr;

7362

for (i = min; i < max; i++)

7363

{

7364

int len = 1;

7365

if (eptr >= md->end_subject) break;

7366

GETCHARLEN(c, eptr, len);

7367

if (!match_xclass(c, data)) break;

7368

eptr += len;

7369

}

7370

for(;;)

7371

{

7372

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7373

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7374

if (eptr-- == pp) break; /* Stop if tried at original pos */

7375

BACKCHAR(eptr)

7376

}

7377

RRETURN(MATCH_NOMATCH);

7378

}

7379

7380

/* Control never gets here */

7381

}

7382

#endif /* End of XCLASS */

7383

7384

/* Match a single character, casefully */

7385

7386

case OP_CHAR:

7387

#ifdef SUPPORT_UTF8

7388

if (md->utf8)

7389

{

7390

length = 1;

7391

ecode++;

7392

GETCHARLEN(fc, ecode, length);

7393

if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);

7394

while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);

7395

}

7396

else

7397

#endif

7398

7399

/* Non-UTF-8 mode */

7400

{

7401

if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);

7402

if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);

7403

ecode += 2;

7404

}

7405

break;

7406

7407

/* Match a single character, caselessly */

7408

7409

case OP_CHARNC:

7410

#ifdef SUPPORT_UTF8

7411

if (md->utf8)

7412

{

7413

length = 1;

7414

ecode++;

7415

GETCHARLEN(fc, ecode, length);

7416

7417

if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);

7418

7419

/* If the pattern character's value is < 128, we have only one byte, and

7420

can use the fast lookup table. */

7421

7422

if (fc < 128)

7423

{

7424

if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);

7425

}

7426

7427

/* Otherwise we must pick up the subject character */

7428

7429

else

7430

{

7431

int dc;

7432

GETCHARINC(dc, eptr);

7433

ecode += length;

7434

7435

/* If we have Unicode property support, we can use it to test the other

7436

case of the character, if there is one. The result of ucp_findchar() is

7437

< 0 if the char isn't found, and othercase is returned as zero if there

7438

isn't one. */

7439

7440

if (fc != dc)

7441

{

7442

#ifdef SUPPORT_UCP

7443

int chartype;

7444

int othercase;

7445

if (ucp_findchar(fc, &chartype, &othercase) < 0 || dc != othercase)

7446

#endif

7447

RRETURN(MATCH_NOMATCH);

7448

}

7449

}

7450

}

7451

else

7452

#endif /* SUPPORT_UTF8 */

7453

7454

/* Non-UTF-8 mode */

7455

{

7456

if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);

7457

if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);

7458

ecode += 2;

7459

}

7460

break;

7461

7462

/* Match a single character repeatedly; different opcodes share code. */

7463

7464

case OP_EXACT:

7465

min = max = GET2(ecode, 1);

7466

ecode += 3;

7467

goto REPEATCHAR;

7468

7469

case OP_UPTO:

7470

case OP_MINUPTO:

7471

min = 0;

7472

max = GET2(ecode, 1);

7473

minimize = *ecode == OP_MINUPTO;

7474

ecode += 3;

7475

goto REPEATCHAR;

7476

7477

case OP_STAR:

7478

case OP_MINSTAR:

7479

case OP_PLUS:

7480

case OP_MINPLUS:

7481

case OP_QUERY:

7482

case OP_MINQUERY:

7483

c = *ecode++ - OP_STAR;

7484

minimize = (c & 1) != 0;

7485

min = rep_min[c]; /* Pick up values from tables; */

7486

max = rep_max[c]; /* zero for max => infinity */

7487

if (max == 0) max = INT_MAX;

7488

7489

/* Common code for all repeated single-character matches. We can give

7490

up quickly if there are fewer than the minimum number of characters left in

7491

the subject. */

7492

7493

REPEATCHAR:

7494

#ifdef SUPPORT_UTF8

7495

if (md->utf8)

7496

{

7497

length = 1;

7498

charptr = ecode;

7499

GETCHARLEN(fc, ecode, length);

7500

if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);

7501

ecode += length;

7502

7503

/* Handle multibyte character matching specially here. There is

7504

support for caseless matching if UCP support is present. */

7505

7506

if (length > 1)

7507

{

7508

int oclength = 0;

7509

uschar occhars[8];

7510

7511

#ifdef SUPPORT_UCP

7512

int othercase;

7513

int chartype;

7514

if ((ims & PCRE_CASELESS) != 0 &&

7515

ucp_findchar(fc, &chartype, &othercase) >= 0 &&

7516

othercase > 0)

7517

oclength = ord2utf8(othercase, occhars);

7518

#endif /* SUPPORT_UCP */

7519

7520

for (i = 1; i <= min; i++)

7521

{

7522

if (memcmp(eptr, charptr, length) == 0) eptr += length;

7523

/* Need braces because of following else */

7524

else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }

7525

else

7526

{

7527

if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);

7528

eptr += oclength;

7529

}

7530

}

7531

7532

if (min == max) continue;

7533

7534

if (minimize)

7535

{

7536

for (fi = min;; fi++)

7537

{

7538

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7539

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7540

if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

7541

if (memcmp(eptr, charptr, length) == 0) eptr += length;

7542

/* Need braces because of following else */

7543

else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }

7544

else

7545

{

7546

if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);

7547

eptr += oclength;

7548

}

7549

}

7550

/* Control never gets here */

7551

}

7552

else

7553

{

7554

pp = eptr;

7555

for (i = min; i < max; i++)

7556

{

7557

if (eptr > md->end_subject - length) break;

7558

if (memcmp(eptr, charptr, length) == 0) eptr += length;

7559

else if (oclength == 0) break;

7560

else

7561

{

7562

if (memcmp(eptr, occhars, oclength) != 0) break;

7563

eptr += oclength;

7564

}

7565

}

7566

while (eptr >= pp)

7567

{

7568

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7569

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7570

eptr -= length;

7571

}

7572

RRETURN(MATCH_NOMATCH);

7573

}

7574

/* Control never gets here */

7575

}

7576

7577

/* If the length of a UTF-8 character is 1, we fall through here, and

7578

obey the code as for non-UTF-8 characters below, though in this case the

7579

value of fc will always be < 128. */

7580

}

7581

else

7582

#endif /* SUPPORT_UTF8 */

7583

7584

/* When not in UTF-8 mode, load a single-byte character. */

7585

{

7586

if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);

7587

fc = *ecode++;

7588

}

7589

7590

/* The value of fc at this point is always less than 256, though we may or

7591

may not be in UTF-8 mode. The code is duplicated for the caseless and

7592

caseful cases, for speed, since matching characters is likely to be quite

7593

common. First, ensure the minimum number of matches are present. If min =

7594

max, continue at the same level without recursing. Otherwise, if

7595

minimizing, keep trying the rest of the expression and advancing one

7596

matching character if failing, up to the maximum. Alternatively, if

7597

maximizing, find the maximum number of characters and work backwards. */

7598

7599

DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,

7600

max, eptr));

7601

7602

if ((ims & PCRE_CASELESS) != 0)

7603

{

7604

fc = md->lcc[fc];

7605

for (i = 1; i <= min; i++)

7606

if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);

7607

if (min == max) continue;

7608

if (minimize)

7609

{

7610

for (fi = min;; fi++)

7611

{

7612

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7613

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7614

if (fi >= max || eptr >= md->end_subject ||

7615

fc != md->lcc[*eptr++])

7616

RRETURN(MATCH_NOMATCH);

7617

}

7618

/* Control never gets here */

7619

}

7620

else

7621

{

7622

pp = eptr;

7623

for (i = min; i < max; i++)

7624

{

7625

if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;

7626

eptr++;

7627

}

7628

while (eptr >= pp)

7629

{

7630

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7631

eptr--;

7632

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7633

}

7634

RRETURN(MATCH_NOMATCH);

7635

}

7636

/* Control never gets here */

7637

}

7638

7639

/* Caseful comparisons (includes all multi-byte characters) */

7640

7641

else

7642

{

7643

for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);

7644

if (min == max) continue;

7645

if (minimize)

7646

{

7647

for (fi = min;; fi++)

7648

{

7649

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7650

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7651

if (fi >= max || eptr >= md->end_subject || fc != *eptr++)

7652

RRETURN(MATCH_NOMATCH);

7653

}

7654

/* Control never gets here */

7655

}

7656

else

7657

{

7658

pp = eptr;

7659

for (i = min; i < max; i++)

7660

{

7661

if (eptr >= md->end_subject || fc != *eptr) break;

7662

eptr++;

7663

}

7664

while (eptr >= pp)

7665

{

7666

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7667

eptr--;

7668

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7669

}

7670

RRETURN(MATCH_NOMATCH);

7671

}

7672

}

7673

/* Control never gets here */

7674

7675

/* Match a negated single one-byte character. The character we are

7676

checking can be multibyte. */

7677

7678

case OP_NOT:

7679

if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

7680

ecode++;

7681

GETCHARINCTEST(c, eptr);

7682

if ((ims & PCRE_CASELESS) != 0)

7683

{

7684

#ifdef SUPPORT_UTF8

7685

if (c < 256)

7686

#endif

7687

c = md->lcc[c];

7688

if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH);

7689

}

7690

else

7691

{

7692

if (*ecode++ == c) RRETURN(MATCH_NOMATCH);

7693

}

7694

break;

7695

7696

/* Match a negated single one-byte character repeatedly. This is almost a

7697

repeat of the code for a repeated single character, but I haven't found a

7698

nice way of commoning these up that doesn't require a test of the

7699

positive/negative option for each character match. Maybe that wouldn't add

7700

very much to the time taken, but character matching *is* what this is all

7701

about... */

7702

7703

case OP_NOTEXACT:

7704

min = max = GET2(ecode, 1);

7705

ecode += 3;

7706

goto REPEATNOTCHAR;

7707

7708

case OP_NOTUPTO:

7709

case OP_NOTMINUPTO:

7710

min = 0;

7711

max = GET2(ecode, 1);

7712

minimize = *ecode == OP_NOTMINUPTO;

7713

ecode += 3;

7714

goto REPEATNOTCHAR;

7715

7716

case OP_NOTSTAR:

7717

case OP_NOTMINSTAR:

7718

case OP_NOTPLUS:

7719

case OP_NOTMINPLUS:

7720

case OP_NOTQUERY:

7721

case OP_NOTMINQUERY:

7722

c = *ecode++ - OP_NOTSTAR;

7723

minimize = (c & 1) != 0;

7724

min = rep_min[c]; /* Pick up values from tables; */

7725

max = rep_max[c]; /* zero for max => infinity */

7726

if (max == 0) max = INT_MAX;

7727

7728

/* Common code for all repeated single-byte matches. We can give up quickly

7729

if there are fewer than the minimum number of bytes left in the

7730

subject. */

7731

7732

REPEATNOTCHAR:

7733

if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);

7734

fc = *ecode++;

7735

7736

/* The code is duplicated for the caseless and caseful cases, for speed,

7737

since matching characters is likely to be quite common. First, ensure the

7738

minimum number of matches are present. If min = max, continue at the same

7739

level without recursing. Otherwise, if minimizing, keep trying the rest of

7740

the expression and advancing one matching character if failing, up to the

7741

maximum. Alternatively, if maximizing, find the maximum number of

7742

characters and work backwards. */

7743

7744

DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,

7745

max, eptr));

7746

7747

if ((ims & PCRE_CASELESS) != 0)

7748

{

7749

fc = md->lcc[fc];

7750

7751

#ifdef SUPPORT_UTF8

7752

/* UTF-8 mode */

7753

if (md->utf8)

7754

{

7755

7756

for (i = 1; i <= min; i++)

7757

{

7758

GETCHARINC(d, eptr);

7759

if (d < 256) d = md->lcc[d];

7760

if (fc == d) RRETURN(MATCH_NOMATCH);

7761

}

7762

}

7763

else

7764

#endif

7765

7766

/* Not UTF-8 mode */

7767

{

7768

for (i = 1; i <= min; i++)

7769

if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);

7770

}

7771

7772

if (min == max) continue;

7773

7774

if (minimize)

7775

{

7776

#ifdef SUPPORT_UTF8

7777

/* UTF-8 mode */

7778

if (md->utf8)

7779

{

7780

7781

for (fi = min;; fi++)

7782

{

7783

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7784

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7785

GETCHARINC(d, eptr);

7786

if (d < 256) d = md->lcc[d];

7787

if (fi >= max || eptr >= md->end_subject || fc == d)

7788

RRETURN(MATCH_NOMATCH);

7789

}

7790

}

7791

else

7792

#endif

7793

/* Not UTF-8 mode */

7794

{

7795

for (fi = min;; fi++)

7796

{

7797

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7798

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7799

if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])

7800

RRETURN(MATCH_NOMATCH);

7801

}

7802

}

7803

/* Control never gets here */

7804

}

7805

7806

/* Maximize case */

7807

7808

else

7809

{

7810

pp = eptr;

7811

7812

#ifdef SUPPORT_UTF8

7813

/* UTF-8 mode */

7814

if (md->utf8)

7815

{

7816

7817

for (i = min; i < max; i++)

7818

{

7819

int len = 1;

7820

if (eptr >= md->end_subject) break;

7821

GETCHARLEN(d, eptr, len);

7822

if (d < 256) d = md->lcc[d];

7823

if (fc == d) break;

7824

eptr += len;

7825

}

7826

for(;;)

7827

{

7828

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7829

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7830

if (eptr-- == pp) break; /* Stop if tried at original pos */

7831

BACKCHAR(eptr);

7832

}

7833

}

7834

else

7835

#endif

7836

/* Not UTF-8 mode */

7837

{

7838

for (i = min; i < max; i++)

7839

{

7840

if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;

7841

eptr++;

7842

}

7843

while (eptr >= pp)

7844

{

7845

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7846

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7847

eptr--;

7848

}

7849

}

7850

7851

RRETURN(MATCH_NOMATCH);

7852

}

7853

/* Control never gets here */

7854

}

7855

7856

/* Caseful comparisons */

7857

7858

else

7859

{

7860

#ifdef SUPPORT_UTF8

7861

/* UTF-8 mode */

7862

if (md->utf8)

7863

{

7864

7865

for (i = 1; i <= min; i++)

7866

{

7867

GETCHARINC(d, eptr);

7868

if (fc == d) RRETURN(MATCH_NOMATCH);

7869

}

7870

}

7871

else

7872

#endif

7873

/* Not UTF-8 mode */

7874

{

7875

for (i = 1; i <= min; i++)

7876

if (fc == *eptr++) RRETURN(MATCH_NOMATCH);

7877

}

7878

7879

if (min == max) continue;

7880

7881

if (minimize)

7882

{

7883

#ifdef SUPPORT_UTF8

7884

/* UTF-8 mode */

7885

if (md->utf8)

7886

{

7887

7888

for (fi = min;; fi++)

7889

{

7890

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7891

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7892

GETCHARINC(d, eptr);

7893

if (fi >= max || eptr >= md->end_subject || fc == d)

7894

RRETURN(MATCH_NOMATCH);

7895

}

7896

}

7897

else

7898

#endif

7899

/* Not UTF-8 mode */

7900

{

7901

for (fi = min;; fi++)

7902

{

7903

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7904

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7905

if (fi >= max || eptr >= md->end_subject || fc == *eptr++)

7906

RRETURN(MATCH_NOMATCH);

7907

}

7908

}

7909

/* Control never gets here */

7910

}

7911

7912

/* Maximize case */

7913

7914

else

7915

{

7916

pp = eptr;

7917

7918

#ifdef SUPPORT_UTF8

7919

/* UTF-8 mode */

7920

if (md->utf8)

7921

{

7922

7923

for (i = min; i < max; i++)

7924

{

7925

int len = 1;

7926

if (eptr >= md->end_subject) break;

7927

GETCHARLEN(d, eptr, len);

7928

if (fc == d) break;

7929

eptr += len;

7930

}

7931

for(;;)

7932

{

7933

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7934

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7935

if (eptr-- == pp) break; /* Stop if tried at original pos */

7936

BACKCHAR(eptr);

7937

}

7938

}

7939

else

7940

#endif

7941

/* Not UTF-8 mode */

7942

{

7943

for (i = min; i < max; i++)

7944

{

7945

if (eptr >= md->end_subject || fc == *eptr) break;

7946

eptr++;

7947

}

7948

while (eptr >= pp)

7949

{

7950

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

7951

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

7952

eptr--;

7953

}

7954

}

7955

7956

RRETURN(MATCH_NOMATCH);

7957

}

7958

}

7959

/* Control never gets here */

7960

7961

/* Match a single character type repeatedly; several different opcodes

7962

share code. This is very similar to the code for single characters, but we

7963

repeat it in the interests of efficiency. */

7964

7965

case OP_TYPEEXACT:

7966

min = max = GET2(ecode, 1);

7967

minimize = TRUE;

7968

ecode += 3;

7969

goto REPEATTYPE;

7970

7971

case OP_TYPEUPTO:

7972

case OP_TYPEMINUPTO:

7973

min = 0;

7974

max = GET2(ecode, 1);

7975

minimize = *ecode == OP_TYPEMINUPTO;

7976

ecode += 3;

7977

goto REPEATTYPE;

7978

7979

case OP_TYPESTAR:

7980

case OP_TYPEMINSTAR:

7981

case OP_TYPEPLUS:

7982

case OP_TYPEMINPLUS:

7983

case OP_TYPEQUERY:

7984

case OP_TYPEMINQUERY:

7985

c = *ecode++ - OP_TYPESTAR;

7986

minimize = (c & 1) != 0;

7987

min = rep_min[c]; /* Pick up values from tables; */

7988

max = rep_max[c]; /* zero for max => infinity */

7989

if (max == 0) max = INT_MAX;

7990

7991

/* Common code for all repeated single character type matches. Note that

7992

in UTF-8 mode, '.' matches a character of any length, but for the other

7993

character types, the valid characters are all one-byte long. */

7994

7995

REPEATTYPE:

7996

ctype = *ecode++; /* Code for the character type */

7997

7998

#ifdef SUPPORT_UCP

7999

if (ctype == OP_PROP || ctype == OP_NOTPROP)

8000

{

8001

prop_fail_result = ctype == OP_NOTPROP;

8002

prop_type = *ecode++;

8003

if (prop_type >= 128)

8004

{

8005

prop_test_against = prop_type - 128;

8006

prop_test_variable = &prop_category;

8007

}

8008

else

8009

{

8010

prop_test_against = prop_type;

8011

prop_test_variable = &prop_chartype;

8012

}

8013

}

8014

else prop_type = -1;

8015

#endif

8016

8017

/* First, ensure the minimum number of matches are present. Use inline

8018

code for maximizing the speed, and do the type test once at the start

8019

(i.e. keep it out of the loop). Also we can test that there are at least

8020

the minimum number of bytes before we start. This isn't as effective in

8021

UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that

8022

is tidier. Also separate the UCP code, which can be the same for both UTF-8

8023

and single-bytes. */

8024

8025

if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);

8026

if (min > 0)

8027

{

8028

#ifdef SUPPORT_UCP

8029

if (prop_type > 0)

8030

{

8031

for (i = 1; i <= min; i++)

8032

{

8033

GETCHARINC(c, eptr);

8034

prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);

8035

if ((*prop_test_variable == prop_test_against) == prop_fail_result)

8036

RRETURN(MATCH_NOMATCH);

8037

}

8038

}

8039

8040

/* Match extended Unicode sequences. We will get here only if the

8041

support is in the binary; otherwise a compile-time error occurs. */

8042

8043

else if (ctype == OP_EXTUNI)

8044

{

8045

for (i = 1; i <= min; i++)

8046

{

8047

GETCHARINCTEST(c, eptr);

8048

prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);

8049

if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);

8050

while (eptr < md->end_subject)

8051

{

8052

int len = 1;

8053

if (!md->utf8) c = *eptr; else

8054

{

8055

GETCHARLEN(c, eptr, len);

8056

}

8057

prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);

8058

if (prop_category != ucp_M) break;

8059

eptr += len;

8060

}

8061

}

8062

}

8063

8064

else

8065

#endif /* SUPPORT_UCP */

8066

8067

/* Handle all other cases when the coding is UTF-8 */

8068

8069

#ifdef SUPPORT_UTF8

8070

if (md->utf8) switch(ctype)

8071

{

8072

case OP_ANY:

8073

for (i = 1; i <= min; i++)

8074

{

8075

if (eptr >= md->end_subject ||

8076

(*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0))

8077

RRETURN(MATCH_NOMATCH);

8078

while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;

8079

}

8080

break;

8081

8082

case OP_ANYBYTE:

8083

eptr += min;

8084

break;

8085

8086

case OP_NOT_DIGIT:

8087

for (i = 1; i <= min; i++)

8088

{

8089

if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

8090

GETCHARINC(c, eptr);

8091

if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)

8092

RRETURN(MATCH_NOMATCH);

8093

}

8094

break;

8095

8096

case OP_DIGIT:

8097

for (i = 1; i <= min; i++)

8098

{

8099

if (eptr >= md->end_subject ||

8100

*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)

8101

RRETURN(MATCH_NOMATCH);

8102

/* No need to skip more bytes - we know it's a 1-byte character */

8103

}

8104

break;

8105

8106

case OP_NOT_WHITESPACE:

8107

for (i = 1; i <= min; i++)

8108

{

8109

if (eptr >= md->end_subject ||

8110

(*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0))

8111

RRETURN(MATCH_NOMATCH);

8112

while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;

8113

}

8114

break;

8115

8116

case OP_WHITESPACE:

8117

for (i = 1; i <= min; i++)

8118

{

8119

if (eptr >= md->end_subject ||

8120

*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)

8121

RRETURN(MATCH_NOMATCH);

8122

/* No need to skip more bytes - we know it's a 1-byte character */

8123

}

8124

break;

8125

8126

case OP_NOT_WORDCHAR:

8127

for (i = 1; i <= min; i++)

8128

{

8129

if (eptr >= md->end_subject ||

8130

(*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0))

8131

RRETURN(MATCH_NOMATCH);

8132

while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;

8133

}

8134

break;

8135

8136

case OP_WORDCHAR:

8137

for (i = 1; i <= min; i++)

8138

{

8139

if (eptr >= md->end_subject ||

8140

*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)

8141

RRETURN(MATCH_NOMATCH);

8142

/* No need to skip more bytes - we know it's a 1-byte character */

8143

}

8144

break;

8145

8146

default:

8147

RRETURN(PCRE_ERROR_INTERNAL);

8148

} /* End switch(ctype) */

8149

8150

else

8151

#endif /* SUPPORT_UTF8 */

8152

8153

/* Code for the non-UTF-8 case for minimum matching of operators other

8154

than OP_PROP and OP_NOTPROP. */

8155

8156

switch(ctype)

8157

{

8158

case OP_ANY:

8159

if ((ims & PCRE_DOTALL) == 0)

8160

{

8161

for (i = 1; i <= min; i++)

8162

if (*eptr++ == NEWLINE) RRETURN(MATCH_NOMATCH);

8163

}

8164

else eptr += min;

8165

break;

8166

8167

case OP_ANYBYTE:

8168

eptr += min;

8169

break;

8170

8171

case OP_NOT_DIGIT:

8172

for (i = 1; i <= min; i++)

8173

if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);

8174

break;

8175

8176

case OP_DIGIT:

8177

for (i = 1; i <= min; i++)

8178

if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);

8179

break;

8180

8181

case OP_NOT_WHITESPACE:

8182

for (i = 1; i <= min; i++)

8183

if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);

8184

break;

8185

8186

case OP_WHITESPACE:

8187

for (i = 1; i <= min; i++)

8188

if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);

8189

break;

8190

8191

case OP_NOT_WORDCHAR:

8192

for (i = 1; i <= min; i++)

8193

if ((md->ctypes[*eptr++] & ctype_word) != 0)

8194

RRETURN(MATCH_NOMATCH);

8195

break;

8196

8197

case OP_WORDCHAR:

8198

for (i = 1; i <= min; i++)

8199

if ((md->ctypes[*eptr++] & ctype_word) == 0)

8200

RRETURN(MATCH_NOMATCH);

8201

break;

8202

8203

default:

8204

RRETURN(PCRE_ERROR_INTERNAL);

8205

}

8206

}

8207

8208

/* If min = max, continue at the same level without recursing */

8209

8210

if (min == max) continue;

8211

8212

/* If minimizing, we have to test the rest of the pattern before each

8213

subsequent match. Again, separate the UTF-8 case for speed, and also

8214

separate the UCP cases. */

8215

8216

if (minimize)

8217

{

8218

#ifdef SUPPORT_UCP

8219

if (prop_type > 0)

8220

{

8221

for (fi = min;; fi++)

8222

{

8223

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

8224

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

8225

if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

8226

GETCHARINC(c, eptr);

8227

prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);

8228

if ((*prop_test_variable == prop_test_against) == prop_fail_result)

8229

RRETURN(MATCH_NOMATCH);

8230

}

8231

}

8232

8233

/* Match extended Unicode sequences. We will get here only if the

8234

support is in the binary; otherwise a compile-time error occurs. */

8235

8236

else if (ctype == OP_EXTUNI)

8237

{

8238

for (fi = min;; fi++)

8239

{

8240

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

8241

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

8242

if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

8243

GETCHARINCTEST(c, eptr);

8244

prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);

8245

if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);

8246

while (eptr < md->end_subject)

8247

{

8248

int len = 1;

8249

if (!md->utf8) c = *eptr; else

8250

{

8251

GETCHARLEN(c, eptr, len);

8252

}

8253

prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);

8254

if (prop_category != ucp_M) break;

8255

eptr += len;

8256

}

8257

}

8258

}

8259

8260

else

8261

#endif /* SUPPORT_UCP */

8262

8263

#ifdef SUPPORT_UTF8

8264

/* UTF-8 mode */

8265

if (md->utf8)

8266

{

8267

for (fi = min;; fi++)

8268

{

8269

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

8270

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

8271

if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

8272

8273

GETCHARINC(c, eptr);

8274

switch(ctype)

8275

{

8276

case OP_ANY:

8277

if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);

8278

break;

8279

8280

case OP_ANYBYTE:

8281

break;

8282

8283

case OP_NOT_DIGIT:

8284

if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)

8285

RRETURN(MATCH_NOMATCH);

8286

break;

8287

8288

case OP_DIGIT:

8289

if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)

8290

RRETURN(MATCH_NOMATCH);

8291

break;

8292

8293

case OP_NOT_WHITESPACE:

8294

if (c < 256 && (md->ctypes[c] & ctype_space) != 0)

8295

RRETURN(MATCH_NOMATCH);

8296

break;

8297

8298

case OP_WHITESPACE:

8299

if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)

8300

RRETURN(MATCH_NOMATCH);

8301

break;

8302

8303

case OP_NOT_WORDCHAR:

8304

if (c < 256 && (md->ctypes[c] & ctype_word) != 0)

8305

RRETURN(MATCH_NOMATCH);

8306

break;

8307

8308

case OP_WORDCHAR:

8309

if (c >= 256 && (md->ctypes[c] & ctype_word) == 0)

8310

RRETURN(MATCH_NOMATCH);

8311

break;

8312

8313

default:

8314

RRETURN(PCRE_ERROR_INTERNAL);

8315

}

8316

}

8317

}

8318

else

8319

#endif

8320

/* Not UTF-8 mode */

8321

{

8322

for (fi = min;; fi++)

8323

{

8324

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

8325

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

8326

if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);

8327

c = *eptr++;

8328

switch(ctype)

8329

{

8330

case OP_ANY:

8331

if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH);

8332

break;

8333

8334

case OP_ANYBYTE:

8335

break;

8336

8337

case OP_NOT_DIGIT:

8338

if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);

8339

break;

8340

8341

case OP_DIGIT:

8342

if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);

8343

break;

8344

8345

case OP_NOT_WHITESPACE:

8346

if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);

8347

break;

8348

8349

case OP_WHITESPACE:

8350

if ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);

8351

break;

8352

8353

case OP_NOT_WORDCHAR:

8354

if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);

8355

break;

8356

8357

case OP_WORDCHAR:

8358

if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);

8359

break;

8360

8361

default:

8362

RRETURN(PCRE_ERROR_INTERNAL);

8363

}

8364

}

8365

}

8366

/* Control never gets here */

8367

}

8368

8369

/* If maximizing it is worth using inline code for speed, doing the type

8370

test once at the start (i.e. keep it out of the loop). Again, keep the

8371

UTF-8 and UCP stuff separate. */

8372

8373

else

8374

{

8375

pp = eptr; /* Remember where we started */

8376

8377

#ifdef SUPPORT_UCP

8378

if (prop_type > 0)

8379

{

8380

for (i = min; i < max; i++)

8381

{

8382

int len = 1;

8383

if (eptr >= md->end_subject) break;

8384

GETCHARLEN(c, eptr, len);

8385

prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);

8386

if ((*prop_test_variable == prop_test_against) == prop_fail_result)

8387

break;

8388

eptr+= len;

8389

}

8390

8391

/* eptr is now past the end of the maximum run */

8392

8393

for(;;)

8394

{

8395

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

8396

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

8397

if (eptr-- == pp) break; /* Stop if tried at original pos */

8398

BACKCHAR(eptr);

8399

}

8400

}

8401

8402

/* Match extended Unicode sequences. We will get here only if the

8403

support is in the binary; otherwise a compile-time error occurs. */

8404

8405

else if (ctype == OP_EXTUNI)

8406

{

8407

for (i = min; i < max; i++)

8408

{

8409

if (eptr >= md->end_subject) break;

8410

GETCHARINCTEST(c, eptr);

8411

prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);

8412

if (prop_category == ucp_M) break;

8413

while (eptr < md->end_subject)

8414

{

8415

int len = 1;

8416

if (!md->utf8) c = *eptr; else

8417

{

8418

GETCHARLEN(c, eptr, len);

8419

}

8420

prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);

8421

if (prop_category != ucp_M) break;

8422

eptr += len;

8423

}

8424

}

8425

8426

/* eptr is now past the end of the maximum run */

8427

8428

for(;;)

8429

{

8430

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

8431

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

8432

if (eptr-- == pp) break; /* Stop if tried at original pos */

8433

for (;;) /* Move back over one extended */

8434

{

8435

int len = 1;

8436

BACKCHAR(eptr);

8437

if (!md->utf8) c = *eptr; else

8438

{

8439

GETCHARLEN(c, eptr, len);

8440

}

8441

prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase);

8442

if (prop_category != ucp_M) break;

8443

eptr--;

8444

}

8445

}

8446

}

8447

8448

else

8449

#endif /* SUPPORT_UCP */

8450

8451

#ifdef SUPPORT_UTF8

8452

/* UTF-8 mode */

8453

8454

if (md->utf8)

8455

{

8456

switch(ctype)

8457

{

8458

case OP_ANY:

8459

8460

/* Special code is required for UTF8, but when the maximum is unlimited

8461

we don't need it, so we repeat the non-UTF8 code. This is probably

8462

worth it, because .* is quite a common idiom. */

8463

8464

if (max < INT_MAX)

8465

{

8466

if ((ims & PCRE_DOTALL) == 0)

8467

{

8468

for (i = min; i < max; i++)

8469

{

8470

if (eptr >= md->end_subject || *eptr == NEWLINE) break;

8471

eptr++;

8472

while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;

8473

}

8474

}

8475

else

8476

{

8477

for (i = min; i < max; i++)

8478

{

8479

eptr++;

8480

while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;

8481

}

8482

}

8483

}

8484

8485

/* Handle unlimited UTF-8 repeat */

8486

8487

else

8488

{

8489

if ((ims & PCRE_DOTALL) == 0)

8490

{

8491

for (i = min; i < max; i++)

8492

{

8493

if (eptr >= md->end_subject || *eptr == NEWLINE) break;

8494

eptr++;

8495

}

8496

break;

8497

}

8498

else

8499

{

8500

c = max - min;

8501

if (c > md->end_subject - eptr) c = md->end_subject - eptr;

8502

eptr += c;

8503

}

8504

}

8505

break;

8506

8507

/* The byte case is the same as non-UTF8 */

8508

8509

case OP_ANYBYTE:

8510

c = max - min;

8511

if (c > md->end_subject - eptr) c = md->end_subject - eptr;

8512

eptr += c;

8513

break;

8514

8515

case OP_NOT_DIGIT:

8516

for (i = min; i < max; i++)

8517

{

8518

int len = 1;

8519

if (eptr >= md->end_subject) break;

8520

GETCHARLEN(c, eptr, len);

8521

if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;

8522

eptr+= len;

8523

}

8524

break;

8525

8526

case OP_DIGIT:

8527

for (i = min; i < max; i++)

8528

{

8529

int len = 1;

8530

if (eptr >= md->end_subject) break;

8531

GETCHARLEN(c, eptr, len);

8532

if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;

8533

eptr+= len;

8534

}

8535

break;

8536

8537

case OP_NOT_WHITESPACE:

8538

for (i = min; i < max; i++)

8539

{

8540

int len = 1;

8541

if (eptr >= md->end_subject) break;

8542

GETCHARLEN(c, eptr, len);

8543

if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;

8544

eptr+= len;

8545

}

8546

break;

8547

8548

case OP_WHITESPACE:

8549

for (i = min; i < max; i++)

8550

{

8551

int len = 1;

8552

if (eptr >= md->end_subject) break;

8553

GETCHARLEN(c, eptr, len);

8554

if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;

8555

eptr+= len;

8556

}

8557

break;

8558

8559

case OP_NOT_WORDCHAR:

8560

for (i = min; i < max; i++)

8561

{

8562

int len = 1;

8563

if (eptr >= md->end_subject) break;

8564

GETCHARLEN(c, eptr, len);

8565

if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;

8566

eptr+= len;

8567

}

8568

break;

8569

8570

case OP_WORDCHAR:

8571

for (i = min; i < max; i++)

8572

{

8573

int len = 1;

8574

if (eptr >= md->end_subject) break;

8575

GETCHARLEN(c, eptr, len);

8576

if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;

8577

eptr+= len;

8578

}

8579

break;

8580

8581

default:

8582

RRETURN(PCRE_ERROR_INTERNAL);

8583

}

8584

8585

/* eptr is now past the end of the maximum run */

8586

8587

for(;;)

8588

{

8589

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

8590

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

8591

if (eptr-- == pp) break; /* Stop if tried at original pos */

8592

BACKCHAR(eptr);

8593

}

8594

}

8595

else

8596

#endif

8597

8598

/* Not UTF-8 mode */

8599

{

8600

switch(ctype)

8601

{

8602

case OP_ANY:

8603

if ((ims & PCRE_DOTALL) == 0)

8604

{

8605

for (i = min; i < max; i++)

8606

{

8607

if (eptr >= md->end_subject || *eptr == NEWLINE) break;

8608

eptr++;

8609

}

8610

break;

8611

}

8612

/* For DOTALL case, fall through and treat as \C */

8613

8614

case OP_ANYBYTE:

8615

c = max - min;

8616

if (c > md->end_subject - eptr) c = md->end_subject - eptr;

8617

eptr += c;

8618

break;

8619

8620

case OP_NOT_DIGIT:

8621

for (i = min; i < max; i++)

8622

{

8623

if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)

8624

break;

8625

eptr++;

8626

}

8627

break;

8628

8629

case OP_DIGIT:

8630

for (i = min; i < max; i++)

8631

{

8632

if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)

8633

break;

8634

eptr++;

8635

}

8636

break;

8637

8638

case OP_NOT_WHITESPACE:

8639

for (i = min; i < max; i++)

8640

{

8641

if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)

8642

break;

8643

eptr++;

8644

}

8645

break;

8646

8647

case OP_WHITESPACE:

8648

for (i = min; i < max; i++)

8649

{

8650

if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)

8651

break;

8652

eptr++;

8653

}

8654

break;

8655

8656

case OP_NOT_WORDCHAR:

8657

for (i = min; i < max; i++)

8658

{

8659

if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)

8660

break;

8661

eptr++;

8662

}

8663

break;

8664

8665

case OP_WORDCHAR:

8666

for (i = min; i < max; i++)

8667

{

8668

if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)

8669

break;

8670

eptr++;

8671

}

8672

break;

8673

8674

default:

8675

RRETURN(PCRE_ERROR_INTERNAL);

8676

}

8677

8678

/* eptr is now past the end of the maximum run */

8679

8680

while (eptr >= pp)

8681

{

8682

RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);

8683

eptr--;

8684

if (rrc != MATCH_NOMATCH) RRETURN(rrc);

8685

}

8686

}

8687

8688

/* Get here if we can't make it match with any permitted repetitions */

8689

8690

RRETURN(MATCH_NOMATCH);

8691

}

8692

/* Control never gets here */

8693

8694

/* There's been some horrible disaster. Since all codes > OP_BRA are

8695

for capturing brackets, and there shouldn't be any gaps between 0 and

8696

OP_BRA, arrival here can only mean there is something seriously wrong

8697

in the code above or the OP_xxx definitions. */

8698

8699

default:

8700

DPRINTF(("Unknown opcode %d\n", *ecode));

8701

RRETURN(PCRE_ERROR_UNKNOWN_NODE);

8702

}

8703

8704

/* Do not stick any code in here without much thought; it is assumed

8705

that "continue" in the code above comes out to here to repeat the main

8706

loop. */

8707

8708

} /* End of main loop */

8709

/* Control never reaches here */

8710

}

8711

8712

8713

/***************************************************************************

8714

****************************************************************************

8715

RECURSION IN THE match() FUNCTION

8716

8717

Undefine all the macros that were defined above to handle this. */

8718

8719

/* Clean-up after match(): remove the macro names that, according to the
section banner just above, were defined to handle recursion in match(). The
names must not leak past this point because later code reuses several of them
as ordinary identifiers - pcre_exec() below has a parameter called "length"
and a local called "ims", for example.
NOTE(review): the matching #define lines are not visible from this part of
the file; presumably each name aliases a field of a saved frame when
NO_RECURSE is set - confirm against the definitions earlier in the file. */
#ifdef NO_RECURSE

8720

/* eptr, ecode, offset_top, ims and eptrb are the names match() passes on in
its RMATCH() calls; "flags" presumably names the remaining match() argument
(TODO confirm against the match() header). */
#undef eptr

8721

#undef ecode

8722

#undef offset_top

8723

#undef ims

8724

#undef eptrb

8725

#undef flags

8726

8727

/* Working names used inside the body of match(); pp, minimize, ctype, min
and max, for instance, are used by the repeat-handling cases above. */
#undef callpat

8728

#undef charptr

8729

#undef data

8730

#undef next

8731

#undef pp

8732

#undef prev

8733

#undef saved_eptr

8734

8735

#undef new_recursive

8736

8737

#undef cur_is_word

8738

#undef condition

8739

#undef minimize

8740

#undef prev_is_word

8741

8742

#undef original_ims

8743

8744

#undef ctype

8745

#undef length

8746

#undef max

8747

#undef min

8748

#undef number

8749

#undef offset

8750

#undef op

8751

#undef save_capture_last

8752

#undef save_offset1

8753

#undef save_offset2

8754

#undef save_offset3

8755

#undef stacksave

8756

8757

#undef newptrb

8758

8759

#endif /* NO_RECURSE */

8760

8761

/* fc and fi are defined as macros in both cases (with and without
NO_RECURSE), so they are undefined unconditionally, outside the #ifdef. */

8762

8763

#undef fc

8764

#undef fi

8765

8766

/***************************************************************************

8767

***************************************************************************/

8768

8769

8770

8771

/*************************************************

8772

* Execute a Regular Expression *

8773

*************************************************/

8774

8775

/* This function applies a compiled re to a subject string and picks out

8776

portions of the string if it matches. Two elements in the vector are set for

8777

each substring: the offsets to the start and end of the substring.

8778

8779

Arguments:

8780

argument_re points to the compiled expression

8781

extra_data points to extra data or is NULL

8782

subject points to the subject string

8783

length length of subject string (may contain binary zeros)

8784

start_offset where to start in the subject string

8785

options option bits

8786

offsets points to a vector of ints to be filled in with offsets

8787

offsetcount the number of elements in the vector

8788

8789

Returns: > 0 => success; value is the number of elements filled in

8790

= 0 => success, but offsets is not big enough

8791

-1 => failed to match

8792

< -1 => some kind of unexpected problem */

8793

8794

8795

EXPORT int

8796

pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,

8797

const char *subject, int length, int start_offset, int options, int *offsets,

8798

int offsetcount)

8799

{

8800

int rc, resetcount, ocount;

8801

int first_byte = -1;

8802

int req_byte = -1;

8803

int req_byte2 = -1;

8804

unsigned long int ims = 0;

8805

BOOL using_temporary_offsets = FALSE;

8806

BOOL anchored;

8807

BOOL startline;

8808

BOOL first_byte_caseless = FALSE;

8809

BOOL req_byte_caseless = FALSE;

8810

match_data match_block;

8811

const uschar *tables;

8812

const uschar *start_bits = NULL;

8813

const uschar *start_match = (const uschar *)subject + start_offset;

8814

const uschar *end_subject;

8815

const uschar *req_byte_ptr = start_match - 1;

8816

8817

pcre_study_data internal_study;

8818

const pcre_study_data *study;

8819

8820

real_pcre internal_re;

8821

const real_pcre *external_re = (const real_pcre *)argument_re;

8822

const real_pcre *re = external_re;

8823

8824

/* Plausibility checks */

8825

8826

if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;

8827

if (re == NULL || subject == NULL ||

8828

(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;

8829

if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;

8830

8831

/* Fish out the optional data from the extra_data structure, first setting

8832

the default values. */

8833

8834

study = NULL;

8835

match_block.match_limit = MATCH_LIMIT;

8836

match_block.callout_data = NULL;

8837

8838

/* The table pointer is always in native byte order. */

8839

8840

tables = external_re->tables;

8841

8842

if (extra_data != NULL)

8843

{

8844

8845

if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)

8846

study = (const pcre_study_data *)extra_data->study_data;

8847

if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)

8848

match_block.match_limit = extra_data->match_limit;

8849

if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)

8850

match_block.callout_data = extra_data->callout_data;

8851

if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;

8852

}

8853

8854

/* If the exec call supplied NULL for tables, use the inbuilt ones. This

8855

is a feature that makes it possible to save compiled regex and re-use them

8856

in other programs later. */

8857

8858

if (tables == NULL) tables = pcre_default_tables;

8859

8860

/* Check that the first field in the block is the magic number. If it is not,

8861

test for a regex that was compiled on a host of opposite endianness. If this is

8862

the case, flipped values are put in internal_re and internal_study if there was

8863

study data too. */

8864

8865

if (re->magic_number != MAGIC_NUMBER)

8866

{

8867

re = try_flipped(re, &internal_re, study, &internal_study);

8868

if (re == NULL) return PCRE_ERROR_BADMAGIC;

8869

if (study != NULL) study = &internal_study;

8870

}

8871

8872

/* Set up other data */

8873

8874

anchored = ((re->options | options) & PCRE_ANCHORED) != 0;

8875

startline = (re->options & PCRE_STARTLINE) != 0;

8876

8877

/* The code starts after the real_pcre block and the capture name table. */

8878

8879

match_block.start_code = (const uschar *)external_re + re->name_table_offset +

8880

re->name_count * re->name_entry_size;

8881

8882

match_block.start_subject = (const uschar *)subject;

8883

match_block.start_offset = start_offset;

8884

match_block.end_subject = match_block.start_subject + length;

8885

end_subject = match_block.end_subject;

8886

8887

match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;

8888

match_block.utf8 = (re->options & PCRE_UTF8) != 0;

8889

8890

match_block.notbol = (options & PCRE_NOTBOL) != 0;

8891

match_block.noteol = (options & PCRE_NOTEOL) != 0;

8892

match_block.notempty = (options & PCRE_NOTEMPTY) != 0;

8893

match_block.partial = (options & PCRE_PARTIAL) != 0;

8894

match_block.hitend = FALSE;

8895

8896

match_block.recursive = NULL; /* No recursion at top level */

8897

8898

match_block.lcc = tables + lcc_offset;

8899

match_block.ctypes = tables + ctypes_offset;

8900

8901

/* Partial matching is supported only for a restricted set of regexes at the

8902

moment. */

8903

8904

if (match_block.partial && (re->options & PCRE_NOPARTIAL) != 0)

8905

return PCRE_ERROR_BADPARTIAL;

8906

8907

/* Check a UTF-8 string if required. Unfortunately there's no way of passing

8908

back the character offset. */

8909

8910

#ifdef SUPPORT_UTF8

8911

if (match_block.utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)

8912

{

8913

if (valid_utf8((uschar *)subject, length) >= 0)

8914

return PCRE_ERROR_BADUTF8;

8915

if (start_offset > 0 && start_offset < length)

8916

{

8917

int tb = ((uschar *)subject)[start_offset];

8918

if (tb > 127)

8919

{

8920

tb &= 0xc0;

8921

if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;

8922

}

8923

}

8924

}

8925

#endif

8926

8927

/* The ims options can vary during the matching as a result of the presence

8928

of (?ims) items in the pattern. They are kept in a local variable so that

8929

restoring at the exit of a group is easy. */

8930

8931

ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);

8932

8933

/* If the expression has got more back references than the offsets supplied can

8934

hold, we get a temporary chunk of working store to use during the matching.

8935

Otherwise, we can use the vector supplied, rounding down its size to a multiple

8936

of 3. */

8937

8938

ocount = offsetcount - (offsetcount % 3);

8939

8940

if (re->top_backref > 0 && re->top_backref >= ocount/3)

8941

{

8942

ocount = re->top_backref * 3 + 3;

8943

match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));

8944

if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;

8945

using_temporary_offsets = TRUE;

8946

DPRINTF(("Got memory to hold back references\n"));

8947

}

8948

else match_block.offset_vector = offsets;

8949

8950

match_block.offset_end = ocount;

8951

match_block.offset_max = (2*ocount)/3;

8952

match_block.offset_overflow = FALSE;

8953

match_block.capture_last = -1;

8954

8955

/* Compute the minimum number of offsets that we need to reset each time. Doing

8956

this makes a huge difference to execution time when there aren't many brackets

8957

in the pattern. */

8958

8959

resetcount = 2 + re->top_bracket * 2;

8960

if (resetcount > offsetcount) resetcount = ocount;

8961

8962

/* Reset the working variable associated with each extraction. These should

8963

never be used unless previously set, but they get saved and restored, and so we

8964

initialize them to avoid reading uninitialized locations. */

8965

8966

if (match_block.offset_vector != NULL)

8967

{

8968

8969

8970

while (--iptr >= iend) *iptr = -1;

8971

}

8972

8973

/* Set up the first character to match, if available. The first_byte value is

8974

never set for an anchored regular expression, but the anchoring may be forced

8975

at run time, so we have to test for anchoring. The first char may be unset for

8976

an unanchored pattern, of course. If there's no first char and the pattern was

8977

studied, there may be a bitmap of possible first characters. */

8978

8979

if (!anchored)

8980

{

8981

if ((re->options & PCRE_FIRSTSET) != 0)

8982

{

8983

first_byte = re->first_byte & 255;

8984

if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)

8985

first_byte = match_block.lcc[first_byte];

8986

}

8987

else

8988

if (!startline && study != NULL &&

8989

(study->options & PCRE_STUDY_MAPPED) != 0)

8990

start_bits = study->start_bits;

8991

}

8992

8993

/* For anchored or unanchored matches, there may be a "last known required

8994

character" set. */

8995

8996

if ((re->options & PCRE_REQCHSET) != 0)

8997

{

8998

req_byte = re->req_byte & 255;

8999

req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;

9000

req_byte2 = (tables + fcc_offset)[req_byte]; /* case flipped */

9001

}

9002

9003

/* Loop for handling unanchored repeated matching attempts; for anchored regexes

9004

the loop runs just once. */

9005

9006

9007

{

9008

/* Reset the maximum number of extractions we might see. */

9009

9010

if (match_block.offset_vector != NULL)

9011

{

9012

9013

9014

while (iptr < iend) *iptr++ = -1;

9015

}

9016

9017

/* Advance to a unique first char if possible */

9018

9019

if (first_byte >= 0)

9020

{

9021

if (first_byte_caseless)

9022

while (start_match < end_subject &&

9023

match_block.lcc[*start_match] != first_byte)

9024

start_match++;

9025

else

9026

while (start_match < end_subject && *start_match != first_byte)

9027

start_match++;

9028

}

9029

9030

/* Or to just after \n for a multiline match if possible */

9031

9032

else if (startline)

9033

{

9034

if (start_match > match_block.start_subject + start_offset)

9035

{

9036

while (start_match < end_subject && start_match[-1] != NEWLINE)

9037

start_match++;

9038

}

9039

}

9040

9041

/* Or to a non-unique first char after study */

9042

9043

else if (start_bits != NULL)

9044

{

9045

while (start_match < end_subject)

9046

{

9047

9048

if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break;

9049

}

9050

}

9051

9052

#ifdef DEBUG /* Sigh. Some compilers never learn. */

9053

printf(">>>> Match against: ");

9054

pchars(start_match, end_subject - start_match, TRUE, &match_block);

9055

printf("\n");

9056

#endif

9057

9058

/* If req_byte is set, we know that that character must appear in the subject

9059

for the match to succeed. If the first character is set, req_byte must be

9060

later in the subject; otherwise the test starts at the match point. This

9061

optimization can save a huge amount of backtracking in patterns with nested

9062

unlimited repeats that aren't going to match. Writing separate code for

9063

cased/caseless versions makes it go faster, as does using an autoincrement

9064

and backing off on a match.

9065

9066

HOWEVER: when the subject string is very, very long, searching to its end can

9067

take a long time, and give bad performance on quite ordinary patterns. This

9068

showed up when somebody was matching /^C/ on a 32-megabyte string... so we

9069

don't do this when the string is sufficiently long.

9070

9071

ALSO: this processing is disabled when partial matching is requested. */

9072

9073

9074

if (req_byte >= 0 &&

9075

end_subject - start_match < REQ_BYTE_MAX &&

9076

!match_block.partial)

9077

{

9078

9079

9080

/* We don't need to repeat the search if we haven't yet reached the

9081

place we found it at last time. */

9082

9083

if (p > req_byte_ptr)

9084

{

9085

if (req_byte_caseless)

9086

{

9087

while (p < end_subject)

9088

{

9089

9090

if (pp == req_byte || pp == req_byte2) { p--; break; }

9091

}

9092

}

9093

else

9094

{

9095

while (p < end_subject)

9096

{

9097

if (*p++ == req_byte) { p--; break; }

9098

}

9099

}

9100

9101

/* If we can't find the required character, break the matching loop */

9102

9103

if (p >= end_subject) break;

9104

9105

/* If we have found the required character, save the point where we

9106

found it, so that we don't search again next time round the loop if

9107

the start hasn't passed this character yet. */

9108

9109

req_byte_ptr = p;

9110

}

9111

}

9112

9113

/* When a match occurs, substrings will be set for all internal extractions;

9114

we just need to set up the whole thing as substring 0 before returning. If

9115

there were too many extractions, set the return code to zero. In the case

9116

where we had to get some local store to hold offsets for backreferences, copy

9117

those back references that we can. In this case there need not be overflow

9118

if certain parts of the pattern were not used. */

9119

9120

match_block.start_match = start_match;

9121

match_block.match_call_count = 0;

9122

9123

rc = match(start_match, match_block.start_code, 2, &match_block, ims, NULL,

9124

match_isgroup);

9125

9126

if (rc == MATCH_NOMATCH)

9127

{

9128

start_match++;

9129

#ifdef SUPPORT_UTF8

9130

if (match_block.utf8)

9131

while(start_match < end_subject && (*start_match & 0xc0) == 0x80)

9132

start_match++;

9133

#endif

9134

continue;

9135

}

9136

9137

if (rc != MATCH_MATCH)

9138

{

9139

DPRINTF((">>>> error: returning %d\n", rc));

9140

return rc;

9141

}

9142

9143

/* We have a match! Copy the offset information from temporary store if

9144

necessary */

9145

9146

if (using_temporary_offsets)

9147

{

9148

if (offsetcount >= 4)

9149

{

9150

memcpy(offsets + 2, match_block.offset_vector + 2,

9151

(offsetcount - 2) * sizeof(int));

9152

DPRINTF(("Copied offsets from temporary memory\n"));

9153

}

9154

if (match_block.end_offset_top > offsetcount)

9155

match_block.offset_overflow = TRUE;

9156

9157

DPRINTF(("Freeing temporary memory\n"));

9158

(pcre_free)(match_block.offset_vector);

9159

}

9160

9161

rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;

9162

9163

if (offsetcount < 2) rc = 0; else

9164

{

9165

offsets[0] = start_match - match_block.start_subject;

9166

offsets[1] = match_block.end_match_ptr - match_block.start_subject;

9167

}

9168

9169

DPRINTF((">>>> returning %d\n", rc));

9170

return rc;

9171

}

9172

9173

/* This "while" is the end of the "do" above */

9174

9175

while (!anchored && start_match <= end_subject);

9176

9177

if (using_temporary_offsets)

9178

{

9179

DPRINTF(("Freeing temporary memory\n"));

9180

(pcre_free)(match_block.offset_vector);

9181

}

9182

9183

if (match_block.partial && match_block.hitend)

9184

{

9185

DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));

9186

return PCRE_ERROR_PARTIAL;

9187

}

9188

else

9189

{

9190

DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));

9191

return PCRE_ERROR_NOMATCH;

9192

}

9193

}

9194

9195

/* End of pcre.c */

Older »