~ubuntu-branches/ubuntu/wily/geany/wily

Viewing changes to scintilla/RESearch.cxx

Committer: Package Import Robot
Author(s): Chow Loong Jin
Date: 2011-12-10 07:43:26 UTC
mfrom: (3.3.7 sid)
Revision ID: package-import@ubuntu.com-20111210074326-s8yqbew5i20h33tf

Tags: 0.21-1ubuntu1

* Merge from Debian Unstable, remaining changes:
  - debian/patches/20_use_evince_viewer.patch:
     + use evince as viewer for pdf and dvi files
  - debian/patches/20_use_x_terminal_emulator.patch:
     + use x-terminal-emulator as terminal
  - debian/control
     + Add breaks on geany-plugins-common << 0.20
* Also fixes bugs:
  - Filter for MATLAB/Octave files filters everythign (LP: 885505)

files added:
.pc

.pc/.version

.pc/20_add_debian_specific_filetypes.patch

.pc/20_add_debian_specific_filetypes.patch/data

.pc/20_add_debian_specific_filetypes.patch/data/filetype_extensions.conf

.pc/20_change_gpl_location.patch

.pc/20_change_gpl_location.patch/src

.pc/20_change_gpl_location.patch/src/about.c

.pc/20_debian_control_tags.patch

.pc/20_debian_control_tags.patch/tagmanager

.pc/20_debian_control_tags.patch/tagmanager/conf.c

.pc/20_load_plugins_from_all_paths.patch

.pc/20_load_plugins_from_all_paths.patch/src

.pc/20_load_plugins_from_all_paths.patch/src/plugins.c

.pc/20_use_evince_viewer.patch

.pc/20_use_evince_viewer.patch/data

.pc/20_use_evince_viewer.patch/data/filetypes.latex

.pc/20_use_sensible_browser.patch

.pc/20_use_sensible_browser.patch/src

.pc/20_use_sensible_browser.patch/src/keyfile.c

.pc/20_use_x_terminal_emulator.patch

.pc/20_use_x_terminal_emulator.patch/src

.pc/20_use_x_terminal_emulator.patch/src/keyfile.c

.pc/applied-patches

data/filetypes.Cython.conf

data/filetypes.Scala.conf

data/filetypes.cobol

data/templates/files/.svn

data/templates/files/.svn/all-wcprops

data/templates/files/.svn/entries

data/templates/files/.svn/prop-base

data/templates/files/.svn/prop-base/file.html.svn-base

data/templates/files/.svn/prop-base/file.php.svn-base

data/templates/files/.svn/prop-base/file.rb.svn-base

data/templates/files/.svn/prop-base/file.tex.svn-base

data/templates/files/.svn/prop-base/main.c.svn-base

data/templates/files/.svn/prop-base/main.cxx.svn-base

data/templates/files/.svn/prop-base/main.d.svn-base

data/templates/files/.svn/prop-base/main.java.svn-base

data/templates/files/.svn/prop-base/main.py.svn-base

data/templates/files/.svn/prop-base/program.pas.svn-base

data/templates/files/.svn/props

data/templates/files/.svn/text-base

data/templates/files/.svn/text-base/file.html.svn-base

data/templates/files/.svn/text-base/file.php.svn-base

data/templates/files/.svn/text-base/file.rb.svn-base

data/templates/files/.svn/text-base/file.tex.svn-base

data/templates/files/.svn/text-base/main.c.svn-base

data/templates/files/.svn/text-base/main.cxx.svn-base

data/templates/files/.svn/text-base/main.d.svn-base

data/templates/files/.svn/text-base/main.java.svn-base

data/templates/files/.svn/text-base/main.py.svn-base

data/templates/files/.svn/text-base/main.vala.svn-base

data/templates/files/.svn/text-base/program.pas.svn-base

data/templates/files/.svn/tmp

data/templates/files/.svn/tmp/prop-base

data/templates/files/.svn/tmp/props

data/templates/files/.svn/tmp/text-base

data/templates/files/main.vala

debian/gbp.conf

debian/geany-common.doc-base

debian/geany-common.install

debian/geany.install

debian/geany.manpages

debian/geany.menu

debian/patches/20_add_debian_specific_filetypes.patch

debian/patches/20_change_gpl_location.patch

debian/patches/20_debian_control_tags.patch

debian/patches/20_load_plugins_from_all_paths.patch

debian/patches/20_use_evince_viewer.patch

debian/patches/20_use_sensible_browser.patch

debian/patches/20_use_x_terminal_emulator.patch

debian/patches/series

debian/source

debian/source/format

doc/images/pref_dialog_interface_interface.png

doc/images/pref_dialog_interface_notebook.png

doc/images/pref_dialog_interface_toolbar.png

doc/images/pref_dialog_various.png

po/fa.po

scintilla/gtk

scintilla/gtk/Converter.h

scintilla/gtk/PlatGTK.cxx

scintilla/gtk/ScintillaGTK.cxx

scintilla/gtk/scintilla-marshal.c

scintilla/gtk/scintilla-marshal.h

scintilla/gtk/scintilla-marshal.list

scintilla/include/ILexer.h

scintilla/lexers

scintilla/lexers/LexAda.cxx

scintilla/lexers/LexAsm.cxx

scintilla/lexers/LexBash.cxx

scintilla/lexers/LexBasic.cxx

scintilla/lexers/LexCOBOL.cxx

scintilla/lexers/LexCPP.cxx

scintilla/lexers/LexCSS.cxx

scintilla/lexers/LexCaml.cxx

scintilla/lexers/LexCmake.cxx

scintilla/lexers/LexD.cxx

scintilla/lexers/LexErlang.cxx

scintilla/lexers/LexForth.cxx

scintilla/lexers/LexFortran.cxx

scintilla/lexers/LexHTML.cxx

scintilla/lexers/LexHaskell.cxx

scintilla/lexers/LexLisp.cxx

scintilla/lexers/LexLua.cxx

scintilla/lexers/LexMarkdown.cxx

scintilla/lexers/LexMatlab.cxx

scintilla/lexers/LexNsis.cxx

scintilla/lexers/LexOthers.cxx

scintilla/lexers/LexPascal.cxx

scintilla/lexers/LexPerl.cxx

scintilla/lexers/LexPython.cxx

scintilla/lexers/LexR.cxx

scintilla/lexers/LexRuby.cxx

scintilla/lexers/LexSQL.cxx

scintilla/lexers/LexTCL.cxx

scintilla/lexers/LexTxt2tags.cxx

scintilla/lexers/LexVHDL.cxx

scintilla/lexers/LexVerilog.cxx

scintilla/lexers/LexYAML.cxx

scintilla/lexlib

scintilla/lexlib/Accessor.cxx

scintilla/lexlib/Accessor.h

scintilla/lexlib/CharacterSet.cxx

scintilla/lexlib/CharacterSet.h

scintilla/lexlib/LexAccessor.h

scintilla/lexlib/LexerBase.cxx

scintilla/lexlib/LexerBase.h

scintilla/lexlib/LexerModule.cxx

scintilla/lexlib/LexerModule.h

scintilla/lexlib/LexerNoExceptions.cxx

scintilla/lexlib/LexerNoExceptions.h

scintilla/lexlib/LexerSimple.cxx

scintilla/lexlib/LexerSimple.h

scintilla/lexlib/OptionSet.h

scintilla/lexlib/PropSetSimple.cxx

scintilla/lexlib/PropSetSimple.h

scintilla/lexlib/SparseState.h

scintilla/lexlib/StyleContext.cxx

scintilla/lexlib/StyleContext.h

scintilla/lexlib/WordList.cxx

scintilla/lexlib/WordList.h

scintilla/src

scintilla/src/AutoComplete.cxx

scintilla/src/AutoComplete.h

scintilla/src/CallTip.cxx

scintilla/src/CallTip.h

scintilla/src/Catalogue.cxx

scintilla/src/Catalogue.h

scintilla/src/CellBuffer.cxx

scintilla/src/CellBuffer.h

scintilla/src/CharClassify.cxx

scintilla/src/CharClassify.h

scintilla/src/ContractionState.cxx

scintilla/src/ContractionState.h

scintilla/src/Decoration.cxx

scintilla/src/Decoration.h

scintilla/src/Document.cxx

scintilla/src/Document.h

scintilla/src/Editor.cxx

scintilla/src/Editor.h

scintilla/src/ExternalLexer.cxx

scintilla/src/ExternalLexer.h

scintilla/src/FontQuality.h

scintilla/src/Indicator.cxx

scintilla/src/Indicator.h

scintilla/src/KeyMap.cxx

scintilla/src/KeyMap.h

scintilla/src/LineMarker.cxx

scintilla/src/LineMarker.h

scintilla/src/Partitioning.h

scintilla/src/PerLine.cxx

scintilla/src/PerLine.h

scintilla/src/PositionCache.cxx

scintilla/src/PositionCache.h

scintilla/src/RESearch.cxx

scintilla/src/RESearch.h

scintilla/src/RunStyles.cxx

scintilla/src/RunStyles.h

scintilla/src/SVector.h

scintilla/src/ScintillaBase.cxx

scintilla/src/ScintillaBase.h

scintilla/src/Selection.cxx

scintilla/src/Selection.h

scintilla/src/SplitVector.h

scintilla/src/Style.cxx

scintilla/src/Style.h

scintilla/src/UniConversion.cxx

scintilla/src/UniConversion.h

scintilla/src/ViewStyle.cxx

scintilla/src/ViewStyle.h

scintilla/src/XPM.cxx

scintilla/src/XPM.h

scintilla/version.txt

tagmanager/cobol.c

tagmanager/mio

tagmanager/mio/COPYING

tagmanager/mio/Makefile.am

tagmanager/mio/Makefile.in

tagmanager/mio/README

tagmanager/mio/mio-file.c

tagmanager/mio/mio-memory.c

tagmanager/mio/mio.c

tagmanager/mio/mio.h

files removed:
debian/README.source

debian/geany.doc-base

debian/install

debian/manpages

debian/menu

debian/patches/00list

debian/patches/20_add_debian_specific_filetypes.dpatch

debian/patches/20_change_gpl_location.dpatch

debian/patches/20_debian_control_tags.dpatch

debian/patches/20_use_evince_viewer.dpatch

debian/patches/20_use_sensible_browser.dpatch

debian/patches/20_use_x_terminal_emulator.dpatch

doc/images/pref_dialog_interface.png

doc/images/pref_dialog_toolbar.png

scintilla/AutoComplete.cxx

scintilla/AutoComplete.h

scintilla/CallTip.cxx

scintilla/CallTip.h

scintilla/CellBuffer.cxx

scintilla/CellBuffer.h

scintilla/CharClassify.cxx

scintilla/CharClassify.h

scintilla/CharacterSet.h

scintilla/ContractionState.cxx

scintilla/ContractionState.h

scintilla/Converter.h

scintilla/Decoration.cxx

scintilla/Decoration.h

scintilla/Document.cxx

scintilla/Document.h

scintilla/DocumentAccessor.cxx

scintilla/DocumentAccessor.h

scintilla/Editor.cxx

scintilla/Editor.h

scintilla/ExternalLexer.cxx

scintilla/ExternalLexer.h

scintilla/Indicator.cxx

scintilla/Indicator.h

scintilla/KeyMap.cxx

scintilla/KeyMap.h

scintilla/KeyWords.cxx

scintilla/LexAda.cxx

scintilla/LexAsm.cxx

scintilla/LexBash.cxx

scintilla/LexBasic.cxx

scintilla/LexCPP.cxx

scintilla/LexCSS.cxx

scintilla/LexCaml.cxx

scintilla/LexCmake.cxx

scintilla/LexD.cxx

scintilla/LexErlang.cxx

scintilla/LexForth.cxx

scintilla/LexFortran.cxx

scintilla/LexHTML.cxx

scintilla/LexHaskell.cxx

scintilla/LexLisp.cxx

scintilla/LexLua.cxx

scintilla/LexMarkdown.cxx

scintilla/LexMatlab.cxx

scintilla/LexNsis.cxx

scintilla/LexOthers.cxx

scintilla/LexPascal.cxx

scintilla/LexPerl.cxx

scintilla/LexPython.cxx

scintilla/LexR.cxx

scintilla/LexRuby.cxx

scintilla/LexSQL.cxx

scintilla/LexTCL.cxx

scintilla/LexTxt2tags.cxx

scintilla/LexVHDL.cxx

scintilla/LexVerilog.cxx

scintilla/LexYAML.cxx

scintilla/LineMarker.cxx

scintilla/LineMarker.h

scintilla/Partitioning.h

scintilla/PerLine.cxx

scintilla/PerLine.h

scintilla/PlatGTK.cxx

scintilla/PositionCache.cxx

scintilla/PositionCache.h

scintilla/PropSet.cxx

scintilla/PropSetSimple.h

scintilla/RESearch.cxx

scintilla/RESearch.h

scintilla/RunStyles.cxx

scintilla/RunStyles.h

scintilla/SVector.h

scintilla/ScintillaBase.cxx

scintilla/ScintillaBase.h

scintilla/ScintillaGTK.cxx

scintilla/Selection.cxx

scintilla/Selection.h

scintilla/SplitVector.h

scintilla/Style.cxx

scintilla/Style.h

scintilla/StyleContext.cxx

scintilla/StyleContext.h

scintilla/UniConversion.cxx

scintilla/UniConversion.h

scintilla/ViewStyle.cxx

scintilla/ViewStyle.h

scintilla/WindowAccessor.cxx

scintilla/XPM.cxx

scintilla/XPM.h

scintilla/include/Accessor.h

scintilla/include/KeyWords.h

scintilla/include/PropSet.h

scintilla/include/SString.h

scintilla/include/WindowAccessor.h

scintilla/scintilla-marshal.c

scintilla/scintilla-marshal.h

scintilla/scintilla-marshal.list

files modified:
AUTHORS

ChangeLog

HACKING

Makefile.in

NEWS

README

README.Packagers

THANKS

TODO

aclocal.m4

config.guess

config.h.in

config.sub

configure

configure.ac

data/colorschemes/alt.conf

data/filetype_extensions.conf

data/filetypes.Genie.conf

data/filetypes.actionscript

data/filetypes.ada

data/filetypes.asm

data/filetypes.c

data/filetypes.caml

data/filetypes.cmake

data/filetypes.conf

data/filetypes.cpp

data/filetypes.cs

data/filetypes.css

data/filetypes.d

data/filetypes.diff

data/filetypes.docbook

data/filetypes.erlang

data/filetypes.f77

data/filetypes.ferite

data/filetypes.forth

data/filetypes.fortran

data/filetypes.freebasic

data/filetypes.glsl

data/filetypes.haskell

data/filetypes.haxe

data/filetypes.html

data/filetypes.java

data/filetypes.javascript

data/filetypes.latex

data/filetypes.lisp

data/filetypes.lua

data/filetypes.makefile

data/filetypes.matlab

data/filetypes.nsis

data/filetypes.pascal

data/filetypes.perl

data/filetypes.php

data/filetypes.po

data/filetypes.python

data/filetypes.r

data/filetypes.restructuredtext

data/filetypes.ruby

data/filetypes.sh

data/filetypes.sql

data/filetypes.tcl

data/filetypes.vala

data/filetypes.verilog

data/filetypes.vhdl

data/filetypes.xml

data/filetypes.yaml

debian/changelog

debian/compat

debian/control

debian/rules

depcomp

doc/Makefile.in

doc/geany.1

doc/geany.1.in

doc/geany.html

doc/geany.txt

doc/images/build_menu_commands_dialog.png

doc/images/find_in_files_dialog.png

doc/images/pref_dialog_edit_completions.png

doc/images/pref_dialog_edit_display.png

doc/images/pref_dialog_edit_features.png

doc/images/pref_dialog_edit_indentation.png

doc/images/pref_dialog_files.png

doc/images/pref_dialog_gen_misc.png

doc/images/pref_dialog_gen_startup.png

doc/images/pref_dialog_keys.png

doc/images/pref_dialog_printing.png

doc/images/pref_dialog_templ.png

doc/images/pref_dialog_tools.png

doc/images/pref_dialog_vte.png

doc/plugins.dox

doc/pluginsignals.c

doc/pluginsymbols.c

geany.glade

geany.pc.in

geany.spec

geany_private.rc

icons/16x16/Makefile.in

icons/48x48/Makefile.in

icons/Makefile.in

icons/scalable/Makefile.in

install-sh

intltool-extract.in

intltool-merge.in

intltool-update.in

ltmain.sh *

makefile.win32

missing

mkinstalldirs

plugins/Makefile.am

plugins/Makefile.in

plugins/classbuilder.c

plugins/demoplugin.c

plugins/export.c

plugins/filebrowser.c

plugins/geanyfunctions.h

plugins/geanyplugin.h

plugins/genapi.py

plugins/htmlchars.c

plugins/makefile.win32

plugins/saveactions.c

plugins/splitwindow.c

po/ChangeLog

po/Makefile.in.in

po/POTFILES.in

po/ast.po

po/be.po

po/bg.po

po/ca.po

po/cs.po

po/de.po

po/el.po

po/en_GB.po

po/es.po

po/fi.po

po/fr.po

po/gl.po

po/hu.po

po/it.po

po/ja.po

po/kk.po

po/ko.po

po/lb.po

po/nl.po

po/pl.po

po/pt.po

po/pt_BR.po

po/ro.po

po/ru.po

po/sl.po

po/sv.po

po/tr.po

po/uk.po

po/vi.po

po/zh_CN.po

po/zh_TW.po

scintilla/Makefile.am

scintilla/Makefile.in

scintilla/include/Makefile.am

scintilla/include/Makefile.in

scintilla/include/Platform.h

scintilla/include/SciLexer.h

scintilla/include/Scintilla.h

scintilla/include/Scintilla.iface

scintilla/include/ScintillaWidget.h

scintilla/makefile.win32

src/Makefile.am

src/Makefile.in

src/about.c

src/about.h

src/build.c

src/build.h

src/callbacks.c

src/callbacks.h

src/dialogs.c

src/dialogs.h

src/document.c

src/document.h

src/documentprivate.h

src/editor.c

src/editor.h

src/encodings.c

src/encodings.h

src/filetypes.c

src/filetypes.h

src/filetypesprivate.h

src/gb.c

src/geany.h

src/geanyentryaction.c

src/geanyentryaction.h

src/geanymenubuttonaction.c

src/geanymenubuttonaction.h

src/geanyobject.c

src/geanyobject.h

src/geanywraplabel.c

src/geanywraplabel.h

src/highlighting.c

src/highlighting.h

src/images.c

src/interface.c

src/keybindings.c

src/keybindings.h

src/keyfile.c

src/keyfile.h

src/log.c

src/log.h

src/main.c

src/main.h

src/makefile.win32

src/msgwindow.c

src/msgwindow.h

src/navqueue.c

src/navqueue.h

src/notebook.c

src/notebook.h

src/plugindata.h

src/pluginprivate.h

src/plugins.c

src/plugins.h

src/pluginutils.c

src/pluginutils.h

src/prefix.c

src/prefs.c

src/prefs.h

src/printing.c

src/printing.h

src/project.c

src/project.h

src/projectprivate.h

src/sciwrappers.c

src/sciwrappers.h

src/search.c

src/search.h

src/sidebar.c

src/sidebar.h

src/socket.c

src/socket.h

src/stash.c

src/stash.h

src/support.h

src/symbols.c

src/symbols.h

src/templates.c

src/templates.h

src/toolbar.c

src/toolbar.h

src/tools.c

src/tools.h

src/ui_utils.c

src/ui_utils.h

src/utils.c

src/utils.h

src/vte.c

src/vte.h

src/win32.c

src/win32.h

tagmanager/Makefile.am

tagmanager/Makefile.in

tagmanager/c.c

tagmanager/conf.c

tagmanager/diff.c

tagmanager/entry.c

tagmanager/entry.h

tagmanager/fortran.c

tagmanager/get.c

tagmanager/get.h

tagmanager/haskell.c

tagmanager/include/Makefile.in

tagmanager/include/tm_tag.h

tagmanager/include/tm_workspace.h

tagmanager/js.c

tagmanager/latex.c

tagmanager/lregex.c

tagmanager/makefile.win32

tagmanager/nestlevel.c

tagmanager/nestlevel.h

tagmanager/nsis.c

tagmanager/parse.c

tagmanager/parsers.h

tagmanager/php.c

tagmanager/python.c

tagmanager/read.c

tagmanager/read.h

tagmanager/rest.c

tagmanager/sort.c

tagmanager/sql.c

tagmanager/strlist.c

tagmanager/tm_file_entry.c

tagmanager/tm_project.c

tagmanager/tm_source_file.c

tagmanager/tm_symbol.c

tagmanager/tm_tag.c

tagmanager/tm_workspace.c

win32-config.h

wscript

Show diffs side-by-side

added added

removed removed

scintilla/RESearch.cxx

// Scintilla source code edit control

/** @file RESearch.cxx

** Regular expression search library.

**/

* regex - Regular expression pattern matching and replacement

* By: Ozan S. Yigit (oz)

* Dept. of Computer Science

* York University

* Original code available from http://www.cs.yorku.ca/~oz/

* Translation to C++ by Neil Hodgson neilh@scintilla.org

* Removed all use of register.

* Converted to modern function prototypes.

* Put all global/static variables into an object so this code can be

* used from multiple threads, etc.

* Some extensions by Philippe Lhoste PhiLho(a)GMX.net

* These routines are the PUBLIC DOMAIN equivalents of regex

* routines as found in 4.nBSD UN*X, with minor extensions.

* These routines are derived from various implementations found

* in software tools books, and Conroy's grep. They are NOT derived

* from licensed/restricted software.

* For more interesting/academic/complicated implementations,

* see Henry Spencer's regexp routines, or GNU Emacs pattern

* matching module.

* Modification history removed.

* Interfaces:

* RESearch::Compile: compile a regular expression into a NFA.

* const char *RESearch::Compile(const char *pattern, int length,

* bool caseSensitive, bool posix)

* Returns a short error string if they fail.

* RESearch::Execute: execute the NFA to match a pattern.

* int RESearch::Execute(characterIndexer &ci, int lp, int endp)

* RESearch::Substitute: substitute the matched portions in a new string.

* int RESearch::Substitute(CharacterIndexer &ci, char *src, char *dst)

* re_fail: failure routine for RESearch::Execute. (no longer used)

* void re_fail(char *msg, char op)

* Regular Expressions:

* [1] char matches itself, unless it is a special

* character (metachar): . \ [ ] * + ^ $

* and ( ) if posix option.

* [2] . matches any character.

* [3] \ matches the character following it, except:

* - \a, \b, \f, \n, \r, \t, \v match the corresponding C

* escape char, respectively BEL, BS, FF, LF, CR, TAB and VT;

* Note that \r and \n are never matched because Scintilla

* regex searches are made line per line

* (stripped of end-of-line chars).

* - if not in posix mode, when followed by a

* left or right round bracket (see [7]);

* - when followed by a digit 1 to 9 (see [8]);

* - when followed by a left or right angle bracket

* (see [9]);

* - when followed by d, D, s, S, w or W (see [10]);

* - when followed by x and two hexa digits (see [11].

* Backslash is used as an escape character for all

* other meta-characters, and itself.

* [4] [set] matches one of the characters in the set.

* If the first character in the set is "^",

* it matches the characters NOT in the set, i.e.

* complements the set. A shorthand S-E (start dash end)

* is used to specify a set of characters S up to

* E, inclusive. S and E must be characters, otherwise

* the dash is taken literally (eg. in expression [\d-a]).

* The special characters "]" and "-" have no special

* meaning if they appear as the first chars in the set.

* To include both, put - first: [-]A-Z]

* (or just backslash them).

* examples: match:

* [-]|] matches these 3 chars,

* []-|] matches from ] to | chars

* [a-z] any lowercase alpha

* [^-]] any char except - and ]

* [^A-Z] any char except uppercase

* alpha

100

101

* [a-zA-Z] any alpha

102

103

* [5] * any regular expression form [1] to [4]

104

* (except [7], [8] and [9] forms of [3]),

105

* followed by closure char (*)

106

* matches zero or more matches of that form.

107

108

* [6] + same as [5], except it matches one or more.

109

* Both [5] and [6] are greedy (they match as much as possible).

110

111

* [7] a regular expression in the form [1] to [12], enclosed

112

* as $form$ (or (form) with posix flag) matches what

113

* form matches. The enclosure creates a set of tags,

114

* used for [8] and for pattern substitution.

115

* The tagged forms are numbered starting from 1.

116

117

* [8] a \ followed by a digit 1 to 9 matches whatever a

118

* previously tagged regular expression ([7]) matched.

119

120

* [9] \< a regular expression starting with a \< construct

121

* \> and/or ending with a \> construct, restricts the

122

* pattern matching to the beginning of a word, and/or

123

* the end of a word. A word is defined to be a character

124

* string beginning and/or ending with the characters

125

* A-Z a-z 0-9 and _. Scintilla extends this definition

126

* by user setting. The word must also be preceded and/or

127

* followed by any character outside those mentioned.

128

129

* [10] \l a backslash followed by d, D, s, S, w or W,

130

* becomes a character class (both inside and

131

* outside sets []).

132

* d: decimal digits

133

* D: any char except decimal digits

134

* s: whitespace (space, \t \n \r \f \v)

135

* S: any char except whitespace (see above)

136

* w: alphanumeric & underscore (changed by user setting)

137

* W: any char except alphanumeric & underscore (see above)

138

139

* [11] \xHH a backslash followed by x and two hexa digits,

140

* becomes the character whose Ascii code is equal

141

* to these digits. If not followed by two digits,

142

* it is 'x' char itself.

143

144

* [12] a composite regular expression xy where x and y

145

* are in the form [1] to [11] matches the longest

146

* match of x followed by a match for y.

147

148

* [13] ^ a regular expression starting with a ^ character

149

* $ and/or ending with a $ character, restricts the

150

* pattern matching to the beginning of the line,

151

* or the end of line. [anchors] Elsewhere in the

152

* pattern, ^ and $ are treated as ordinary characters.

153

154

155

* Acknowledgements:

156

157

* HCR's Hugh Redelmeier has been most helpful in various

158

* stages of development. He convinced me to include BOW

159

* and EOW constructs, originally invented by Rob Pike at

160

* the University of Toronto.

161

162

* References:

163

* Software tools Kernighan & Plauger

164

* Software tools in Pascal Kernighan & Plauger

165

* Grep [rsx-11 C dist] David Conroy

166

* ed - text editor Un*x Programmer's Manual

167

* Advanced editing on Un*x B. W. Kernighan

168

* RegExp routines Henry Spencer

169

170

* Notes:

171

172

* This implementation uses a bit-set representation for character

173

* classes for speed and compactness. Each character is represented

174

* by one bit in a 256-bit block. Thus, CCL always takes a

175

* constant 32 bytes in the internal nfa, and RESearch::Execute does a single

176

* bit comparison to locate the character in the set.

177

178

* Examples:

179

180

* pattern: foo*.*

181

* compile: CHR f CHR o CLO CHR o END CLO ANY END END

182

* matches: fo foo fooo foobar fobar foxx ...

183

184

* pattern: fo[ob]a[rz]

185

* compile: CHR f CHR o CCL bitset CHR a CCL bitset END

186

* matches: fobar fooar fobaz fooaz

187

188

* pattern: foo\\+

189

* compile: CHR f CHR o CHR o CHR \ CLO CHR \ END END

190

* matches: foo\ foo\\ foo\\\ ...

191

192

* pattern: $foo$[1-3]\1 (same as foo[1-3]foo)

193

* compile: BOT 1 CHR f CHR o CHR o EOT 1 CCL bitset REF 1 END

194

* matches: foo1foo foo2foo foo3foo

195

196

* pattern: $fo.*$-\1

197

* compile: BOT 1 CHR f CHR o CLO ANY END EOT 1 CHR - REF 1 END

198

* matches: foo-foo fo-fo fob-fob foobar-foobar ...

199

200

201

#include <stdlib.h>

202

203

#include "CharClassify.h"

204

#include "RESearch.h"

205

206

// Shut up annoying Visual C++ warnings:

207

#ifdef _MSC_VER

208

#pragma warning(disable: 4514)

209

#endif

210

211

#ifdef SCI_NAMESPACE

212

using namespace Scintilla;

213

#endif

214

215

#define OKP 1

216

#define NOP 0

217

218

#define CHR 1

219

#define ANY 2

220

#define CCL 3

221

#define BOL 4

222

#define EOL 5

223

#define BOT 6

224

#define EOT 7

225

#define BOW 8

226

#define EOW 9

227

#define REF 10

228

#define CLO 11

229

230

#define END 0

231

232

233

* The following defines are not meant to be changeable.

234

* They are for readability only.

235

236

#define BLKIND 0370

237

#define BITIND 07

238

239

const char bitarr[] = { 1, 2, 4, 8, 16, 32, 64, '\200' };

240

241

#define badpat(x) (*nfa = END, x)

242

243

244

* Character classification table for word boundary operators BOW

245

* and EOW is passed in by the creator of this object (Scintilla

246

* Document). The Document default state is that word chars are:

247

* 0-9, a-z, A-Z and _

248

249

250

RESearch::RESearch(CharClassify *charClassTable) {

251

failure = 0;

252

charClass = charClassTable;

253

Init();

254

}

255

256

RESearch::~RESearch() {

257

Clear();

258

}

259

260

void RESearch::Init() {

261

sta = NOP; /* status of lastpat */

262

bol = 0;

263

for (int i = 0; i < MAXTAG; i++)

264

pat[i] = 0;

265

for (int j = 0; j < BITBLK; j++)

266

bittab[j] = 0;

267

}

268

269

void RESearch::Clear() {

270

for (int i = 0; i < MAXTAG; i++) {

271

delete []pat[i];

272

pat[i] = 0;

273

bopat[i] = NOTFOUND;

274

eopat[i] = NOTFOUND;

275

}

276

}

277

278

bool RESearch::GrabMatches(CharacterIndexer &ci) {

279

bool success = true;

280

for (unsigned int i = 0; i < MAXTAG; i++) {

281

if ((bopat[i] != NOTFOUND) && (eopat[i] != NOTFOUND)) {

282

unsigned int len = eopat[i] - bopat[i];

283

pat[i] = new char[len + 1];

284

if (pat[i]) {

285

for (unsigned int j = 0; j < len; j++)

286

pat[i][j] = ci.CharAt(bopat[i] + j);

287

pat[i][len] = '\0';

288

} else {

289

success = false;

290

}

291

}

292

}

293

return success;

294

}

295

296

void RESearch::ChSet(unsigned char c) {

297

bittab[((c) & BLKIND) >> 3] |= bitarr[(c) & BITIND];

298

}

299

300

void RESearch::ChSetWithCase(unsigned char c, bool caseSensitive) {

301

if (caseSensitive) {

302

ChSet(c);

303

} else {

304

if ((c >= 'a') && (c <= 'z')) {

305

ChSet(c);

306

ChSet(static_cast<unsigned char>(c - 'a' + 'A'));

307

} else if ((c >= 'A') && (c <= 'Z')) {

308

ChSet(c);

309

ChSet(static_cast<unsigned char>(c - 'A' + 'a'));

310

} else {

311

ChSet(c);

312

}

313

}

314

}

315

316

const unsigned char escapeValue(unsigned char ch) {

317

switch (ch) {

318

case 'a': return '\a';

319

case 'b': return '\b';

320

case 'f': return '\f';

321

case 'n': return '\n';

322

case 'r': return '\r';

323

case 't': return '\t';

324

case 'v': return '\v';

325

}

326

return 0;

327

}

328

329

static int GetHexaChar(unsigned char hd1, unsigned char hd2) {

330

int hexValue = 0;

331

if (hd1 >= '0' && hd1 <= '9') {

332

hexValue += 16 * (hd1 - '0');

333

} else if (hd1 >= 'A' && hd1 <= 'F') {

334

hexValue += 16 * (hd1 - 'A' + 10);

335

} else if (hd1 >= 'a' && hd1 <= 'f') {

336

hexValue += 16 * (hd1 - 'a' + 10);

337

} else

338

return -1;

339

if (hd2 >= '0' && hd2 <= '9') {

340

hexValue += hd2 - '0';

341

} else if (hd2 >= 'A' && hd2 <= 'F') {

342

hexValue += hd2 - 'A' + 10;

343

} else if (hd2 >= 'a' && hd2 <= 'f') {

344

hexValue += hd2 - 'a' + 10;

345

} else

346

return -1;

347

return hexValue;

348

}

349

350

/**

351

* Called when the parser finds a backslash not followed

352

* by a valid expression (like \( in non-Posix mode).

353

* @param pattern: pointer on the char after the backslash.

354

* @param incr: (out) number of chars to skip after expression evaluation.

355

* @return the char if it resolves to a simple char,

356

* or -1 for a char class. In this case, bittab is changed.

357

358

int RESearch::GetBackslashExpression(

359

const char *pattern,

360

int &incr) {

361

// Since error reporting is primitive and messages are not used anyway,

362

// I choose to interpret unexpected syntax in a logical way instead

363

// of reporting errors. Otherwise, we can stick on, eg., PCRE behavior.

364

incr = 0; // Most of the time, will skip the char "naturally".

365

int c;

366

int result = -1;

367

unsigned char bsc = *pattern;

368

if (!bsc) {

369

// Avoid overrun

370

result = '\\'; // \ at end of pattern, take it literally

371

return result;

372

}

373

374

switch (bsc) {

375

case 'a':

376

case 'b':

377

case 'n':

378

case 'f':

379

case 'r':

380

case 't':

381

case 'v':

382

result = escapeValue(bsc);

383

break;

384

case 'x': {

385

unsigned char hd1 = *(pattern + 1);

386

unsigned char hd2 = *(pattern + 2);

387

int hexValue = GetHexaChar(hd1, hd2);

388

if (hexValue >= 0) {

389

result = hexValue;

390

incr = 2; // Must skip the digits

391

} else {

392

result = 'x'; // \x without 2 digits: see it as 'x'

393

}

394

}

395

break;

396

case 'd':

397

for (c = '0'; c <= '9'; c++) {

398

ChSet(static_cast<unsigned char>(c));

399

}

400

break;

401

case 'D':

402

for (c = 0; c < MAXCHR; c++) {

403

if (c < '0' || c > '9') {

404

ChSet(static_cast<unsigned char>(c));

405

}

406

}

407

break;

408

case 's':

409

ChSet(' ');

410

ChSet('\t');

411

ChSet('\n');

412

ChSet('\r');

413

ChSet('\f');

414

ChSet('\v');

415

break;

416

case 'S':

417

for (c = 0; c < MAXCHR; c++) {

418

if (c != ' ' && !(c >= 0x09 && c <= 0x0D)) {

419

ChSet(static_cast<unsigned char>(c));

420

}

421

}

422

break;

423

case 'w':

424

for (c = 0; c < MAXCHR; c++) {

425

if (iswordc(static_cast<unsigned char>(c))) {

426

ChSet(static_cast<unsigned char>(c));

427

}

428

}

429

break;

430

case 'W':

431

for (c = 0; c < MAXCHR; c++) {

432

if (!iswordc(static_cast<unsigned char>(c))) {

433

ChSet(static_cast<unsigned char>(c));

434

}

435

}

436

break;

437

default:

438

result = bsc;

439

}

440

return result;

441

}

442

443

const char *RESearch::Compile(const char *pattern, int length, bool caseSensitive, bool posix) {

444

char *mp=nfa; /* nfa pointer */

445

char *lp; /* saved pointer */

446

char *sp=nfa; /* another one */

447

char *mpMax = mp + MAXNFA - BITBLK - 10;

448

449

int tagi = 0; /* tag stack index */

450

int tagc = 1; /* actual tag count */

451

452

int n;

453

char mask; /* xor mask -CCL/NCL */

454

int c1, c2, prevChar;

455

456

if (!pattern || !length) {

457

if (sta)

458

return 0;

459

else

460

return badpat("No previous regular expression");

461

}

462

sta = NOP;

463

464

const char *p=pattern; /* pattern pointer */

465

for (int i=0; i<length; i++, p++) {

466

if (mp > mpMax)

467

return badpat("Pattern too long");

468

lp = mp;

469

switch (*p) {

470

471

case '.': /* match any char */

472

*mp++ = ANY;

473

break;

474

475

case '^': /* match beginning */

476

if (p == pattern)

477

*mp++ = BOL;

478

else {

479

*mp++ = CHR;

480

*mp++ = *p;

481

}

482

break;

483

484

case '$': /* match endofline */

485

if (!*(p+1))

486

*mp++ = EOL;

487

else {

488

*mp++ = CHR;

489

*mp++ = *p;

490

}

491

break;

492

493

case '[': /* match char class */

494

*mp++ = CCL;

495

prevChar = 0;

496

497

i++;

498

if (*++p == '^') {

499

mask = '\377';

500

i++;

501

p++;

502

} else

503

mask = 0;

504

505

if (*p == '-') { /* real dash */

506

i++;

507

prevChar = *p;

508

ChSet(*p++);

509

}

510

if (*p == ']') { /* real brace */

511

i++;

512

prevChar = *p;

513

ChSet(*p++);

514

}

515

while (*p && *p != ']') {

516

if (*p == '-') {

517

if (prevChar < 0) {

518

// Previous def. was a char class like \d, take dash literally

519

prevChar = *p;

520

ChSet(*p);

521

} else if (*(p+1)) {

522

if (*(p+1) != ']') {

523

c1 = prevChar + 1;

524

i++;

525

c2 = *++p;

526

if (c2 == '\\') {

527

if (!*(p+1)) // End of RE

528

return badpat("Missing ]");

529

else {

530

i++;

531

p++;

532

int incr;

533

c2 = GetBackslashExpression(p, incr);

534

i += incr;

535

p += incr;

536

if (c2 >= 0) {

537

// Convention: \c (c is any char) is case sensitive, whatever the option

538

ChSet(static_cast<unsigned char>(c2));

539

prevChar = c2;

540

} else {

541

// bittab is already changed

542

prevChar = -1;

543

}

544

}

545

}

546

if (prevChar < 0) {

547

// Char after dash is char class like \d, take dash literally

548

prevChar = '-';

549

ChSet('-');

550

} else {

551

// Put all chars between c1 and c2 included in the char set

552

while (c1 <= c2) {

553

ChSetWithCase(static_cast<unsigned char>(c1++), caseSensitive);

554

}

555

}

556

} else {

557

// Dash before the ], take it literally

558

prevChar = *p;

559

ChSet(*p);

560

}

561

} else {

562

return badpat("Missing ]");

563

}

564

} else if (*p == '\\' && *(p+1)) {

565

i++;

566

p++;

567

int incr;

568

int c = GetBackslashExpression(p, incr);

569

i += incr;

570

p += incr;

571

if (c >= 0) {

572

// Convention: \c (c is any char) is case sensitive, whatever the option

573

ChSet(static_cast<unsigned char>(c));

574

prevChar = c;

575

} else {

576

// bittab is already changed

577

prevChar = -1;

578

}

579

} else {

580

prevChar = *p;

581

ChSetWithCase(*p, caseSensitive);

582

}

583

i++;

584

p++;

585

}

586

if (!*p)

587

return badpat("Missing ]");

588

589

for (n = 0; n < BITBLK; bittab[n++] = 0)

590

*mp++ = static_cast<char>(mask ^ bittab[n]);

591

592

break;

593

594

case '*': /* match 0 or more... */

595

case '+': /* match 1 or more... */

596

if (p == pattern)

597

return badpat("Empty closure");

598

lp = sp; /* previous opcode */

599

if (*lp == CLO) /* equivalence... */

600

break;

601

switch (*lp) {

602

603

case BOL:

604

case BOT:

605

case EOT:

606

case BOW:

607

case EOW:

608

case REF:

609

return badpat("Illegal closure");

610

default:

611

break;

612

}

613

614

if (*p == '+')

615

for (sp = mp; lp < sp; lp++)

616

*mp++ = *lp;

617

618

*mp++ = END;

619

*mp++ = END;

620

sp = mp;

621

while (--mp > lp)

622

*mp = mp[-1];

623

*mp = CLO;

624

mp = sp;

625

break;

626

627

case '\\': /* tags, backrefs... */

628

i++;

629

switch (*++p) {

630

case '<':

631

*mp++ = BOW;

632

break;

633

case '>':

634

if (*sp == BOW)

635

return badpat("Null pattern inside \\<\\>");

636

*mp++ = EOW;

637

break;

638

case '1':

639

case '2':

640

case '3':

641

case '4':

642

case '5':

643

case '6':

644

case '7':

645

case '8':

646

case '9':

647

n = *p-'0';

648

if (tagi > 0 && tagstk[tagi] == n)

649

return badpat("Cyclical reference");

650

if (tagc > n) {

651

*mp++ = static_cast<char>(REF);

652

*mp++ = static_cast<char>(n);

653

} else

654

return badpat("Undetermined reference");

655

break;

656

default:

657

if (!posix && *p == '(') {

658

if (tagc < MAXTAG) {

659

tagstk[++tagi] = tagc;

660

*mp++ = BOT;

661

*mp++ = static_cast<char>(tagc++);

662

} else

663

return badpat("Too many \$\$ pairs");

664

} else if (!posix && *p == ')') {

665

if (*sp == BOT)

666

return badpat("Null pattern inside \$\$");

667

if (tagi > 0) {

668

*mp++ = static_cast<char>(EOT);

669

*mp++ = static_cast<char>(tagstk[tagi--]);

670

} else

671

return badpat("Unmatched \\)");

672

} else {

673

int incr;

674

int c = GetBackslashExpression(p, incr);

675

i += incr;

676

p += incr;

677

if (c >= 0) {

678

*mp++ = CHR;

679

*mp++ = static_cast<unsigned char>(c);

680

} else {

681

*mp++ = CCL;

682

mask = 0;

683

for (n = 0; n < BITBLK; bittab[n++] = 0)

684

*mp++ = static_cast<char>(mask ^ bittab[n]);

685

}

686

}

687

}

688

break;

689

690

default : /* an ordinary char */

691

if (posix && *p == '(') {

692

if (tagc < MAXTAG) {

693

tagstk[++tagi] = tagc;

694

*mp++ = BOT;

695

*mp++ = static_cast<char>(tagc++);

696

} else

697

return badpat("Too many () pairs");

698

} else if (posix && *p == ')') {

699

if (*sp == BOT)

700

return badpat("Null pattern inside ()");

701

if (tagi > 0) {

702

*mp++ = static_cast<char>(EOT);

703

*mp++ = static_cast<char>(tagstk[tagi--]);

704

} else

705

return badpat("Unmatched )");

706

} else {

707

unsigned char c = *p;

708

if (!c) // End of RE

709

c = '\\'; // We take it as raw backslash

710

if (caseSensitive || !iswordc(c)) {

711

*mp++ = CHR;

712

*mp++ = c;

713

} else {

714

*mp++ = CCL;

715

mask = 0;

716

ChSetWithCase(c, false);

717

for (n = 0; n < BITBLK; bittab[n++] = 0)

718

*mp++ = static_cast<char>(mask ^ bittab[n]);

719

}

720

}

721

break;

722

}

723

sp = lp;

724

}

725

if (tagi > 0)

726

return badpat((posix ? "Unmatched (" : "Unmatched \\("));

727

*mp = END;

728

sta = OKP;

729

return 0;

730

}

731

732

733

* RESearch::Execute:

734

* execute nfa to find a match.

735

736

* special cases: (nfa[0])

737

* BOL

738

* Match only once, starting from the

739

* beginning.

740

* CHR

741

* First locate the character without

742

* calling PMatch, and if found, call

743

* PMatch for the remaining string.

744

* END

745

* RESearch::Compile failed, poor luser did not

746

* check for it. Fail fast.

747

748

* If a match is found, bopat[0] and eopat[0] are set

749

* to the beginning and the end of the matched fragment,

750

* respectively.

751

752

753

int RESearch::Execute(CharacterIndexer &ci, int lp, int endp) {

754

unsigned char c;

755

int ep = NOTFOUND;

756

char *ap = nfa;

757

758

bol = lp;

759

failure = 0;

760

761

Clear();

762

763

switch (*ap) {

764

765

case BOL: /* anchored: match from BOL only */

766

ep = PMatch(ci, lp, endp, ap);

767

break;

768

case EOL: /* just searching for end of line normal path doesn't work */

769

if (*(ap+1) == END) {

770

lp = endp;

771

ep = lp;

772

break;

773

} else {

774

return 0;

775

}

776

case CHR: /* ordinary char: locate it fast */

777

c = *(ap+1);

778

while ((lp < endp) && (ci.CharAt(lp) != c))

779

lp++;

780

if (lp >= endp) /* if EOS, fail, else fall thru. */

781

return 0;

782

default: /* regular matching all the way. */

783

while (lp < endp) {

784

ep = PMatch(ci, lp, endp, ap);

785

if (ep != NOTFOUND)

786

break;

787

lp++;

788

}

789

break;

790

case END: /* munged automaton. fail always */

791

return 0;

792

}

793

if (ep == NOTFOUND)

794

return 0;

795

796

bopat[0] = lp;

797

eopat[0] = ep;

798

return 1;

799

}

800

801

802

* PMatch: internal routine for the hard part

803

804

* This code is partly snarfed from an early grep written by

805

* David Conroy. The backref and tag stuff, and various other

806

* innovations are by oz.

807

808

* special case optimizations: (nfa[n], nfa[n+1])

809

* CLO ANY

810

* We KNOW .* will match everything upto the

811

* end of line. Thus, directly go to the end of

812

* line, without recursive PMatch calls. As in

813

* the other closure cases, the remaining pattern

814

* must be matched by moving backwards on the

815

* string recursively, to find a match for xy

816

* (x is ".*" and y is the remaining pattern)

817

* where the match satisfies the LONGEST match for

818

* x followed by a match for y.

819

* CLO CHR

820

* We can again scan the string forward for the

821

* single char and at the point of failure, we

822

* execute the remaining nfa recursively, same as

823

* above.

824

825

* At the end of a successful match, bopat[n] and eopat[n]

826

* are set to the beginning and end of subpatterns matched

827

* by tagged expressions (n = 1 to 9).

828

829

830

extern void re_fail(char *,char);

831

832

#define isinset(x,y) ((x)[((y)&BLKIND)>>3] & bitarr[(y)&BITIND])

833

834

835

* skip values for CLO XXX to skip past the closure

836

837

838

#define ANYSKIP 2 /* [CLO] ANY END */

839

#define CHRSKIP 3 /* [CLO] CHR chr END */

840

#define CCLSKIP 34 /* [CLO] CCL 32 bytes END */

841

842

int RESearch::PMatch(CharacterIndexer &ci, int lp, int endp, char *ap) {

843

int op, c, n;

844

int e; /* extra pointer for CLO */

845

int bp; /* beginning of subpat... */

846

int ep; /* ending of subpat... */

847

int are; /* to save the line ptr. */

848

849

while ((op = *ap++) != END)

850

switch (op) {

851

852

case CHR:

853

if (ci.CharAt(lp++) != *ap++)

854

return NOTFOUND;

855

break;

856

case ANY:

857

if (lp++ >= endp)

858

return NOTFOUND;

859

break;

860

case CCL:

861

if (lp >= endp)

862

return NOTFOUND;

863

c = ci.CharAt(lp++);

864

if (!isinset(ap,c))

865

return NOTFOUND;

866

ap += BITBLK;

867

break;

868

case BOL:

869

if (lp != bol)

870

return NOTFOUND;

871

break;

872

case EOL:

873

if (lp < endp)

874

return NOTFOUND;

875

break;

876

case BOT:

877

bopat[*ap++] = lp;

878

break;

879

case EOT:

880

eopat[*ap++] = lp;

881

break;

882

case BOW:

883

if ((lp!=bol && iswordc(ci.CharAt(lp-1))) || !iswordc(ci.CharAt(lp)))

884

return NOTFOUND;

885

break;

886

case EOW:

887

if (lp==bol || !iswordc(ci.CharAt(lp-1)) || iswordc(ci.CharAt(lp)))

888

return NOTFOUND;

889

break;

890

case REF:

891

n = *ap++;

892

bp = bopat[n];

893

ep = eopat[n];

894

while (bp < ep)

895

if (ci.CharAt(bp++) != ci.CharAt(lp++))

896

return NOTFOUND;

897

break;

898

case CLO:

899

are = lp;

900

switch (*ap) {

901

902

case ANY:

903

while (lp < endp)

904

lp++;

905

n = ANYSKIP;

906

break;

907

case CHR:

908

c = *(ap+1);

909

while ((lp < endp) && (c == ci.CharAt(lp)))

910

lp++;

911

n = CHRSKIP;

912

break;

913

case CCL:

914

while ((lp < endp) && isinset(ap+1,ci.CharAt(lp)))

915

lp++;

916

n = CCLSKIP;

917

break;

918

default:

919

failure = true;

920

//re_fail("closure: bad nfa.", *ap);

921

return NOTFOUND;

922

}

923

924

ap += n;

925

926

while (lp >= are) {

927

if ((e = PMatch(ci, lp, endp, ap)) != NOTFOUND)

928

return e;

929

--lp;

930

}

931

return NOTFOUND;

932

default:

933

//re_fail("RESearch::Execute: bad nfa.", static_cast<char>(op));

934

return NOTFOUND;

935

}

936

return lp;

937

}

938

939

940

* RESearch::Substitute:

941

* substitute the matched portions of the src in dst.

942

943

* & substitute the entire matched pattern.

944

945

* \digit substitute a subpattern, with the given tag number.

946

* Tags are numbered from 1 to 9. If the particular

947

* tagged subpattern does not exist, null is substituted.

948

949

int RESearch::Substitute(CharacterIndexer &ci, char *src, char *dst) {

950

unsigned char c;

951

int pin;

952

int bp;

953

int ep;

954

955

if (!*src || !bopat[0])

956

return 0;

957

958

while ((c = *src++) != 0) {

959

switch (c) {

960

961

case '&':

962

pin = 0;

963

break;

964

965

case '\\':

966

c = *src++;

967

if (c >= '0' && c <= '9') {

968

pin = c - '0';

969

break;

970

}

971

972

default:

973

*dst++ = c;

974

continue;

975

}

976

977

if ((bp = bopat[pin]) != 0 && (ep = eopat[pin]) != 0) {

978

while (ci.CharAt(bp) && bp < ep)

979

*dst++ = ci.CharAt(bp++);

980

if (bp < ep)

981

return 0;

982

}

983

}

984

*dst = '\0';

985

return 1;

986

}

987

Older »