~ubuntu-branches/ubuntu/trusty/librep/trusty

« back to all changes in this revision

Viewing changes to src/regexp.3

Committer: Bazaar Package Importer
Author(s): Christian Marillat
Date: 2001-11-13 15:06:22 UTC
Revision ID: james.westby@ubuntu.com-20011113150622-vgmgmk6srj3kldr3

Tags: upstream-0.15.2

Import upstream version 0.15.2

files added:

.gdbinit

AUTHORS

BUGS

COPYING

ChangeLog

HACKING

INSTALL

Makedefs.in

Makefile.in

NEWS

README

THANKS

TODO

TREE

aclocal.m4

autogen.sh

build-info

config.guess

config.h.in

config.sub

configure

configure.in

doc/embed-1

doc/embed-2

doc/embed-3

doc/gc-protection

emulate-gnu-tar

install-aliases

install-sh

intl

intl/ChangeLog

intl/Makefile.in

intl/VERSION

intl/bindtextdom.c

intl/cat-compat.c

intl/dcgettext.c

intl/dgettext.c

intl/explodename.c

intl/finddomain.c

intl/gettext.c

intl/gettext.h

intl/gettextP.h

intl/hash-string.h

intl/intl-compat.c

intl/l10nflist.c

intl/libgettext.h

intl/linux-msg.sed

intl/loadinfo.h

intl/loadmsgcat.c

intl/localealias.c

intl/po2tbl.sed.in

intl/textdomain.c

intl/xopen-msg.sed

librep.spec

librep.spec.in

lisp

lisp/ChangeLog

lisp/Makefile.in

lisp/rep

lisp/rep.jl

lisp/rep/data

lisp/rep/data.jl

lisp/rep/data/objects.jl

lisp/rep/data/queues.jl

lisp/rep/data/records.jl

lisp/rep/data/ring.jl

lisp/rep/data/sort.jl

lisp/rep/data/string-util.jl

lisp/rep/data/symbol-table.jl

lisp/rep/i18n

lisp/rep/i18n/xgettext.jl

lisp/rep/io

lisp/rep/io/file-handlers

lisp/rep/io/file-handlers.jl

lisp/rep/io/file-handlers/remote

lisp/rep/io/file-handlers/remote.jl

lisp/rep/io/file-handlers/remote/ftp.jl

lisp/rep/io/file-handlers/remote/rcp.jl

lisp/rep/io/file-handlers/remote/rep.jl

lisp/rep/io/file-handlers/remote/utils.jl

lisp/rep/io/file-handlers/tar.jl

lisp/rep/io/file-handlers/tilde.jl

lisp/rep/io/files.jl

lisp/rep/io/streams.jl

lisp/rep/lang

lisp/rep/lang/backquote.jl

lisp/rep/lang/compat-doc.jl

lisp/rep/lang/debugger.jl

lisp/rep/lang/define.jl

lisp/rep/lang/doc.jl

lisp/rep/lang/error-helper.jl

lisp/rep/lang/interpreter.jl

lisp/rep/lang/math.jl

lisp/rep/lang/profiler.jl

lisp/rep/mail

lisp/rep/mail/addr.jl

lisp/rep/module-system.jl

lisp/rep/net

lisp/rep/net/domain-name.jl

lisp/rep/net/rpc.jl

lisp/rep/regexp.jl

lisp/rep/structures.jl

lisp/rep/system

lisp/rep/system.jl

lisp/rep/system/environ.jl

lisp/rep/system/pwd-prompt.jl

lisp/rep/test

lisp/rep/test/autoload.jl

lisp/rep/test/data.jl

lisp/rep/test/framework.jl

lisp/rep/threads

lisp/rep/threads/condition-variable.jl

lisp/rep/threads/message-port.jl

lisp/rep/threads/mutex.jl

lisp/rep/threads/utils.jl

lisp/rep/user.jl

lisp/rep/util

lisp/rep/util/autoloader.jl

lisp/rep/util/base64.jl

lisp/rep/util/date.jl

lisp/rep/util/gaol.jl

lisp/rep/util/ispell.jl

lisp/rep/util/memoize.jl

lisp/rep/util/repl.jl

lisp/rep/util/time.jl

lisp/rep/vm

lisp/rep/vm/assembler.jl

lisp/rep/vm/bytecode-defs.jl

lisp/rep/vm/bytecodes.jl

lisp/rep/vm/compiler

lisp/rep/vm/compiler.jl

lisp/rep/vm/compiler/basic.jl

lisp/rep/vm/compiler/bindings.jl

lisp/rep/vm/compiler/inline.jl

lisp/rep/vm/compiler/lap.jl

lisp/rep/vm/compiler/modules.jl

lisp/rep/vm/compiler/no-lang.jl

lisp/rep/vm/compiler/rep.jl

lisp/rep/vm/compiler/scheme.jl

lisp/rep/vm/compiler/src.jl

lisp/rep/vm/compiler/unscheme.jl

lisp/rep/vm/compiler/utils.jl

lisp/rep/vm/disassembler.jl

lisp/rep/vm/peephole.jl

lisp/rep/www

lisp/rep/www/cgi-get.jl

lisp/rep/www/fetch-url.jl

lisp/rep/www/quote-url.jl

lisp/scheme

lisp/scheme.jl

lisp/scheme/data.jl

lisp/scheme/misc.jl

lisp/scheme/syntax-funs.jl

lisp/scheme/syntax.jl

lisp/scheme/utils.jl

lisp/unscheme

lisp/unscheme.jl

lisp/unscheme/data.jl

lisp/unscheme/misc.jl

lisp/unscheme/syntax-funs.jl

lisp/unscheme/syntax.jl

ltmain.sh

man/Makefile.in

man/interface.texi

man/lang.texi

man/librep.texi

man/news.texi

man/repl.texi

mkinstalldirs

rep-debugger.el

rep.m4

rules.mk.sh

src/ChangeLog

src/Makefile.in

src/README.regexp

src/README.sdbm

src/alloca.c

src/bytecodes.h

src/continuations.c

src/datums.c

src/debug-buffer.c

src/dlmalloc.c

src/fake-libexec

src/files.c

src/find.c

src/fluids.c

src/getpagesize.h

src/gettext.c

src/librep.sym

src/lisp.c

src/lispcmds.c

src/lispmach.c

src/lispmach.h

src/macros.c

src/main.c

src/md5.c

src/md5.h

src/memcmp.c

src/message.c

src/misc.c

src/numbers.c

src/origin.c

src/readline.c

src/realpath.c

src/record-profile.c

src/regexp.3

src/regexp.c

src/regsub.c

src/rep-config.sh

src/rep-md5.c

src/rep-remote.c

src/rep-xgettext.jl

src/rep.c

src/rep.h

src/rep_config.h.in

src/rep_lisp.h

src/rep_regexp.h

src/rep_subrs.h

src/repdoc.c

src/repgdbm.c

src/repint.h

src/repint_subrs.h

src/repsdbm.c

src/safemach.c

src/sdbm.3

src/sdbm.c

src/sdbm.h

src/sdbm_hash.c

src/sdbm_pair.c

src/sdbm_pair.h

src/sdbm_tune.h

src/sockets.c

src/streams.c

src/structures.c

src/symbols.c

src/tables.c

src/timers.c

src/tuples.c

src/unix_defs.h

src/unix_dl.c

src/unix_files.c

src/unix_main.c

src/unix_processes.c

src/values.c

src/weak-refs.c

test

Show diffs side-by-side

added added

removed removed

src/regexp.3

.TH REGEXP 3 local

.DA 2 April 1986

.SH NAME

regcomp, regexec, regsub, regerror \- regular expression handler

.SH SYNOPSIS

.ft B

.nf

#include <regexp.h>

regexp *regcomp(exp)

char *exp;

int regexec(prog, string)

regexp *prog;

char *string;

regsub(prog, source, dest)

regexp *prog;

char *source;

char *dest;

regerror(msg)

char *msg;

.SH DESCRIPTION

These functions implement

.IR egrep (1)-style

regular expressions and supporting facilities.

.PP

.I Regcomp

compiles a regular expression into a structure of type

.IR regexp ,

and returns a pointer to it.

The space has been allocated using

.IR malloc (3)

and may be released by

.IR free .

.PP

.I Regexec

matches a NUL-terminated \fIstring\fR against the compiled regular expression

in \fIprog\fR.

It returns 1 for success and 0 for failure, and adjusts the contents of

\fIprog\fR's \fIstartp\fR and \fIendp\fR (see below) accordingly.

.PP

The members of a

.I regexp

structure include at least the following (not necessarily in order):

.PP

.RS

char *startp[NSUBEXP];

.br

char *endp[NSUBEXP];

.RE

.PP

where

.I NSUBEXP

is defined (as 10) in the header file.

Once a successful \fIregexec\fR has been done using the \fIregexp\fR,

each \fIstartp\fR-\fIendp\fR pair describes one substring

within the \fIstring\fR,

with the \fIstartp\fR pointing to the first character of the substring and

the \fIendp\fR pointing to the first character following the substring.

The 0th substring is the substring of \fIstring\fR that matched the whole

regular expression.

The others are those substrings that matched parenthesized expressions

within the regular expression, with parenthesized expressions numbered

in left-to-right order of their opening parentheses.

.PP

.I Regsub

copies \fIsource\fR to \fIdest\fR, making substitutions according to the

most recent \fIregexec\fR performed using \fIprog\fR.

Each instance of `&' in \fIsource\fR is replaced by the substring

indicated by \fIstartp\fR[\fI0\fR] and

\fIendp\fR[\fI0\fR].

Each instance of `\e\fIn\fR', where \fIn\fR is a digit, is replaced by

the substring indicated by

\fIstartp\fR[\fIn\fR] and

\fIendp\fR[\fIn\fR].

To get a literal `&' or `\e\fIn\fR' into \fIdest\fR, prefix it with `\e';

to get a literal `\e' preceding `&' or `\e\fIn\fR', prefix it with

another `\e'.

.PP

.I Regerror

is called whenever an error is detected in \fIregcomp\fR, \fIregexec\fR,

or \fIregsub\fR.

The default \fIregerror\fR writes the string \fImsg\fR,

with a suitable indicator of origin,

on the standard

error output

and invokes \fIexit\fR(2).

.I Regerror

can be replaced by the user if other actions are desirable.

.SH "REGULAR EXPRESSION SYNTAX"

A regular expression is zero or more \fIbranches\fR, separated by `|'.

It matches anything that matches one of the branches.

.PP

A branch is zero or more \fIpieces\fR, concatenated.

It matches a match for the first, followed by a match for the second, etc.

.PP

A piece is an \fIatom\fR possibly followed by `*', `+', or `?'.

An atom followed by `*' matches a sequence of 0 or more matches of the atom.

An atom followed by `+' matches a sequence of 1 or more matches of the atom.

An atom followed by `?' matches a match of the atom, or the null string.

.PP

An atom is a regular expression in parentheses (matching a match for the

regular expression), a \fIrange\fR (see below), `.'

(matching any single character), `^' (matching the null string at the

beginning of the input string), `$' (matching the null string at the

end of the input string), a `\e' followed by a single character (matching

that character), or a single character with no other significance

(matching that character).

.PP

A \fIrange\fR is a sequence of characters enclosed in `[]'.

It normally matches any single character from the sequence.

If the sequence begins with `^',

it matches any single character \fInot\fR from the rest of the sequence.

If two characters in the sequence are separated by `\-', this is shorthand

for the full list of ASCII characters between them

(e.g. `[0-9]' matches any decimal digit).

To include a literal `]' in the sequence, make it the first character

(following a possible `^').

To include a literal `\-', make it the first or last character.

.SH AMBIGUITY

If a regular expression could match two different parts of the input string,

it will match the one which begins earliest.

If both begin in the same place but match different lengths, or match

the same length in different ways, life gets messier, as follows.

.PP

In general, the possibilities in a list of branches are considered in

left-to-right order, the possibilities for `*', `+', and `?' are

considered longest-first, nested constructs are considered from the

outermost in, and concatenated constructs are considered leftmost-first.

The match that will be chosen is the one that uses the earliest

possibility in the first choice that has to be made.

If there is more than one choice, the next will be made in the same manner

(earliest possibility) subject to the decision on the first choice.

And so forth.

.PP

For example, `(ab|a)b*c' could match `abc' in one of two ways.

The first choice is between `ab' and `a'; since `ab' is earlier, and does

lead to a successful overall match, it is chosen.

Since the `b' is already spoken for,

the `b*' must match its last possibility\(emthe empty string\(emsince

it must respect the earlier choice.

.PP

In the particular case where no `|'s are present and there is only one

`*', `+', or `?', the net effect is that the longest possible

match will be chosen.

So `ab*', presented with `xabbbby', will match `abbbb'.

Note that if `ab*' is tried against `xabyabbbz', it

will match `ab' just after `x', due to the begins-earliest rule.

(In effect, the decision on where to start the match is the first choice

to be made, hence subsequent choices must respect it even if this leads them

to less-preferred alternatives.)

.SH "SEE ALSO"

egrep(1), expr(1)

.SH DIAGNOSTICS

\fIRegcomp\fR returns NULL for a failure

(\fIregerror\fR permitting),

where failures are syntax errors, exceeding implementation limits,

or applying `+' or `*' to a possibly-null operand.

.SH HISTORY

Both code and manual page were

written at U of T.

They are intended to be compatible with the Bell V8 \fIregexp\fR(3),

but are not derived from Bell code.

.SH BUGS

Empty branches and empty regular expressions are not portable to V8.

.PP

The restriction against

applying `*' or `+' to a possibly-null operand is an artifact of the

simplistic implementation.

.PP

Does not support \fIegrep\fR's newline-separated branches;

neither does the V8 \fIregexp\fR(3), though.

.PP

Due to emphasis on

compactness and simplicity,

it's not strikingly fast.

It does give special attention to handling simple cases quickly.

Older »