1
/* @(#)match.c 1.18 00/11/12 Copyright 1985 J. Schilling */
5
* Pattern matching functions
7
* Copyright (c) 1985,1995 J. Schilling
10
* This program is free software; you can redistribute it and/or modify
11
* it under the terms of the GNU General Public License as published by
12
* the Free Software Foundation; either version 2, or (at your option)
15
* This program is distributed in the hope that it will be useful,
16
* but WITHOUT ANY WARRANTY; without even the implied warranty of
17
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18
* GNU General Public License for more details.
20
* You should have received a copy of the GNU General Public License
21
* along with this program; see the file COPYING. If not, write to
22
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
25
* The pattern matching functions below are based on the algorithm
26
* presented by Martin Richards in:
28
* "A Compact Function for Regular Expression Pattern Matching",
29
* Software-Practice and Experience, Vol. 9, 527-534 (1979)
31
* Several changes have been made to the original source which has been
34
* '/' is replaced by '!' (to allow UNIX filenames)
35
* '(',')' are replaced by '{', '}'
36
* '\'' is replaced by '\\' (UNIX compatible quote)
38
* Character classes have been added to allow "[<character list>]"
40
* Start of line '^' and end of line '$' have been added.
44
#define opatmatch opatlmatch
45
#define patmatch patlmatch
49
typedef unsigned char Uchar;
51
/*---------------------------------------------------------------------------
55
+---------------------------------------------------------------------------*/
59
* put adds a new state to the active list
61
#define put(ret, state, sp, n) { \
62
register int *lstate = state; \
63
register int *lsp = sp; \
64
register int ln = n; \
66
while (lstate < lsp) { \
67
if (*lstate++ == ln) { \
81
* match a character in class
83
#define in_class(found, pat, c) { \
84
register const Uchar *lpat = pat; \
85
register int lc = c; \
95
while (*lpat != RCLASS) { \
99
if (*lpat == RANGE) { \
101
if (*lpat == QUOTE) \
103
hi_bound = *lpat++; \
105
hi_bound = lo_bound; \
107
if (lo_bound <= lc && lc <= hi_bound) { \
115
* opatmatch - the old external interpreter interface.
117
* Trys to match a string beginning at offset
118
* against the compiled pattern.
120
Uchar *opatmatch(pat, aux, str, soff, slen, alt)
130
return (patmatch(pat, aux, str, soff, slen, alt, state));
134
* patmatch - the external interpreter interface.
136
* Trys to match a string beginning at offset
137
* against the compiled pattern.
139
Uchar *patmatch(pat, aux, str, soff, slen, alt, state)
152
register int q, s, k;
154
const Uchar *lastp = (Uchar *)NULL;
157
for( ;soff <= slen; soff++) {
161
put(sp, state, state, 0);
163
put(sp, state, sp, alt);
165
for(s = soff; ; s++) {
167
* next char from input string
174
* first complete the closure
176
for(n = state; n < sp;) {
177
p = *n++; /* next state number */
180
q = aux[p]; /* get next state for pat */
181
k = pat[p]; /* get next char from pat */
185
put(sp, state, sp, p+1);
186
case NIL: /* NIL matches always */
188
put(sp, state, sp, q);
190
case LBRACK: /* alternations */
192
put(sp, state, sp, p+1);
194
put(sp, state, sp, q);
198
put(sp, state, sp, q);
202
put(sp, state, sp, q);
207
for (i = state; i < sp;) {
208
if (*i++ == ENDSTATE) {
214
return ((Uchar *)lastp);
217
* now try to match next character
221
for(i = sp; i < n;) {
222
p = *i++; /* next active state number */
236
in_class(q, &pat[p+1], c);
241
put(sp, state, sp, p);
251
put(sp, state, sp, aux[p]);
253
if (sp == state) { /* if no new states return */
256
if (lastp || (soff == slen - 1))
257
return ((Uchar *)lastp);
261
return ((Uchar *)lastp);
267
return ((Uchar *)lastp);
273
/*---------------------------------------------------------------------------
277
+---------------------------------------------------------------------------*/
279
typedef struct args {
280
const Uchar *pattern;
287
static void nextitem __PR((arg_t *));
288
static int prim __PR((arg_t *));
289
static int exp __PR((arg_t *, int *));
290
static void setexits __PR((int *, int, int));
291
static int join __PR((int *, int, int));
294
* 'read' the next character from pattern
298
if (++(ap)->patp >= (ap)->length) \
301
(ap)->Ch = (ap)->pattern[(ap)->patp]; \
305
* get the next item from pattern
307
static void nextitem(ap)
333
while (ap->Ch != RCLASS && ap->Ch != '\0')
343
setexits(ap->aux, t, a);
346
a = exp(ap, &ap->aux[a]);
347
if (a == ENDSTATE || ap->Ch != RBRACK)
356
* parse an expression (a sequence of primaries)
358
static int exp(ap, altp)
362
int exits = ENDSTATE;
370
if (Ch == ALT || Ch == RBRACK || Ch == '\0') {
371
exits = join(aux, exits, a);
375
altp = &aux[ap->patp];
378
setexits(aux, a, ap->patp);
383
* set all exits in a list to a specified value
385
static void setexits(aux, list, val)
392
while (list != ENDSTATE) {
400
* concatenate two lists
402
static int join(aux, a, b)
412
while (aux[t] != ENDSTATE)
419
* patcompile - the external compiler interface.
421
* The pattern is compiled into the aux array.
422
* Return value on success, is the outermost alternate which is != 0.
423
* Error is indicated by return of 0.
425
int patcompile(pat, len, aux)
439
for(i = 0; i < len; i++)
445
setexits(aux, i, ENDSTATE);