1
/* strmatch.c -- ksh-like extended pattern matching for the shell and filename
4
/* Copyright (C) 1991, 1997 Free Software Foundation, Inc.
6
This file is part of GNU Bash, the Bourne Again SHell.
8
Bash is free software; you can redistribute it and/or modify it under
9
the terms of the GNU General Public License as published by the Free
10
Software Foundation; either version 2, or (at your option) any later
13
Bash is distributed in the hope that it will be useful, but WITHOUT ANY
14
WARRANTY; without even the implied warranty of MERCHANTABILITY or
15
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18
You should have received a copy of the GNU General Public License along
19
with Bash; see the file COPYING. If not, write to the Free Software
20
Foundation, 59 Temple Place, Suite 330, Boston, MA 02111 USA. */
24
#include <stdio.h> /* for debugging */
28
#include <chartypes.h>
30
#if defined (HAVE_STRING_H)
34
#endif /* HAVE_STRING_H */
37
static char *brackmatch ();
39
static int extmatch ();
40
static char *patscan ();
43
#if !defined (isascii) && !defined (HAVE_ISASCII)
44
# define isascii(c) ((unsigned int)(c) <= 0177)
47
/* The result of FOLD is an `unsigned char' */
48
# define FOLD(c) ((flags & FNM_CASEFOLD) \
49
? TOLOWER ((unsigned char)c) \
53
#define STREQ(a, b) ((a)[0] == (b)[0] && strcmp(a, b) == 0)
54
#define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp(a, b, n) == 0)
57
/* We use strcoll(3) for range comparisons in bracket expressions,
58
even though it can have unwanted side effects in locales
59
other than POSIX or US. For instance, in the de locale, [A-Z] matches
62
#if defined (HAVE_STRCOLL)
63
/* Helper function for collating symbol equivalence. */
64
static int rangecmp (c1, c2)
67
static char s1[2] = { ' ', '\0' };
68
static char s2[2] = { ' ', '\0' };
71
/* Eight bits only. Period. */
81
if ((ret = strcoll (s1, s2)) != 0)
85
#else /* !HAVE_STRCOLL */
86
# define rangecmp(c1, c2) ((int)(c1) - (int)(c2))
87
#endif /* !HAVE_STRCOLL */
89
#if defined (HAVE_STRCOLL)
90
static int collequiv (c1, c2)
93
return (rangecmp (c1, c2) == 0);
96
# define collequiv(c1, c2) ((c1) == (c2))
104
register struct _collsym *csp;
106
for (csp = posix_collsyms; csp->name; csp++)
108
if (STREQN(csp->name, s, len) && csp->name[len] == '\0')
116
#ifdef HAVE_LIBC_FNM_EXTMATCH
118
strmatch (pattern, string, flags)
125
if (string == 0 || pattern == 0)
128
return (fnmatch (pattern, string, flags));
130
#else /* !HAVE_LIBC_FNM_EXTMATCH */
132
strmatch (pattern, string, flags)
139
if (string == 0 || pattern == 0)
142
se = string + strlen (string);
143
pe = pattern + strlen (pattern);
145
return (gmatch (string, se, pattern, pe, flags));
147
#endif /* !HAVE_LIBC_FNM_EXTMATCH */
149
/* Match STRING against the filename pattern PATTERN, returning zero if
150
it matches, FNM_NOMATCH if not. */
152
gmatch (string, se, pattern, pe, flags)
157
register char *p, *n; /* pattern, string */
158
register char c; /* current pattern character */
159
register char sc; /* current string character */
164
if (string == 0 || pattern == 0)
168
fprintf(stderr, "gmatch: string = %s; se = %s\n", string, se);
169
fprintf(stderr, "gmatch: pattern = %s; pe = %s\n", pattern, pe);
177
sc = n < se ? *n : '\0';
180
/* extmatch () will handle recursively calling gmatch, so we can
181
just return what extmatch() returns. */
182
if ((flags & FNM_EXTMATCH) && *p == '(' &&
183
(c == '+' || c == '*' || c == '?' || c == '@' || c == '!')) /* ) */
186
/* If we're not matching the start of the string, we're not
187
concerned about the special cases for matching `.' */
188
lflags = (n == string) ? flags : (flags & ~FNM_PERIOD);
189
return (extmatch (c, n, se, p, pe, lflags));
195
case '?': /* Match single character */
198
else if ((flags & FNM_PATHNAME) && sc == '/')
199
/* If we are matching a pathname, `?' can never match a `/'. */
201
else if ((flags & FNM_PERIOD) && sc == '.' &&
202
(n == string || ((flags & FNM_PATHNAME) && n[-1] == '/')))
203
/* `?' cannot match a `.' if it is the first character of the
204
string or if it is the first character following a slash and
205
we are matching a pathname. */
209
case '\\': /* backslash escape removes special meaning */
213
if ((flags & FNM_NOESCAPE) == 0)
216
/* A trailing `\' cannot match. */
221
if (FOLD (sc) != (unsigned char)c)
225
case '*': /* Match zero or more characters */
229
if ((flags & FNM_PERIOD) && sc == '.' &&
230
(n == string || ((flags & FNM_PATHNAME) && n[-1] == '/')))
231
/* `*' cannot match a `.' if it is the first character of the
232
string or if it is the first character following a slash and
233
we are matching a pathname. */
236
/* Collapse multiple consecutive `*' and `?', but make sure that
237
one character of the string is consumed for each `?'. */
238
for (c = *p++; (c == '?' || c == '*'); c = *p++)
240
if ((flags & FNM_PATHNAME) && sc == '/')
241
/* A slash does not match a wildcard under FNM_PATHNAME. */
247
/* One character of the string is consumed in matching
248
this ? wildcard, so *??? won't match if there are
249
fewer than three characters. */
251
sc = n < se ? *n : '\0';
255
/* Handle ******(patlist) */
256
if ((flags & FNM_EXTMATCH) && c == '*' && *p == '(') /*)*/
259
/* We need to check whether or not the extended glob
260
pattern matches the remainder of the string.
261
If it does, we match the entire pattern. */
262
for (newn = n; newn < se; ++newn)
264
if (extmatch (c, newn, se, p, pe, flags) == 0)
267
/* We didn't match the extended glob pattern, but
268
that's OK, since we can match 0 or more occurrences.
269
We need to skip the glob pattern and see if we
270
match the rest of the string. */
271
newn = patscan (p + 1, pe, 0);
272
/* If NEWN is 0, we have an ill-formed pattern. */
273
p = newn ? newn : pe;
280
/* If we've hit the end of the pattern and the last character of
281
the pattern was handled by the loop above, we've succeeded.
282
Otherwise, we need to match that last character. */
283
if (p == pe && (c == '?' || c == '*'))
286
/* General case, use recursion. */
290
c1 = (unsigned char)((flags & FNM_NOESCAPE) == 0 && c == '\\') ? *p : c;
292
for (--p; n < se; ++n)
294
/* Only call strmatch if the first character indicates a
295
possible match. We can check the first character if
296
we're not doing an extended glob match. */
297
if ((flags & FNM_EXTMATCH) == 0 && c != '[' && FOLD (*n) != c1) /*]*/
300
/* If we're doing an extended glob match and the pattern is not
301
one of the extended glob patterns, we can check the first
303
if ((flags & FNM_EXTMATCH) && p[1] != '(' && /*)*/
304
strchr ("?*+@!", *p) == 0 && c != '[' && FOLD (*n) != c1) /*]*/
307
/* Otherwise, we just recurse. */
308
if (gmatch (n, se, p, pe, flags & ~FNM_PERIOD) == 0)
316
if (sc == '\0' || n == se)
319
/* A character class cannot match a `.' if it is the first
320
character of the string or if it is the first character
321
following a slash and we are matching a pathname. */
322
if ((flags & FNM_PERIOD) && sc == '.' &&
323
(n == string || ((flags & FNM_PATHNAME) && n[-1] == '/')))
324
return (FNM_NOMATCH);
326
p = brackmatch (p, sc, flags);
333
if ((unsigned char)c != FOLD (sc))
334
return (FNM_NOMATCH);
343
if ((flags & FNM_LEADING_DIR) && *n == '/')
344
/* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
347
return (FNM_NOMATCH);
350
/* Parse a bracket expression collating symbol ([.sym.]) starting at P, find
351
the value of the symbol, and move P past the collating symbol expression.
352
The value is returned in *VP, if VP is not null. */
354
parse_collsym (p, vp)
361
p++; /* move past the `.' */
363
for (pc = 0; p[pc]; pc++)
364
if (p[pc] == '.' && p[pc+1] == ']')
366
val = collsym (p, pc);
373
brackmatch (p, test, flags)
378
register char cstart, cend, c;
379
register int not; /* Nonzero if the sense of the character class is inverted. */
387
/* POSIX.2 3.13.1 says that an exclamation mark (`!') shall replace the
388
circumflex (`^') in its role in a `nonmatching list'. A bracket
389
expression starting with an unquoted circumflex character produces
390
unspecified results. This implementation treats the two identically. */
391
if (not = (*p == '!' || *p == '^'))
397
/* Initialize cstart and cend in case `-' is the last
398
character of the pattern. */
401
/* POSIX.2 equivalence class: [=c=]. See POSIX.2 2.8.3.2. Find
402
the end of the equivalence class, move the pattern pointer past
403
it, and check for equivalence. XXX - this handles only
404
single-character equivalence classes, which is wrong, or at
406
if (c == '[' && *p == '=' && p[2] == '=' && p[3] == ']')
410
if (collequiv (test, pc))
412
/*[*/ /* Move past the closing `]', since the first thing we do at
413
the `matched:' label is back p up one. */
421
return ((test == '[') ? savep : (char *)0); /*]*/
427
/* POSIX.2 character class expression. See POSIX.2 2.8.3.2. */
428
if (c == '[' && *p == ':') /*]*/
430
pc = 0; /* make sure invalid char classes don't match. */
431
if (STREQN (p+1, "alnum:]", 7))
432
{ pc = ISALNUM (test); p += 8; }
433
else if (STREQN (p+1, "alpha:]", 7))
434
{ pc = ISALPHA (test); p += 8; }
435
else if (STREQN (p+1, "blank:]", 7))
436
{ pc = ISBLANK (test); p += 8; }
437
else if (STREQN (p+1, "cntrl:]", 7))
438
{ pc = ISCNTRL (test); p += 8; }
439
else if (STREQN (p+1, "digit:]", 7))
440
{ pc = ISDIGIT (test); p += 8; }
441
else if (STREQN (p+1, "graph:]", 7))
442
{ pc = ISGRAPH (test); p += 8; }
443
else if (STREQN (p+1, "lower:]", 7))
444
{ pc = ISLOWER (test); p += 8; }
445
else if (STREQN (p+1, "print:]", 7))
446
{ pc = ISPRINT (test); p += 8; }
447
else if (STREQN (p+1, "punct:]", 7))
448
{ pc = ISPUNCT (test); p += 8; }
449
else if (STREQN (p+1, "space:]", 7))
450
{ pc = ISSPACE (test); p += 8; }
451
else if (STREQN (p+1, "upper:]", 7))
452
{ pc = ISUPPER (test); p += 8; }
453
else if (STREQN (p+1, "xdigit:]", 8))
454
{ pc = ISXDIGIT (test); p += 9; }
455
else if (STREQN (p+1, "ascii:]", 7))
456
{ pc = isascii (test); p += 8; }
459
/*[*/ /* Move past the closing `]', since the first thing we do at
460
the `matched:' label is back p up one. */
466
/* continue the loop here, since this expression can't be
467
the first part of a range expression. */
470
return ((test == '[') ? savep : (char *)0);
478
/* POSIX.2 collating symbols. See POSIX.2 2.8.3.2. Find the end of
479
the symbol name, make sure it is terminated by `.]', translate
480
the name to a character using the external table, and do the
482
if (c == '[' && *p == '.')
484
p = parse_collsym (p, &pc);
485
/* An invalid collating symbol cannot be the first point of a
486
range. If it is, we set cstart to one greater than `test',
487
so any comparisons later will fail. */
488
cstart = (pc == -1) ? test + 1 : pc;
491
if (!(flags & FNM_NOESCAPE) && c == '\\')
495
cstart = cend = *p++;
498
cstart = cend = FOLD (cstart);
500
/* POSIX.2 2.8.3.1.2 says: `An expression containing a `[' that
501
is not preceded by a backslash and is not part of a bracket
502
expression produces undefined results.' This implementation
503
treats the `[' as just a character to be matched if there is
504
not a closing `]'. */
506
return ((test == '[') ? savep : (char *)0);
511
if ((flags & FNM_PATHNAME) && c == '/')
512
/* [/] can never match when matching a pathname. */
515
/* This introduces a range, unless the `-' is the last
516
character of the class. Find the end of the range
518
if (c == '-' && *p != ']')
521
if (!(flags & FNM_NOESCAPE) && cend == '\\')
525
if (cend == '[' && *p == '.')
527
p = parse_collsym (p, &pc);
528
/* An invalid collating symbol cannot be the second part of a
529
range expression. If we get one, we set cend to one fewer
530
than the test character to make sure the range test fails. */
531
cend = (pc == -1) ? test - 1 : pc;
537
/* POSIX.2 2.8.3.2: ``The ending range point shall collate
538
equal to or higher than the starting range point; otherwise
539
the expression shall be treated as invalid.'' Note that this
540
applies to only the range expression; the rest of the bracket
541
expression is still checked for matches. */
542
if (rangecmp (cstart, cend) > 0)
551
if (rangecmp (test, cstart) >= 0 && rangecmp (test, cend) <= 0)
558
return (!not ? (char *)0 : p);
561
/* Skip the rest of the [...] that already matched. */
563
brcnt = (c != ']') + (c == '[' && (*p == '=' || *p == ':' || *p == '.'));
570
/* A `[' without a matching `]' is just another character to match. */
572
return ((test == '[') ? savep : (char *)0);
575
if (c == '[' && (*p == '=' || *p == ':' || *p == '.'))
579
else if (!(flags & FNM_NOESCAPE) && c == '\\')
583
/* XXX 1003.2d11 is unclear if this is right. */
587
return (not ? (char *)0 : p);
590
#if defined (EXTENDED_GLOB)
591
/* ksh-like extended pattern matching:
595
where pat-list is a list of one or patterns separated by `|'. Operation
598
?(patlist) match zero or one of the given patterns
599
*(patlist) match zero or more of the given patterns
600
+(patlist) match one or more of the given patterns
601
@(patlist) match exactly one of the given patterns
602
!(patlist) match anything except one of the given patterns
605
/* Scan a pattern starting at STRING and ending at END, keeping track of
606
embedded () and []. If DELIM is 0, we scan until a matching `)'
607
because we're scanning a `patlist'. Otherwise, we scan until we see
608
DELIM. In all cases, we never scan past END. The return value is the
609
first character after the matching DELIM. */
611
patscan (string, end, delim)
615
int pnest, bnest, cchar;
618
pnest = bnest = cchar = 0;
620
for (s = string; c = *s; s++)
629
/* `[' is not special inside a bracket expression, but it may
630
introduce one of the special POSIX bracket expressions
631
([.SYM.], [=c=], [: ... :]) that needs special handling. */
636
if (*bfirst == '!' || *bfirst == '^')
640
else if (s[1] == ':' || s[1] == '.' || s[1] == '=')
644
/* `]' is not special if it's the first char (after a leading `!'
645
or `^') in a bracket expression or if it's part of one of the
646
special POSIX bracket expressions ([.SYM.], [=c=], [: ... :]) */
650
if (cchar && s[-1] == cchar)
652
else if (s != bfirst)
672
if (bnest == 0 && pnest-- <= 0)
678
if (bnest == 0 && pnest == 0 && delim == '|')
687
/* Return 0 if dequoted pattern matches S in the current locale. */
689
strcompare (p, pe, s, se)
690
char *p, *pe, *s, *se;
699
#if defined (HAVE_STRCOLL)
700
ret = strcoll (p, s);
708
return (ret == 0 ? ret : FNM_NOMATCH);
711
/* Match a ksh extended pattern specifier. Return FNM_NOMATCH on failure or
712
0 on success. This is handed the entire rest of the pattern and string
713
the first time an extended pattern specifier is encountered, so it calls
714
gmatch recursively. */
716
extmatch (xc, s, se, p, pe, flags)
717
int xc; /* select which operation */
722
char *prest; /* pointer to rest of pattern */
723
char *psub; /* pointer to sub-pattern */
724
char *pnext; /* pointer to next sub-pattern */
725
char *srest; /* pointer to rest of string */
729
fprintf(stderr, "extmatch: xc = %c\n", xc);
730
fprintf(stderr, "extmatch: s = %s; se = %s\n", s, se);
731
fprintf(stderr, "extmatch: p = %s; pe = %s\n", p, pe);
734
prest = patscan (p + (*p == '('), pe, 0); /* ) */
736
/* If PREST is 0, we failed to scan a valid pattern. In this
737
case, we just want to compare the two as strings. */
738
return (strcompare (p - 1, pe, s, se));
742
case '+': /* match one or more occurrences */
743
case '*': /* match zero or more occurrences */
744
/* If we can get away with no matches, don't even bother. Just
745
call gmatch on the rest of the pattern and return success if
747
if (xc == '*' && (gmatch (s, se, prest, pe, flags) == 0))
750
/* OK, we have to do this the hard way. First, we make sure one of
751
the subpatterns matches, then we try to match the rest of the
753
for (psub = p + 1; ; psub = pnext)
755
pnext = patscan (psub, pe, '|');
756
for (srest = s; srest <= se; srest++)
758
/* Match this substring (S -> SREST) against this
759
subpattern (psub -> pnext - 1) */
760
m1 = gmatch (s, srest, psub, pnext - 1, flags) == 0;
761
/* OK, we matched a subpattern, so make sure the rest of the
762
string matches the rest of the pattern. Also handle
763
multiple matches of the pattern. */
765
m2 = (gmatch (srest, se, prest, pe, flags) == 0) ||
766
(s != srest && gmatch (srest, se, p - 1, pe, flags) == 0);
773
return (FNM_NOMATCH);
775
case '?': /* match zero or one of the patterns */
776
case '@': /* match exactly one of the patterns */
777
/* If we can get away with no matches, don't even bother. Just
778
call gmatch on the rest of the pattern and return success if
780
if (xc == '?' && (gmatch (s, se, prest, pe, flags) == 0))
783
/* OK, we have to do this the hard way. First, we see if one of
784
the subpatterns matches, then, if it does, we try to match the
785
rest of the string. */
786
for (psub = p + 1; ; psub = pnext)
788
pnext = patscan (psub, pe, '|');
789
srest = (prest == pe) ? se : s;
790
for ( ; srest <= se; srest++)
792
if (gmatch (s, srest, psub, pnext - 1, flags) == 0 &&
793
gmatch (srest, se, prest, pe, flags) == 0)
799
return (FNM_NOMATCH);
801
case '!': /* match anything *except* one of the patterns */
802
for (srest = s; srest <= se; srest++)
805
for (psub = p + 1; ; psub = pnext)
807
pnext = patscan (psub, pe, '|');
808
/* If one of the patterns matches, just bail immediately. */
809
if (m1 = (gmatch (s, srest, psub, pnext - 1, flags) == 0))
814
if (m1 == 0 && gmatch (srest, se, prest, pe, flags) == 0)
817
return (FNM_NOMATCH);
820
return (FNM_NOMATCH);
822
#endif /* EXTENDED_GLOB */
834
if (strmatch (pat, string, 0) == 0)
836
printf ("%s matches %s\n", string, pat);
841
printf ("%s does not match %s\n", string, pat);