1
/* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006
2
Free Software Foundation, Inc.
3
This file is part of the GNU C Library.
5
This program is free software; you can redistribute it and/or modify
6
it under the terms of the GNU General Public License as published by
7
the Free Software Foundation; either version 2, or (at your option)
10
This program is distributed in the hope that it will be useful,
11
but WITHOUT ANY WARRANTY; without even the implied warranty of
12
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
GNU General Public License for more details.
15
You should have received a copy of the GNU General Public License
16
along with this program; if not, write to the Free Software Foundation,
17
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19
/* Match STRING against the file name pattern PATTERN, returning zero if
20
it matches, nonzero if not. */
21
static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
22
const CHAR *string_end, bool no_leading_period, int flags)
24
static const CHAR *END (const CHAR *patternp) internal_function;
28
FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
29
bool no_leading_period, int flags)
31
register const CHAR *p = pattern, *n = string;
34
# if WIDE_CHAR_VERSION
35
const char *collseq = (const char *)
36
_NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
38
const UCHAR *collseq = (const UCHAR *)
39
_NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
43
while ((c = *p++) != L_('\0'))
45
bool new_no_leading_period = false;
51
if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
55
res = EXT (c, p, n, string_end, no_leading_period,
63
else if (*n == L_('/') && (flags & FNM_FILE_NAME))
65
else if (*n == L_('.') && no_leading_period)
70
if (!(flags & FNM_NOESCAPE))
74
/* Trailing \ loses. */
78
if (n == string_end || FOLD ((UCHAR) *n) != c)
83
if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
87
res = EXT (c, p, n, string_end, no_leading_period,
93
if (n != string_end && *n == L_('.') && no_leading_period)
96
for (c = *p++; c == L_('?') || c == L_('*'); c = *p++)
98
if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
100
const CHAR *endp = END (p);
103
/* This is a pattern. Skip over it. */
111
/* A ? needs to match one character. */
113
/* There isn't another character; no match. */
115
else if (*n == L_('/')
116
&& __builtin_expect (flags & FNM_FILE_NAME, 0))
117
/* A slash does not match a wildcard under
121
/* One character of the string is consumed in matching
122
this ? wildcard, so *??? won't match if there are
123
less than three characters. */
129
/* The wildcard(s) is/are the last element of the pattern.
130
If the name is a file name and contains another slash
131
this means it cannot match, unless the FNM_LEADING_DIR
134
int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
136
if (flags & FNM_FILE_NAME)
138
if (flags & FNM_LEADING_DIR)
142
if (MEMCHR (n, L_('/'), string_end - n) == NULL)
153
endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
159
|| (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
160
&& (c == L_('@') || c == L_('+') || c == L_('!'))
163
int flags2 = ((flags & FNM_FILE_NAME)
164
? flags : (flags & ~FNM_PERIOD));
165
bool no_leading_period2 = no_leading_period;
167
for (--p; n < endp; ++n, no_leading_period2 = false)
168
if (FCT (p, n, string_end, no_leading_period2, flags2)
172
else if (c == L_('/') && (flags & FNM_FILE_NAME))
174
while (n < string_end && *n != L_('/'))
176
if (n < string_end && *n == L_('/')
177
&& (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
183
int flags2 = ((flags & FNM_FILE_NAME)
184
? flags : (flags & ~FNM_PERIOD));
185
int no_leading_period2 = no_leading_period;
187
if (c == L_('\\') && !(flags & FNM_NOESCAPE))
190
for (--p; n < endp; ++n, no_leading_period2 = false)
191
if (FOLD ((UCHAR) *n) == c
192
&& (FCT (p, n, string_end, no_leading_period2, flags2)
198
/* If we come here no match is possible with the wildcard. */
203
/* Nonzero if the sense of the character class is inverted. */
208
if (posixly_correct == 0)
209
posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
214
if (*n == L_('.') && no_leading_period)
217
if (*n == L_('/') && (flags & FNM_FILE_NAME))
218
/* `/' cannot be matched. */
221
not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^')));
225
fn = FOLD ((UCHAR) *n);
230
if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
234
c = FOLD ((UCHAR) *p);
239
else if (c == L_('[') && *p == L_(':'))
241
/* Leave room for the null. */
242
CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
244
#if defined _LIBC || WIDE_CHAR_SUPPORT
247
const CHAR *startp = p;
251
if (c1 == CHAR_CLASS_MAX_LENGTH)
252
/* The name is too long and therefore the pattern
257
if (c == L_(':') && p[1] == L_(']'))
262
if (c < L_('a') || c >= L_('z'))
264
/* This cannot possibly be a character class name.
265
Match it as a normal range. */
274
#if defined _LIBC || WIDE_CHAR_SUPPORT
275
wt = IS_CHAR_CLASS (str);
277
/* Invalid character class name. */
280
# if defined _LIBC && ! WIDE_CHAR_VERSION
281
/* The following code is glibc specific but does
282
there a good job in speeding up the code since
283
we can avoid the btowc() call. */
284
if (_ISCTYPE ((UCHAR) *n, wt))
287
if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
291
if ((STREQ (str, L_("alnum")) && isalnum ((UCHAR) *n))
292
|| (STREQ (str, L_("alpha")) && isalpha ((UCHAR) *n))
293
|| (STREQ (str, L_("blank")) && isblank ((UCHAR) *n))
294
|| (STREQ (str, L_("cntrl")) && iscntrl ((UCHAR) *n))
295
|| (STREQ (str, L_("digit")) && isdigit ((UCHAR) *n))
296
|| (STREQ (str, L_("graph")) && isgraph ((UCHAR) *n))
297
|| (STREQ (str, L_("lower")) && islower ((UCHAR) *n))
298
|| (STREQ (str, L_("print")) && isprint ((UCHAR) *n))
299
|| (STREQ (str, L_("punct")) && ispunct ((UCHAR) *n))
300
|| (STREQ (str, L_("space")) && isspace ((UCHAR) *n))
301
|| (STREQ (str, L_("upper")) && isupper ((UCHAR) *n))
302
|| (STREQ (str, L_("xdigit")) && isxdigit ((UCHAR) *n)))
308
else if (c == L_('[') && *p == L_('='))
312
_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
313
const CHAR *startp = p;
325
if (c != L_('=') || p[1] != L_(']'))
335
if ((UCHAR) *n == str[0])
340
const int32_t *table;
341
# if WIDE_CHAR_VERSION
342
const int32_t *weights;
343
const int32_t *extra;
345
const unsigned char *weights;
346
const unsigned char *extra;
348
const int32_t *indirect;
350
const UCHAR *cp = (const UCHAR *) str;
352
/* This #include defines a local function! */
353
# if WIDE_CHAR_VERSION
354
# include <locale/weightwc.h>
356
# include <locale/weight.h>
359
# if WIDE_CHAR_VERSION
360
table = (const int32_t *)
361
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
362
weights = (const int32_t *)
363
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
364
extra = (const int32_t *)
365
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
366
indirect = (const int32_t *)
367
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
369
table = (const int32_t *)
370
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
371
weights = (const unsigned char *)
372
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
373
extra = (const unsigned char *)
374
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
375
indirect = (const int32_t *)
376
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
382
/* We found a table entry. Now see whether the
383
character we are currently at has the same
384
equivalance class value. */
385
int len = weights[idx];
387
const UCHAR *np = (const UCHAR *) n;
389
idx2 = findidx (&np);
390
if (idx2 != 0 && len == weights[idx2])
395
&& (weights[idx + 1 + cnt]
396
== weights[idx2 + 1 + cnt]))
408
else if (c == L_('\0'))
409
/* [ (unterminated) loses. */
413
bool is_range = false;
416
bool is_seqval = false;
418
if (c == L_('[') && *p == L_('.'))
421
_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
422
const CHAR *startp = p;
428
if (c == L_('.') && p[1] == L_(']'))
438
/* We have to handling the symbols differently in
439
ranges since then the collation sequence is
441
is_range = *p == L_('-') && p[1] != L_('\0');
445
/* There are no names defined in the collation
446
data. Therefore we only accept the trivial
447
names consisting of the character itself. */
451
if (!is_range && *n == startp[1])
460
const int32_t *symb_table;
461
# ifdef WIDE_CHAR_VERSION
465
# define str (startp + 1)
467
const unsigned char *extra;
473
# ifdef WIDE_CHAR_VERSION
474
/* We have to convert the name to a single-byte
475
string. This is possible since the names
476
consist of ASCII characters and the internal
477
representation is UCS4. */
478
for (strcnt = 0; strcnt < c1; ++strcnt)
479
str[strcnt] = startp[1 + strcnt];
483
_NL_CURRENT_WORD (LC_COLLATE,
484
_NL_COLLATE_SYMB_HASH_SIZEMB);
485
symb_table = (const int32_t *)
486
_NL_CURRENT (LC_COLLATE,
487
_NL_COLLATE_SYMB_TABLEMB);
488
extra = (const unsigned char *)
489
_NL_CURRENT (LC_COLLATE,
490
_NL_COLLATE_SYMB_EXTRAMB);
492
/* Locate the character in the hashing table. */
493
hash = elem_hash (str, c1);
496
elem = hash % table_size;
497
if (symb_table[2 * elem] != 0)
499
second = hash % (table_size - 2) + 1;
503
/* First compare the hashing value. */
504
if (symb_table[2 * elem] == hash
506
== extra[symb_table[2 * elem + 1]])
508
&extra[symb_table[2 * elem
512
/* Yep, this is the entry. */
513
idx = symb_table[2 * elem + 1];
514
idx += 1 + extra[idx];
521
while (symb_table[2 * elem] != 0);
524
if (symb_table[2 * elem] != 0)
526
/* Compare the byte sequence but only if
527
this is not part of a range. */
528
# ifdef WIDE_CHAR_VERSION
531
idx += 1 + extra[idx];
532
/* Adjust for the alignment. */
533
idx = (idx + 3) & ~3;
535
wextra = (int32_t *) &extra[idx + 4];
540
# ifdef WIDE_CHAR_VERSION
542
(int32_t) c1 < wextra[idx];
544
if (n[c1] != wextra[1 + c1])
547
if ((int32_t) c1 == wextra[idx])
550
for (c1 = 0; c1 < extra[idx]; ++c1)
551
if (n[c1] != extra[1 + c1])
554
if (c1 == extra[idx])
559
/* Get the collation sequence value. */
561
# ifdef WIDE_CHAR_VERSION
562
cold = wextra[1 + wextra[idx]];
564
/* Adjust for the alignment. */
565
idx += 1 + extra[idx];
566
idx = (idx + 3) & ~4;
567
cold = *((int32_t *) &extra[idx]);
574
/* No valid character. Match it as a
576
if (!is_range && *n == str[0])
593
/* We have to handling the symbols differently in
594
ranges since then the collation sequence is
596
is_range = (*p == L_('-') && p[1] != L_('\0')
599
if (!is_range && c == fn)
603
/* This is needed if we goto normal_bracket; from
604
outside of is_seqval's scope. */
612
if (c == L_('-') && *p != L_(']'))
615
/* We have to find the collation sequence
616
value for C. Collation sequence is nothing
617
we can regularly access. The sequence
618
value is defined by the order in which the
619
definitions of the collation values for the
620
various characters appear in the source
621
file. A strange concept, nowhere
627
# ifdef WIDE_CHAR_VERSION
628
/* Search in the `names' array for the characters. */
629
fcollseq = __collseq_table_lookup (collseq, fn);
630
if (fcollseq == ~((uint32_t) 0))
631
/* XXX We don't know anything about the character
632
we are supposed to match. This means we are
634
goto range_not_matched;
639
lcollseq = __collseq_table_lookup (collseq, cold);
641
fcollseq = collseq[fn];
642
lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
646
if (cend == L_('[') && *p == L_('.'))
649
_NL_CURRENT_WORD (LC_COLLATE,
651
const CHAR *startp = p;
657
if (c == L_('.') && p[1] == L_(']'))
669
/* There are no names defined in the
670
collation data. Therefore we only
671
accept the trivial names consisting
672
of the character itself. */
681
const int32_t *symb_table;
682
# ifdef WIDE_CHAR_VERSION
686
# define str (startp + 1)
688
const unsigned char *extra;
694
# ifdef WIDE_CHAR_VERSION
695
/* We have to convert the name to a single-byte
696
string. This is possible since the names
697
consist of ASCII characters and the internal
698
representation is UCS4. */
699
for (strcnt = 0; strcnt < c1; ++strcnt)
700
str[strcnt] = startp[1 + strcnt];
704
_NL_CURRENT_WORD (LC_COLLATE,
705
_NL_COLLATE_SYMB_HASH_SIZEMB);
706
symb_table = (const int32_t *)
707
_NL_CURRENT (LC_COLLATE,
708
_NL_COLLATE_SYMB_TABLEMB);
709
extra = (const unsigned char *)
710
_NL_CURRENT (LC_COLLATE,
711
_NL_COLLATE_SYMB_EXTRAMB);
713
/* Locate the character in the hashing
715
hash = elem_hash (str, c1);
718
elem = hash % table_size;
719
if (symb_table[2 * elem] != 0)
721
second = hash % (table_size - 2) + 1;
725
/* First compare the hashing value. */
726
if (symb_table[2 * elem] == hash
728
== extra[symb_table[2 * elem + 1]])
730
&extra[symb_table[2 * elem + 1]
733
/* Yep, this is the entry. */
734
idx = symb_table[2 * elem + 1];
735
idx += 1 + extra[idx];
742
while (symb_table[2 * elem] != 0);
745
if (symb_table[2 * elem] != 0)
747
/* Compare the byte sequence but only if
748
this is not part of a range. */
749
# ifdef WIDE_CHAR_VERSION
752
idx += 1 + extra[idx];
753
/* Adjust for the alignment. */
754
idx = (idx + 3) & ~4;
756
wextra = (int32_t *) &extra[idx + 4];
758
/* Get the collation sequence value. */
760
# ifdef WIDE_CHAR_VERSION
761
cend = wextra[1 + wextra[idx]];
763
/* Adjust for the alignment. */
764
idx += 1 + extra[idx];
765
idx = (idx + 3) & ~4;
766
cend = *((int32_t *) &extra[idx]);
769
else if (symb_table[2 * elem] != 0 && c1 == 1)
781
if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
783
if (cend == L_('\0'))
788
/* XXX It is not entirely clear to me how to handle
789
characters which are not mentioned in the
790
collation specification. */
792
# ifdef WIDE_CHAR_VERSION
793
lcollseq == 0xffffffff ||
795
lcollseq <= fcollseq)
797
/* We have to look at the upper bound. */
804
# ifdef WIDE_CHAR_VERSION
806
__collseq_table_lookup (collseq, cend);
807
if (hcollseq == ~((uint32_t) 0))
809
/* Hum, no information about the upper
810
bound. The matching succeeds if the
811
lower bound is matched exactly. */
812
if (lcollseq != fcollseq)
813
goto range_not_matched;
818
hcollseq = collseq[cend];
822
if (lcollseq <= hcollseq && fcollseq <= hcollseq)
825
# ifdef WIDE_CHAR_VERSION
829
/* We use a boring value comparison of the character
830
values. This is better than comparing using
831
`strcoll' since the latter would have surprising
832
and sometimes fatal consequences. */
835
if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
837
if (cend == L_('\0'))
841
if (cold <= fn && fn <= cend)
858
/* Skip the rest of the [...] that already matched. */
865
/* [... (unterminated) loses. */
868
if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
872
/* XXX 1003.2d11 is unclear if this is right. */
875
else if (c == L_('[') && *p == L_(':'))
878
const CHAR *startp = p;
883
if (++c1 == CHAR_CLASS_MAX_LENGTH)
886
if (*p == L_(':') && p[1] == L_(']'))
889
if (c < L_('a') || c >= L_('z'))
898
else if (c == L_('[') && *p == L_('='))
904
if (c != L_('=') || p[1] != L_(']'))
909
else if (c == L_('[') && *p == L_('.'))
918
if (*p == L_('.') && p[1] == L_(']'))
925
while (c != L_(']'));
934
if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
938
res = EXT (c, p, n, string_end, no_leading_period, flags);
945
if (NO_LEADING_PERIOD (flags))
947
if (n == string_end || c != (UCHAR) *n)
950
new_no_leading_period = true;
956
if (n == string_end || c != FOLD ((UCHAR) *n))
960
no_leading_period = new_no_leading_period;
967
if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
968
/* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
977
END (const CHAR *pattern)
979
const CHAR *p = pattern;
982
if (*++p == L_('\0'))
983
/* This is an invalid pattern. */
985
else if (*p == L_('['))
987
/* Handle brackets special. */
988
if (posixly_correct == 0)
989
posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
991
/* Skip the not sign. We have to recognize it because of a possibly
993
if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
995
/* A leading ']' is recognized as such. */
998
/* Skip over all characters of the list. */
999
while (*p != L_(']'))
1000
if (*p++ == L_('\0'))
1001
/* This is no valid pattern. */
1004
else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1005
|| *p == L_('!')) && p[1] == L_('('))
1007
else if (*p == L_(')'))
1016
EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1017
bool no_leading_period, int flags)
1023
struct patternlist *next;
1026
struct patternlist **lastp = &list;
1027
size_t pattern_len = STRLEN (pattern);
1030
enum { ALLOCA_LIMIT = 8000 };
1032
/* Parse the pattern. Store the individual parts in the list. */
1034
for (startp = p = pattern + 1; ; ++p)
1036
/* This is an invalid pattern. */
1038
else if (*p == L_('['))
1040
/* Handle brackets special. */
1041
if (posixly_correct == 0)
1042
posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1044
/* Skip the not sign. We have to recognize it because of a possibly
1046
if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1048
/* A leading ']' is recognized as such. */
1051
/* Skip over all characters of the list. */
1052
while (*p != L_(']'))
1053
if (*p++ == L_('\0'))
1054
/* This is no valid pattern. */
1057
else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1058
|| *p == L_('!')) && p[1] == L_('('))
1059
/* Remember the nesting level. */
1061
else if (*p == L_(')'))
1065
/* This means we found the end of the pattern. */
1066
#define NEW_PATTERN \
1067
struct patternlist *newp; \
1072
assert (p > startp); \
1073
plen = (opt == L_('?') || opt == L_('@') \
1075
: (unsigned) (p - startp) + 1); \
1076
plensize = plen * sizeof (CHAR); \
1077
newpsize = offsetof (struct patternlist, str) + plensize; \
1078
if ((size_t) -1 / sizeof (CHAR) < plen \
1079
|| newpsize < offsetof (struct patternlist, str) \
1080
|| ALLOCA_LIMIT <= newpsize) \
1082
newp = (struct patternlist *) alloca (newpsize); \
1083
*((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0'); \
1084
newp->next = NULL; \
1091
else if (*p == L_('|'))
1099
assert (list != NULL);
1100
assert (p[-1] == L_(')'));
1106
if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1113
for (rs = string; rs <= string_end; ++rs)
1114
/* First match the prefix with the current pattern with the
1116
if (FCT (list->str, string, rs, no_leading_period,
1117
flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1118
/* This was successful. Now match the rest with the rest
1120
&& (FCT (p, rs, string_end,
1123
: rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1124
flags & FNM_FILE_NAME
1125
? flags : flags & ~FNM_PERIOD) == 0
1126
/* This didn't work. Try the whole pattern. */
1128
&& FCT (pattern - 1, rs, string_end,
1131
: rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1132
flags & FNM_FILE_NAME
1133
? flags : flags & ~FNM_PERIOD) == 0)))
1134
/* It worked. Signal success. */
1137
while ((list = list->next) != NULL);
1139
/* None of the patterns lead to a match. */
1143
if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1149
/* I cannot believe it but `strcat' is actually acceptable
1150
here. Match the entire string with the prefix from the
1151
pattern list and the rest of the pattern following the
1153
if (FCT (STRCAT (list->str, p), string, string_end,
1155
flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1156
/* It worked. Signal success. */
1158
while ((list = list->next) != NULL);
1160
/* None of the patterns lead to a match. */
1164
for (rs = string; rs <= string_end; ++rs)
1166
struct patternlist *runp;
1168
for (runp = list; runp != NULL; runp = runp->next)
1169
if (FCT (runp->str, string, rs, no_leading_period,
1170
flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1173
/* If none of the patterns matched see whether the rest does. */
1175
&& (FCT (p, rs, string_end,
1178
: rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1179
flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1181
/* This is successful. */
1185
/* None of the patterns together with the rest of the pattern
1190
assert (! "Invalid extended matching operator");