1
/* Copyright (C) 1991, 1992, 1993, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
2
2003, 2004, 2005, 2006, 2009, 2010 Free Software Foundation, Inc.
3
This file is part of the GNU C Library.
5
This program is free software; you can redistribute it and/or modify
6
it under the terms of the GNU General Public License as published by
7
the Free Software Foundation; either version 2, or (at your option)
10
This program is distributed in the hope that it will be useful,
11
but WITHOUT ANY WARRANTY; without even the implied warranty of
12
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
GNU General Public License for more details.
15
You should have received a copy of the GNU General Public License
16
along with this program; if not, write to the Free Software Foundation,
17
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19
/* Match STRING against the file name pattern PATTERN, returning zero if
20
it matches, nonzero if not. */
21
static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
22
const CHAR *string_end, bool no_leading_period, int flags)
24
static const CHAR *END (const CHAR *patternp) internal_function;
28
FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
29
bool no_leading_period, int flags)
31
register const CHAR *p = pattern, *n = string;
34
# if WIDE_CHAR_VERSION
35
const char *collseq = (const char *)
36
_NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
38
const UCHAR *collseq = (const UCHAR *)
39
_NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
43
while ((c = *p++) != L_('\0'))
45
bool new_no_leading_period = false;
51
if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
55
res = EXT (c, p, n, string_end, no_leading_period,
63
else if (*n == L_('/') && (flags & FNM_FILE_NAME))
65
else if (*n == L_('.') && no_leading_period)
70
if (!(flags & FNM_NOESCAPE))
74
/* Trailing \ loses. */
78
if (n == string_end || FOLD ((UCHAR) *n) != c)
83
if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
87
res = EXT (c, p, n, string_end, no_leading_period,
93
if (n != string_end && *n == L_('.') && no_leading_period)
96
for (c = *p++; c == L_('?') || c == L_('*'); c = *p++)
98
if (*p == L_('(') && (flags & FNM_EXTMATCH) != 0)
100
const CHAR *endp = END (p);
103
/* This is a pattern. Skip over it. */
111
/* A ? needs to match one character. */
113
/* There isn't another character; no match. */
115
else if (*n == L_('/')
116
&& __builtin_expect (flags & FNM_FILE_NAME, 0))
117
/* A slash does not match a wildcard under
121
/* One character of the string is consumed in matching
122
this ? wildcard, so *??? won't match if there are
123
less than three characters. */
129
/* The wildcard(s) is/are the last element of the pattern.
130
If the name is a file name and contains another slash
131
this means it cannot match, unless the FNM_LEADING_DIR
134
int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
136
if (flags & FNM_FILE_NAME)
138
if (flags & FNM_LEADING_DIR)
142
if (MEMCHR (n, L_('/'), string_end - n) == NULL)
153
endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L_('/') : L_('\0'),
159
|| (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
160
&& (c == L_('@') || c == L_('+') || c == L_('!'))
163
int flags2 = ((flags & FNM_FILE_NAME)
164
? flags : (flags & ~FNM_PERIOD));
165
bool no_leading_period2 = no_leading_period;
167
for (--p; n < endp; ++n, no_leading_period2 = false)
168
if (FCT (p, n, string_end, no_leading_period2, flags2)
172
else if (c == L_('/') && (flags & FNM_FILE_NAME))
174
while (n < string_end && *n != L_('/'))
176
if (n < string_end && *n == L_('/')
177
&& (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags)
183
int flags2 = ((flags & FNM_FILE_NAME)
184
? flags : (flags & ~FNM_PERIOD));
185
int no_leading_period2 = no_leading_period;
187
if (c == L_('\\') && !(flags & FNM_NOESCAPE))
190
for (--p; n < endp; ++n, no_leading_period2 = false)
191
if (FOLD ((UCHAR) *n) == c
192
&& (FCT (p, n, string_end, no_leading_period2, flags2)
198
/* If we come here no match is possible with the wildcard. */
203
/* Nonzero if the sense of the character class is inverted. */
208
if (posixly_correct == 0)
209
posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
214
if (*n == L_('.') && no_leading_period)
217
if (*n == L_('/') && (flags & FNM_FILE_NAME))
218
/* `/' cannot be matched. */
221
not = (*p == L_('!') || (posixly_correct < 0 && *p == L_('^')));
225
fn = FOLD ((UCHAR) *n);
230
if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
234
c = FOLD ((UCHAR) *p);
239
else if (c == L_('[') && *p == L_(':'))
241
/* Leave room for the null. */
242
CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
244
#if defined _LIBC || WIDE_CHAR_SUPPORT
247
const CHAR *startp = p;
251
if (c1 == CHAR_CLASS_MAX_LENGTH)
252
/* The name is too long and therefore the pattern
257
if (c == L_(':') && p[1] == L_(']'))
262
if (c < L_('a') || c >= L_('z'))
264
/* This cannot possibly be a character class name.
265
Match it as a normal range. */
274
#if defined _LIBC || WIDE_CHAR_SUPPORT
275
wt = IS_CHAR_CLASS (str);
277
/* Invalid character class name. */
280
# if defined _LIBC && ! WIDE_CHAR_VERSION
281
/* The following code is glibc specific but does
282
there a good job in speeding up the code since
283
we can avoid the btowc() call. */
284
if (_ISCTYPE ((UCHAR) *n, wt))
287
if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
291
if ((STREQ (str, L_("alnum")) && isalnum ((UCHAR) *n))
292
|| (STREQ (str, L_("alpha")) && isalpha ((UCHAR) *n))
293
|| (STREQ (str, L_("blank")) && isblank ((UCHAR) *n))
294
|| (STREQ (str, L_("cntrl")) && iscntrl ((UCHAR) *n))
295
|| (STREQ (str, L_("digit")) && isdigit ((UCHAR) *n))
296
|| (STREQ (str, L_("graph")) && isgraph ((UCHAR) *n))
297
|| (STREQ (str, L_("lower")) && islower ((UCHAR) *n))
298
|| (STREQ (str, L_("print")) && isprint ((UCHAR) *n))
299
|| (STREQ (str, L_("punct")) && ispunct ((UCHAR) *n))
300
|| (STREQ (str, L_("space")) && isspace ((UCHAR) *n))
301
|| (STREQ (str, L_("upper")) && isupper ((UCHAR) *n))
302
|| (STREQ (str, L_("xdigit")) && isxdigit ((UCHAR) *n)))
308
else if (c == L_('[') && *p == L_('='))
312
_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
313
const CHAR *startp = p;
325
if (c != L_('=') || p[1] != L_(']'))
335
if ((UCHAR) *n == str[0])
340
const int32_t *table;
341
# if WIDE_CHAR_VERSION
342
const int32_t *weights;
343
const int32_t *extra;
345
const unsigned char *weights;
346
const unsigned char *extra;
348
const int32_t *indirect;
350
const UCHAR *cp = (const UCHAR *) str;
352
/* This #include defines a local function! */
353
# if WIDE_CHAR_VERSION
354
# include <locale/weightwc.h>
356
# include <locale/weight.h>
359
# if WIDE_CHAR_VERSION
360
table = (const int32_t *)
361
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
362
weights = (const int32_t *)
363
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
364
extra = (const int32_t *)
365
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
366
indirect = (const int32_t *)
367
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
369
table = (const int32_t *)
370
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
371
weights = (const unsigned char *)
372
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
373
extra = (const unsigned char *)
374
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
375
indirect = (const int32_t *)
376
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
382
/* We found a table entry. Now see whether the
383
character we are currently at has the same
384
equivalance class value. */
385
int len = weights[idx & 0xffffff];
387
const UCHAR *np = (const UCHAR *) n;
389
idx2 = findidx (&np);
391
&& (idx >> 24) == (idx2 >> 24)
392
&& len == weights[idx2 & 0xffffff])
400
&& (weights[idx + 1 + cnt]
401
== weights[idx2 + 1 + cnt]))
413
else if (c == L_('\0'))
414
/* [ (unterminated) loses. */
418
bool is_range = false;
421
bool is_seqval = false;
423
if (c == L_('[') && *p == L_('.'))
426
_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
427
const CHAR *startp = p;
433
if (c == L_('.') && p[1] == L_(']'))
443
/* We have to handling the symbols differently in
444
ranges since then the collation sequence is
446
is_range = *p == L_('-') && p[1] != L_('\0');
450
/* There are no names defined in the collation
451
data. Therefore we only accept the trivial
452
names consisting of the character itself. */
456
if (!is_range && *n == startp[1])
465
const int32_t *symb_table;
466
# ifdef WIDE_CHAR_VERSION
470
# define str (startp + 1)
472
const unsigned char *extra;
478
# ifdef WIDE_CHAR_VERSION
479
/* We have to convert the name to a single-byte
480
string. This is possible since the names
481
consist of ASCII characters and the internal
482
representation is UCS4. */
483
for (strcnt = 0; strcnt < c1; ++strcnt)
484
str[strcnt] = startp[1 + strcnt];
488
_NL_CURRENT_WORD (LC_COLLATE,
489
_NL_COLLATE_SYMB_HASH_SIZEMB);
490
symb_table = (const int32_t *)
491
_NL_CURRENT (LC_COLLATE,
492
_NL_COLLATE_SYMB_TABLEMB);
493
extra = (const unsigned char *)
494
_NL_CURRENT (LC_COLLATE,
495
_NL_COLLATE_SYMB_EXTRAMB);
497
/* Locate the character in the hashing table. */
498
hash = elem_hash (str, c1);
501
elem = hash % table_size;
502
if (symb_table[2 * elem] != 0)
504
second = hash % (table_size - 2) + 1;
508
/* First compare the hashing value. */
509
if (symb_table[2 * elem] == hash
511
== extra[symb_table[2 * elem + 1]])
513
&extra[symb_table[2 * elem
517
/* Yep, this is the entry. */
518
idx = symb_table[2 * elem + 1];
519
idx += 1 + extra[idx];
526
while (symb_table[2 * elem] != 0);
529
if (symb_table[2 * elem] != 0)
531
/* Compare the byte sequence but only if
532
this is not part of a range. */
533
# ifdef WIDE_CHAR_VERSION
536
idx += 1 + extra[idx];
537
/* Adjust for the alignment. */
538
idx = (idx + 3) & ~3;
540
wextra = (int32_t *) &extra[idx + 4];
545
# ifdef WIDE_CHAR_VERSION
547
(int32_t) c1 < wextra[idx];
549
if (n[c1] != wextra[1 + c1])
552
if ((int32_t) c1 == wextra[idx])
555
for (c1 = 0; c1 < extra[idx]; ++c1)
556
if (n[c1] != extra[1 + c1])
559
if (c1 == extra[idx])
564
/* Get the collation sequence value. */
566
# ifdef WIDE_CHAR_VERSION
567
cold = wextra[1 + wextra[idx]];
569
/* Adjust for the alignment. */
570
idx += 1 + extra[idx];
571
idx = (idx + 3) & ~4;
572
cold = *((int32_t *) &extra[idx]);
579
/* No valid character. Match it as a
581
if (!is_range && *n == str[0])
598
/* We have to handling the symbols differently in
599
ranges since then the collation sequence is
601
is_range = (*p == L_('-') && p[1] != L_('\0')
604
if (!is_range && c == fn)
608
/* This is needed if we goto normal_bracket; from
609
outside of is_seqval's scope. */
617
if (c == L_('-') && *p != L_(']'))
620
/* We have to find the collation sequence
621
value for C. Collation sequence is nothing
622
we can regularly access. The sequence
623
value is defined by the order in which the
624
definitions of the collation values for the
625
various characters appear in the source
626
file. A strange concept, nowhere
632
# ifdef WIDE_CHAR_VERSION
633
/* Search in the `names' array for the characters. */
634
fcollseq = __collseq_table_lookup (collseq, fn);
635
if (fcollseq == ~((uint32_t) 0))
636
/* XXX We don't know anything about the character
637
we are supposed to match. This means we are
639
goto range_not_matched;
644
lcollseq = __collseq_table_lookup (collseq, cold);
646
fcollseq = collseq[fn];
647
lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
651
if (cend == L_('[') && *p == L_('.'))
654
_NL_CURRENT_WORD (LC_COLLATE,
656
const CHAR *startp = p;
662
if (c == L_('.') && p[1] == L_(']'))
674
/* There are no names defined in the
675
collation data. Therefore we only
676
accept the trivial names consisting
677
of the character itself. */
686
const int32_t *symb_table;
687
# ifdef WIDE_CHAR_VERSION
691
# define str (startp + 1)
693
const unsigned char *extra;
699
# ifdef WIDE_CHAR_VERSION
700
/* We have to convert the name to a single-byte
701
string. This is possible since the names
702
consist of ASCII characters and the internal
703
representation is UCS4. */
704
for (strcnt = 0; strcnt < c1; ++strcnt)
705
str[strcnt] = startp[1 + strcnt];
709
_NL_CURRENT_WORD (LC_COLLATE,
710
_NL_COLLATE_SYMB_HASH_SIZEMB);
711
symb_table = (const int32_t *)
712
_NL_CURRENT (LC_COLLATE,
713
_NL_COLLATE_SYMB_TABLEMB);
714
extra = (const unsigned char *)
715
_NL_CURRENT (LC_COLLATE,
716
_NL_COLLATE_SYMB_EXTRAMB);
718
/* Locate the character in the hashing
720
hash = elem_hash (str, c1);
723
elem = hash % table_size;
724
if (symb_table[2 * elem] != 0)
726
second = hash % (table_size - 2) + 1;
730
/* First compare the hashing value. */
731
if (symb_table[2 * elem] == hash
733
== extra[symb_table[2 * elem + 1]])
735
&extra[symb_table[2 * elem + 1]
738
/* Yep, this is the entry. */
739
idx = symb_table[2 * elem + 1];
740
idx += 1 + extra[idx];
747
while (symb_table[2 * elem] != 0);
750
if (symb_table[2 * elem] != 0)
752
/* Compare the byte sequence but only if
753
this is not part of a range. */
754
# ifdef WIDE_CHAR_VERSION
757
idx += 1 + extra[idx];
758
/* Adjust for the alignment. */
759
idx = (idx + 3) & ~4;
761
wextra = (int32_t *) &extra[idx + 4];
763
/* Get the collation sequence value. */
765
# ifdef WIDE_CHAR_VERSION
766
cend = wextra[1 + wextra[idx]];
768
/* Adjust for the alignment. */
769
idx += 1 + extra[idx];
770
idx = (idx + 3) & ~4;
771
cend = *((int32_t *) &extra[idx]);
774
else if (symb_table[2 * elem] != 0 && c1 == 1)
786
if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
788
if (cend == L_('\0'))
793
/* XXX It is not entirely clear to me how to handle
794
characters which are not mentioned in the
795
collation specification. */
797
# ifdef WIDE_CHAR_VERSION
798
lcollseq == 0xffffffff ||
800
lcollseq <= fcollseq)
802
/* We have to look at the upper bound. */
809
# ifdef WIDE_CHAR_VERSION
811
__collseq_table_lookup (collseq, cend);
812
if (hcollseq == ~((uint32_t) 0))
814
/* Hum, no information about the upper
815
bound. The matching succeeds if the
816
lower bound is matched exactly. */
817
if (lcollseq != fcollseq)
818
goto range_not_matched;
823
hcollseq = collseq[cend];
827
if (lcollseq <= hcollseq && fcollseq <= hcollseq)
830
# ifdef WIDE_CHAR_VERSION
834
/* We use a boring value comparison of the character
835
values. This is better than comparing using
836
`strcoll' since the latter would have surprising
837
and sometimes fatal consequences. */
840
if (!(flags & FNM_NOESCAPE) && cend == L_('\\'))
842
if (cend == L_('\0'))
846
if (cold <= fn && fn <= cend)
863
/* Skip the rest of the [...] that already matched. */
870
/* [... (unterminated) loses. */
873
if (!(flags & FNM_NOESCAPE) && c == L_('\\'))
877
/* XXX 1003.2d11 is unclear if this is right. */
880
else if (c == L_('[') && *p == L_(':'))
883
const CHAR *startp = p;
888
if (++c1 == CHAR_CLASS_MAX_LENGTH)
891
if (*p == L_(':') && p[1] == L_(']'))
894
if (c < L_('a') || c >= L_('z'))
903
else if (c == L_('[') && *p == L_('='))
909
if (c != L_('=') || p[1] != L_(']'))
914
else if (c == L_('[') && *p == L_('.'))
923
if (*p == L_('.') && p[1] == L_(']'))
930
while (c != L_(']'));
939
if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
943
res = EXT (c, p, n, string_end, no_leading_period, flags);
950
if (NO_LEADING_PERIOD (flags))
952
if (n == string_end || c != (UCHAR) *n)
955
new_no_leading_period = true;
961
if (n == string_end || c != FOLD ((UCHAR) *n))
965
no_leading_period = new_no_leading_period;
972
if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L_('/'))
973
/* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
982
END (const CHAR *pattern)
984
const CHAR *p = pattern;
987
if (*++p == L_('\0'))
988
/* This is an invalid pattern. */
990
else if (*p == L_('['))
992
/* Handle brackets special. */
993
if (posixly_correct == 0)
994
posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
996
/* Skip the not sign. We have to recognize it because of a possibly
998
if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1000
/* A leading ']' is recognized as such. */
1003
/* Skip over all characters of the list. */
1004
while (*p != L_(']'))
1005
if (*p++ == L_('\0'))
1006
/* This is no valid pattern. */
1009
else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1010
|| *p == L_('!')) && p[1] == L_('('))
1012
else if (*p == L_(')'))
1021
EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1022
bool no_leading_period, int flags)
1028
struct patternlist *next;
1031
struct patternlist **lastp = &list;
1032
size_t pattern_len = STRLEN (pattern);
1035
enum { ALLOCA_LIMIT = 8000 };
1037
/* Parse the pattern. Store the individual parts in the list. */
1039
for (startp = p = pattern + 1; ; ++p)
1041
/* This is an invalid pattern. */
1043
else if (*p == L_('['))
1045
/* Handle brackets special. */
1046
if (posixly_correct == 0)
1047
posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1049
/* Skip the not sign. We have to recognize it because of a possibly
1051
if (*++p == L_('!') || (posixly_correct < 0 && *p == L_('^')))
1053
/* A leading ']' is recognized as such. */
1056
/* Skip over all characters of the list. */
1057
while (*p != L_(']'))
1058
if (*p++ == L_('\0'))
1059
/* This is no valid pattern. */
1062
else if ((*p == L_('?') || *p == L_('*') || *p == L_('+') || *p == L_('@')
1063
|| *p == L_('!')) && p[1] == L_('('))
1064
/* Remember the nesting level. */
1066
else if (*p == L_(')'))
1070
/* This means we found the end of the pattern. */
1071
#define NEW_PATTERN \
1072
struct patternlist *newp; \
1077
plen = (opt == L_('?') || opt == L_('@') \
1079
: p - startp + 1UL); \
1080
plensize = plen * sizeof (CHAR); \
1081
newpsize = offsetof (struct patternlist, str) + plensize; \
1082
if ((size_t) -1 / sizeof (CHAR) < plen \
1083
|| newpsize < offsetof (struct patternlist, str) \
1084
|| ALLOCA_LIMIT <= newpsize) \
1086
newp = (struct patternlist *) alloca (newpsize); \
1087
*((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L_('\0'); \
1088
newp->next = NULL; \
1095
else if (*p == L_('|'))
1103
assert (list != NULL);
1104
assert (p[-1] == L_(')'));
1110
if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1117
for (rs = string; rs <= string_end; ++rs)
1118
/* First match the prefix with the current pattern with the
1120
if (FCT (list->str, string, rs, no_leading_period,
1121
flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0
1122
/* This was successful. Now match the rest with the rest
1124
&& (FCT (p, rs, string_end,
1127
: rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1128
flags & FNM_FILE_NAME
1129
? flags : flags & ~FNM_PERIOD) == 0
1130
/* This didn't work. Try the whole pattern. */
1132
&& FCT (pattern - 1, rs, string_end,
1135
: rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1136
flags & FNM_FILE_NAME
1137
? flags : flags & ~FNM_PERIOD) == 0)))
1138
/* It worked. Signal success. */
1141
while ((list = list->next) != NULL);
1143
/* None of the patterns lead to a match. */
1147
if (FCT (p, string, string_end, no_leading_period, flags) == 0)
1153
/* I cannot believe it but `strcat' is actually acceptable
1154
here. Match the entire string with the prefix from the
1155
pattern list and the rest of the pattern following the
1157
if (FCT (STRCAT (list->str, p), string, string_end,
1159
flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1160
/* It worked. Signal success. */
1162
while ((list = list->next) != NULL);
1164
/* None of the patterns lead to a match. */
1168
for (rs = string; rs <= string_end; ++rs)
1170
struct patternlist *runp;
1172
for (runp = list; runp != NULL; runp = runp->next)
1173
if (FCT (runp->str, string, rs, no_leading_period,
1174
flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD) == 0)
1177
/* If none of the patterns matched see whether the rest does. */
1179
&& (FCT (p, rs, string_end,
1182
: rs[-1] == '/' && NO_LEADING_PERIOD (flags),
1183
flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD)
1185
/* This is successful. */
1189
/* None of the patterns together with the rest of the pattern
1194
assert (! "Invalid extended matching operator");