249
/* Print the entries in DBFILE that match shell globbing pattern PATHPART.
259
lc_strcpy(char *dest, const char *src)
263
*dest++ = TOLOWER(*src);
272
uintmax_t items_accepted;
274
static struct locate_limits limits;
279
uintmax_t compressed_bytes;
280
uintmax_t total_filename_count;
281
uintmax_t total_filename_length;
282
uintmax_t whitespace_count;
283
uintmax_t newline_count;
284
uintmax_t highbit_filename_count;
286
static struct locate_stats statistics;
296
static struct stringbuf casebuf;
302
struct stringbuf *pbuf;
305
struct regular_expression
313
int c; /* An input byte. */
314
int count; /* The length of the prefix shared with the previous database entry. */
316
char *original_filename; /* The current input database entry. */
317
size_t pathsize; /* Amount allocated for it. */
318
char *munged_filename; /* path or base_name(path) */
319
FILE *fp; /* The pathname database. */
320
char *dbfile; /* Its name, or "<stdin>" */
321
/* for the old database format,
322
the first and second characters of the most common bigrams. */
328
typedef int (*visitfunc)(struct process_data *procdata,
335
struct visitor *next;
339
static struct visitor *inspectors = NULL;
340
static struct visitor *lastinspector = NULL;
341
static struct visitor *past_pat_inspector = NULL;
343
/* 0 or 1 pattern(s) */
345
process_simple(struct process_data *procdata)
347
int result = VISIT_CONTINUE;
348
const struct visitor *p = inspectors;
350
while ( ((VISIT_CONTINUE | VISIT_ACCEPTED) & result) && (NULL != p) )
352
result = (p->inspector)(procdata, p->context);
359
/* Accept if any pattern matches. */
361
process_or (struct process_data *procdata)
363
int result = VISIT_CONTINUE;
364
const struct visitor *p = inspectors;
366
while ( ((VISIT_CONTINUE | VISIT_REJECTED) & result) && (past_pat_inspector != p) )
368
result = (p->inspector)(procdata, p->context);
372
if (result == VISIT_CONTINUE)
373
result = VISIT_REJECTED;
374
if (result & (VISIT_ABORT | VISIT_REJECTED))
377
p = past_pat_inspector;
378
result = VISIT_CONTINUE;
380
while ( (VISIT_CONTINUE == result) && (NULL != p) )
382
result = (p->inspector)(procdata, p->context);
386
if (VISIT_CONTINUE == result)
387
return VISIT_ACCEPTED;
392
/* Accept if all patterns match. */
394
process_and (struct process_data *procdata)
396
int result = VISIT_CONTINUE;
397
const struct visitor *p = inspectors;
399
while ( ((VISIT_CONTINUE | VISIT_ACCEPTED) & result) && (past_pat_inspector != p) )
401
result = (p->inspector)(procdata, p->context);
405
if (result == VISIT_CONTINUE)
406
result = VISIT_REJECTED;
407
if (result & (VISIT_ABORT | VISIT_REJECTED))
410
p = past_pat_inspector;
411
result = VISIT_CONTINUE;
413
while ( (VISIT_CONTINUE == result) && (NULL != p) )
415
result = (p->inspector)(procdata, p->context);
419
if (VISIT_CONTINUE == result)
420
return VISIT_ACCEPTED;
425
typedef int (*processfunc)(struct process_data *procdata);
427
static processfunc mainprocessor = NULL;
430
add_visitor(visitfunc fn, void *context)
432
struct visitor *p = xmalloc(sizeof(struct visitor));
434
p->context = context;
437
if (NULL == lastinspector)
439
lastinspector = inspectors = p;
443
lastinspector->next = p;
451
visit_justprint_quoted(struct process_data *procdata, void *context)
454
print_quoted (stdout, quote_opts, stdout_is_a_tty,
456
procdata->original_filename);
458
return VISIT_CONTINUE;
462
visit_justprint_unquoted(struct process_data *procdata, void *context)
465
fputs(procdata->original_filename, stdout);
467
return VISIT_CONTINUE;
471
visit_old_format(struct process_data *procdata, void *context)
476
/* Get the offset in the path where this path info starts. */
477
if (procdata->c == LOCATEDB_OLD_ESCAPE)
478
procdata->count += getw (procdata->fp) - LOCATEDB_OLD_OFFSET;
480
procdata->count += procdata->c - LOCATEDB_OLD_OFFSET;
482
/* Overlay the old path with the remainder of the new. */
483
for (s = procdata->original_filename + procdata->count;
484
(procdata->c = getc (procdata->fp)) > LOCATEDB_OLD_ESCAPE;)
485
if (procdata->c < 0200)
486
*s++ = procdata->c; /* An ordinary character. */
489
/* Bigram markers have the high bit set. */
491
*s++ = procdata->bigram1[procdata->c];
492
*s++ = procdata->bigram2[procdata->c];
496
procdata->munged_filename = procdata->original_filename;
498
return VISIT_CONTINUE;
503
visit_locate02_format(struct process_data *procdata, void *context)
509
if (procdata->c == LOCATEDB_ESCAPE)
510
procdata->count += (short)get_short (procdata->fp);
511
else if (procdata->c > 127)
512
procdata->count += procdata->c - 256;
514
procdata->count += procdata->c;
516
if (procdata->count > procdata->len || procdata->count < 0)
518
/* This should not happen generally, but since we're
519
* reading in data which is outside our control, we
522
error(1, 0, _("locate database `%s' is corrupt or invalid"), procdata->dbfile);
525
/* Overlay the old path with the remainder of the new. */
526
nread = locate_read_str (&procdata->original_filename, &procdata->pathsize,
527
procdata->fp, 0, procdata->count);
530
procdata->c = getc (procdata->fp);
531
procdata->len = procdata->count + nread;
532
s = procdata->original_filename + procdata->len - 1; /* Move to the last char in path. */
533
assert (s[0] != '\0');
534
assert (s[1] == '\0'); /* Our terminator. */
535
assert (s[2] == '\0'); /* Added by locate_read_str. */
537
procdata->munged_filename = procdata->original_filename;
539
return VISIT_CONTINUE;
543
visit_basename(struct process_data *procdata, void *context)
546
procdata->munged_filename = base_name(procdata->original_filename);
548
return VISIT_CONTINUE;
553
visit_casefold(struct process_data *procdata, void *context)
555
struct stringbuf *b = context;
557
if (*b->preqlen+1 > b->buffersize)
559
b->buffer = xrealloc(b->buffer, *b->preqlen+1); /* XXX: consider using extendbuf(). */
560
b->buffersize = *b->preqlen+1;
562
lc_strcpy(b->buffer, procdata->munged_filename);
564
return VISIT_CONTINUE;
567
/* visit_existing_follow implements -L -e */
569
visit_existing_follow(struct process_data *procdata, void *context)
574
/* munged_filename has been converted in some way (to lower case,
575
* or is just the base name of the file), and original_filename has not.
576
* Hence only original_filename is still actually the name of the file
577
* whose existence we would need to check.
579
if (stat(procdata->original_filename, &st) != 0)
581
return VISIT_REJECTED;
585
return VISIT_CONTINUE;
589
/* visit_non_existing_follow implements -L -E */
591
visit_non_existing_follow(struct process_data *procdata, void *context)
596
/* munged_filename has been converted in some way (to lower case,
597
* or is just the base name of the file), and original_filename has not.
598
* Hence only original_filename is still actually the name of the file
599
* whose existence we would need to check.
601
if (stat(procdata->original_filename, &st) == 0)
603
return VISIT_REJECTED;
607
return VISIT_CONTINUE;
611
/* visit_existing_nofollow implements -P -e */
613
visit_existing_nofollow(struct process_data *procdata, void *context)
618
/* munged_filename has been converted in some way (to lower case,
619
* or is just the base name of the file), and original_filename has not.
620
* Hence only original_filename is still actually the name of the file
621
* whose existence we would need to check.
623
if (lstat(procdata->original_filename, &st) != 0)
625
return VISIT_REJECTED;
629
return VISIT_CONTINUE;
633
/* visit_non_existing_nofollow implements -P -E */
635
visit_non_existing_nofollow(struct process_data *procdata, void *context)
640
/* munged_filename has been converted in some way (to lower case,
641
* or is just the base name of the file), and original_filename has not.
642
* Hence only original_filename is still actually the name of the file
643
* whose existence we would need to check.
645
if (lstat(procdata->original_filename, &st) == 0)
647
return VISIT_REJECTED;
651
return VISIT_CONTINUE;
656
visit_substring_match_nocasefold(struct process_data *procdata, void *context)
658
const char *pattern = context;
660
if (NULL != strstr(procdata->munged_filename, pattern))
661
return VISIT_ACCEPTED;
663
return VISIT_REJECTED;
667
visit_substring_match_casefold(struct process_data *procdata, void *context)
669
const struct casefolder * p = context;
670
const struct stringbuf * b = p->pbuf;
673
if (NULL != strstr(b->buffer, p->pattern))
674
return VISIT_ACCEPTED;
676
return VISIT_REJECTED;
681
visit_globmatch_nofold(struct process_data *procdata, void *context)
683
const char *glob = context;
684
if (fnmatch(glob, procdata->munged_filename, 0) != 0)
685
return VISIT_REJECTED;
687
return VISIT_ACCEPTED;
692
visit_globmatch_casefold(struct process_data *procdata, void *context)
694
const char *glob = context;
695
if (fnmatch(glob, procdata->munged_filename, FNM_CASEFOLD) != 0)
696
return VISIT_REJECTED;
698
return VISIT_ACCEPTED;
703
visit_regex(struct process_data *procdata, void *context)
705
struct regular_expression *p = context;
707
if (0 == regexec(&p->re, procdata->munged_filename, 0u, NULL, 0))
708
return VISIT_ACCEPTED; /* match */
710
return VISIT_REJECTED; /* no match */
715
visit_stats(struct process_data *procdata, void *context)
717
struct locate_stats *p = context;
718
size_t len = strlen(procdata->original_filename);
720
int highbit, whitespace, newline;
722
++(p->total_filename_count);
723
p->total_filename_length += len;
725
highbit = whitespace = newline = 0;
726
for (s=procdata->original_filename; *s; ++s)
728
if ( (int)(*s) & 128 )
732
newline = whitespace = 1;
734
else if (isspace((unsigned char)*s))
741
++(p->highbit_filename_count);
743
++(p->whitespace_count);
745
++(p->newline_count);
747
return VISIT_CONTINUE;
751
/* Emit the statistics.
754
print_stats(int argc, size_t database_file_size)
756
char hbuf[LONGEST_HUMAN_READABLE + 1];
758
printf(_("Locate database size: %s bytes\n"),
759
human_readable ((uintmax_t) database_file_size,
760
hbuf, human_ceiling, 1, 1));
762
printf(_("Filenames: %s "),
763
human_readable (statistics.total_filename_count,
764
hbuf, human_ceiling, 1, 1));
765
printf(_("with a cumulative length of %s bytes"),
766
human_readable (statistics.total_filename_length,
767
hbuf, human_ceiling, 1, 1));
769
printf(_("\n\tof which %s contain whitespace, "),
770
human_readable (statistics.whitespace_count,
771
hbuf, human_ceiling, 1, 1));
772
printf(_("\n\t%s contain newline characters, "),
773
human_readable (statistics.newline_count,
774
hbuf, human_ceiling, 1, 1));
775
printf(_("\n\tand %s contain characters with the high bit set.\n"),
776
human_readable (statistics.highbit_filename_count,
777
hbuf, human_ceiling, 1, 1));
780
printf(_("Compression ratio %4.2f%%\n"),
781
100.0 * ((double)statistics.total_filename_length
782
- (double) database_file_size)
783
/ (double) statistics.total_filename_length);
788
/* Print the entries in DBFILE that match shell globbing patterns in ARGV.
250
789
Return the number of entries printed. */
253
locate (pathpart, dbfile, ignore_case)
254
char *pathpart, *dbfile;
799
struct locate_limits *plimit,
257
/* The pathname database. */
261
/* Number of bytes read from an entry. */
264
/* true if PATHPART contains globbing metacharacters. */
266
/* The end of the last glob-free subpattern in PATHPART. */
269
/* The current input database entry. */
271
/* Amount allocated for it. */
274
/* The length of the prefix shared with the previous database entry. */
276
/* Where in `path' to stop the backward search for the last character
277
in the subpattern. Set according to `count'. */
280
/* true if we found a fast match (of patend) on the previous path. */
281
boolean prev_fast_match = false;
282
/* The return value. */
285
/* true if reading a bigram-encoded database. */
286
boolean old_format = false;
287
/* For the old database format,
288
the first and second characters of the most common bigrams. */
289
char bigram1[128], bigram2[128];
804
char *pathpart; /* A pattern to consider. */
805
int argn; /* Index to current pattern in argv. */
806
int need_fold; /* Set when folding and any pattern is non-glob. */
807
int nread; /* number of bytes read from an entry. */
808
struct process_data procdata; /* Storage for data shared with visitors. */
810
int old_format = 0; /* true if reading a bigram-encoded database. */
811
static bool did_stdin = false; /* Set to prevent rereading stdin. */
812
struct visitor* pvis; /* temp for determining past_pat_inspector. */
291
814
/* To check the age of the database. */
295
if (stat (dbfile, &st) || (fp = fopen (dbfile, "r")) == NULL)
297
error (0, errno, "%s", dbfile);
301
if (now - st.st_mtime > WARN_SECONDS)
304
warning: database `fred' is more than 8 days old */
305
error (0, 0, _("warning: database `%s' is more than %d %s old"),
306
dbfile, WARN_NUMBER_UNITS, _(warn_name_units));
309
pathsize = 1026; /* Increased as necessary by locate_read_str. */
310
path = xmalloc (pathsize);
312
nread = fread (path, 1, sizeof (LOCATEDB_MAGIC), fp);
819
procdata.len = procdata.count = 0;
820
if (!strcmp (dbfile, "-"))
824
error (0, 0, _("warning: the locate database can only be read from stdin once."));
829
procdata.dbfile = "<stdin>";
835
if (stat (dbfile, &st) || (procdata.fp = fopen (dbfile, "r")) == NULL)
837
error (0, errno, "%s", dbfile);
841
if (now - st.st_mtime > WARN_SECONDS)
844
warning: database `fred' is more than 8 days old */
845
error (0, 0, _("warning: database `%s' is more than %d %s old"),
846
dbfile, WARN_NUMBER_UNITS, _(warn_name_units));
848
procdata.dbfile = dbfile;
851
procdata.pathsize = 1026; /* Increased as necessary by locate_read_str. */
852
procdata.original_filename = xmalloc (procdata.pathsize);
854
nread = fread (procdata.original_filename, 1, sizeof (LOCATEDB_MAGIC),
313
856
if (nread != sizeof (LOCATEDB_MAGIC)
314
|| memcmp (path, LOCATEDB_MAGIC, sizeof (LOCATEDB_MAGIC)))
857
|| memcmp (procdata.original_filename, LOCATEDB_MAGIC,
858
sizeof (LOCATEDB_MAGIC)))
317
861
/* Read the list of the most common bigrams in the database. */
862
nread = fread (procdata.original_filename + sizeof (LOCATEDB_MAGIC), 1,
863
256 - sizeof (LOCATEDB_MAGIC), procdata.fp);
319
864
for (i = 0; i < 128; i++)
321
bigram1[i] = getc (fp);
322
bigram2[i] = getc (fp);
327
/* If we ignore case,
328
convert it to lower first so we don't have to do it every time */
330
for (patend=pathpart;*patend;++patend){
331
*patend=TOLOWER(*patend);
336
globflag = strchr (pathpart, '*') || strchr (pathpart, '?')
337
|| strchr (pathpart, '[');
339
patend = last_literal_end (pathpart);
344
register char *s; /* Scan the path we read in. */
348
/* Get the offset in the path where this path info starts. */
349
if (c == LOCATEDB_OLD_ESCAPE)
350
count += getw (fp) - LOCATEDB_OLD_OFFSET;
352
count += c - LOCATEDB_OLD_OFFSET;
354
/* Overlay the old path with the remainder of the new. */
355
for (s = path + count; (c = getc (fp)) > LOCATEDB_OLD_ESCAPE;)
357
*s++ = c; /* An ordinary character. */
360
/* Bigram markers have the high bit set. */
369
if (c == LOCATEDB_ESCAPE)
370
count += get_short (fp);
376
/* Overlay the old path with the remainder of the new. */
377
nread = locate_read_str (&path, &pathsize, fp, 0, count);
866
procdata.bigram1[i] = procdata.original_filename[i << 1];
867
procdata.bigram2[i] = procdata.original_filename[(i << 1) + 1];
872
/* Set up the inspection regime */
874
lastinspector = NULL;
875
past_pat_inspector = NULL;
878
add_visitor(visit_old_format, NULL);
880
add_visitor(visit_locate02_format, NULL);
883
add_visitor(visit_basename, NULL);
885
/* See if we need fold. */
886
if (ignore_case && !regex)
887
for ( argn = 0; argn < argc; argn++ )
889
pathpart = argv[argn];
890
if (!contains_metacharacter(pathpart))
381
s = path + count + nread - 2; /* Move to the last char in path. */
382
assert (s[0] != '\0');
383
assert (s[1] == '\0'); /* Our terminator. */
384
assert (s[2] == '\0'); /* Added by locate_read_str. */
387
/* If the previous path matched, scan the whole path for the last
388
char in the subpattern. If not, the shared prefix doesn't match
389
the pattern, so don't scan it for the last char. */
390
cutoff = prev_fast_match ? path : path + count;
392
/* Search backward starting at the end of the path we just read in,
393
for the character at the end of the last glob-free subpattern
397
for (prev_fast_match = false; s >= cutoff; s--)
398
/* Fast first char check. */
399
if (TOLOWER(*s) == *patend)
401
char *s2; /* Scan the path we read in. */
402
register char *p2; /* Scan `patend'. */
404
for (s2 = s - 1, p2 = patend - 1; *p2 != '\0' && TOLOWER(*s2) == *p2;
409
/* Success on the fast match. Compare the whole pattern
410
if it contains globbing characters. */
411
prev_fast_match = true;
412
if (globflag == false || fnmatch (pathpart, path, FNM_CASEFOLD) == 0)
414
if (!check_existence || stat(path, &st) == 0)
426
for (prev_fast_match = false; s >= cutoff; s--)
427
/* Fast first char check. */
430
char *s2; /* Scan the path we read in. */
431
register char *p2; /* Scan `patend'. */
433
for (s2 = s - 1, p2 = patend - 1; *p2 != '\0' && *s2 == *p2;
438
/* Success on the fast match. Compare the whole pattern
439
if it contains globbing characters. */
440
prev_fast_match = true;
441
if (globflag == false || fnmatch (pathpart, path,
444
if (!check_existence || stat(path, &st) == 0)
459
error (0, errno, "%s", dbfile);
462
if (fclose (fp) == EOF)
464
error (0, errno, "%s", dbfile);
899
add_visitor(visit_casefold, &casebuf);
900
casebuf.preqlen = &procdata.pathsize;
901
casebuf.soffs = &procdata.count;
904
/* Add an inspector for each pattern we're looking for. */
905
for ( argn = 0; argn < argc; argn++ )
907
pathpart = argv[argn];
910
struct regular_expression *p = xmalloc(sizeof(*p));
911
int cflags = REG_EXTENDED | REG_NOSUB
912
| (ignore_case ? REG_ICASE : 0);
914
if (0 == regcomp(&p->re, pathpart, cflags))
916
add_visitor(visit_regex, p);
920
error (1, errno, "Invalid regular expression; %s", pathpart);
923
else if (contains_metacharacter(pathpart))
926
add_visitor(visit_globmatch_casefold, pathpart);
928
add_visitor(visit_globmatch_nofold, pathpart);
932
/* No glob characters used. Hence we match on
933
* _any part_ of the filename, not just the
934
* basename. This seems odd to me, but it is the
935
* traditional behaviour.
936
* James Youngman <jay@gnu.org>
940
struct casefolder * cf = xmalloc(sizeof(*cf));
941
cf->pattern = pathpart;
943
add_visitor(visit_substring_match_casefold, cf);
944
/* If we ignore case, convert it to lower now so we don't have to
947
lc_strcpy(pathpart, pathpart);
951
add_visitor(visit_substring_match_nocasefold, pathpart);
956
pvis = lastinspector;
958
/* We add visit_existing_*() as late as possible to reduce the
959
* number of stat() calls.
961
switch (check_existence)
963
case ACCEPT_EXISTING:
964
if (follow_symlinks) /* -L, default */
965
add_visitor(visit_existing_follow, NULL);
967
add_visitor(visit_existing_nofollow, NULL);
970
case ACCEPT_NON_EXISTING:
971
if (follow_symlinks) /* -L, default */
972
add_visitor(visit_non_existing_follow, NULL);
974
add_visitor(visit_non_existing_nofollow, NULL);
977
case ACCEPT_EITHER: /* Default, neither -E nor -e */
978
/* do nothing; no extra processing. */
983
add_visitor(visit_stats, &statistics);
987
if (print_quoted_filename)
988
add_visitor(visit_justprint_quoted, NULL);
990
add_visitor(visit_justprint_unquoted, NULL);
996
past_pat_inspector = pvis->next;
998
mainprocessor = process_and;
1000
mainprocessor = process_or;
1003
mainprocessor = process_simple;
1007
printf(_("Database %s is in the %s format.\n"),
1009
old_format ? _("old") : "LOCATE02");
1012
procdata.c = getc (procdata.fp);
1013
while ( (procdata.c != EOF) && (!use_limit || (plimit->limit > 0)) )
1016
/* If we are searching for filename patterns, the inspector list
1017
* will contain an entry for each pattern for which we are searching.
1019
if ((VISIT_ACCEPTED | VISIT_CONTINUE) & (mainprocessor)(&procdata))
1021
if ((++plimit->items_accepted >= plimit->limit) && use_limit)
1031
print_stats(argc, st.st_size);
1034
if (ferror (procdata.fp))
1036
error (0, errno, "%s", procdata.dbfile);
1039
if (procdata.fp != stdin && fclose (procdata.fp) == EOF)
1041
error (0, errno, "%s", dbfile);
1045
return plimit->items_accepted;
471
1051
extern char *version_string;
473
1053
/* The name this program was run with. */
474
1054
char *program_name;
477
usage (stream, status)
1057
usage (FILE *stream)
481
1059
fprintf (stream, _("\
482
Usage: %s [-d path | --database=path] [-e | --existing]\n\
483
[-i | --ignore-case] [--version] [--help] pattern...\n"),
1060
Usage: %s [-d path | --database=path] [-e | -E | --[non-]existing]\n\
1061
[-i | --ignore-case] [-w | --wholename] [-b | --basename] \n\
1062
[--limit=N | -l N] [-S | --statistics] [-0 | --null] [-c | --count]\n\
1063
[-P | -H | --nofollow] [-L | --follow] [-m | --mmap ] [ -s | --stdio ]\n\
1064
[-A | --all] [-p | --print] [-r | --regex ] [--version] [--help]\n\
485
fputs (_("\nReport bugs to <bug-findutils@gnu.org>."), stream);
1067
fputs (_("\nReport bugs to <bug-findutils@gnu.org>.\n"), stream);
489
1070
static struct option const longopts[] =
491
1072
{"database", required_argument, NULL, 'd'},
492
1073
{"existing", no_argument, NULL, 'e'},
1074
{"non-existing", no_argument, NULL, 'E'},
493
1075
{"ignore-case", no_argument, NULL, 'i'},
1076
{"all", no_argument, NULL, 'A'},
494
1077
{"help", no_argument, NULL, 'h'},
495
1078
{"version", no_argument, NULL, 'v'},
1079
{"null", no_argument, NULL, '0'},
1080
{"count", no_argument, NULL, 'c'},
1081
{"wholename", no_argument, NULL, 'w'},
1082
{"wholepath", no_argument, NULL, 'w'}, /* Synonym. */
1083
{"basename", no_argument, NULL, 'b'},
1084
{"print", no_argument, NULL, 'p'},
1085
{"stdio", no_argument, NULL, 's'},
1086
{"mmap", no_argument, NULL, 'm'},
1087
{"limit", required_argument, NULL, 'l'},
1088
{"regex", no_argument, NULL, 'r'},
1089
{"statistics", no_argument, NULL, 'S'},
1090
{"follow", no_argument, NULL, 'L'},
1091
{"nofollow", no_argument, NULL, 'P'},
496
1092
{NULL, no_argument, NULL, 0}
1096
main (int argc, char **argv)
505
int fnmatch_flags = 0;
1099
unsigned long int found = 0uL;
507
1101
int ignore_case = 0;
1104
int basename_only = 0;
509
1111
program_name = argv[0];
511
1113
#ifdef HAVE_SETLOCALE