1
/*************************************************
3
*************************************************/
5
/* This is a grep program that uses the PCRE regular expression library to do
6
its pattern matching. On a Unix or Win32 system it can recurse into
9
Copyright (c) 1997-2007 University of Cambridge
11
-----------------------------------------------------------------------------
12
Redistribution and use in source and binary forms, with or without
13
modification, are permitted provided that the following conditions are met:
15
* Redistributions of source code must retain the above copyright notice,
16
this list of conditions and the following disclaimer.
18
* Redistributions in binary form must reproduce the above copyright
19
notice, this list of conditions and the following disclaimer in the
20
documentation and/or other materials provided with the distribution.
22
* Neither the name of the University of Cambridge nor the names of its
23
contributors may be used to endorse or promote products derived from
24
this software without specific prior written permission.
26
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36
POSSIBILITY OF SUCH DAMAGE.
37
-----------------------------------------------------------------------------
51
#include <sys/types.h>
65
#define MAX_PATTERN_COUNT 100
68
#define MBUFTHIRD BUFSIZ
70
#define MBUFTHIRD 8192
73
/* Values for the "filenames" variable, which specifies options for file name
74
output. The order is important; it is assumed that a file name is wanted for
75
all values greater than FN_DEFAULT. */
77
enum { FN_NONE, FN_DEFAULT, FN_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
79
/* Actions for the -d and -D options */
81
enum { dee_READ, dee_SKIP, dee_RECURSE };
82
enum { DEE_READ, DEE_SKIP };
84
/* Actions for special processing options (flag bits) */
86
#define PO_WORD_MATCH 0x0001
87
#define PO_LINE_MATCH 0x0002
88
#define PO_FIXED_STRINGS 0x0004
90
/* Line ending types */
92
enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
96
/*************************************************
98
*************************************************/
100
/* Jeffrey Friedl has some debugging requirements that are not part of the
104
static int S_arg = -1;
105
static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
106
static unsigned int jfriedl_XT = 0; /* replicate text this many times */
107
static const char *jfriedl_prefix = "";
108
static const char *jfriedl_postfix = "";
111
static int endlinetype;   /* line-ending convention in use: one of the EL_ values */
113
static char *colour_string = (char *)"1;31";   /* parameter text of the escape sequence written before a coloured match */
114
static char *colour_option = NULL;   /* argument of --color / --colour */
115
static char *dee_option = NULL;   /* argument of -d (--directories) */
116
static char *DEE_option = NULL;   /* argument of -D (--devices) */
117
static char *newline = NULL;   /* argument of -N (--newline) */
118
static char *pattern_filename = NULL;   /* argument of -f (--file): file to read patterns from */
119
static char *stdin_name = (char *)"(standard input)";   /* name used for standard input; --label replaces it */
120
static char *locale = NULL;   /* argument of --locale */
122
static const unsigned char *pcretables = NULL;   /* character tables handed to pcre_compile() */
124
static int pattern_count = 0;   /* number of entries in use in pattern_list / hints_list */
125
static pcre **pattern_list = NULL;   /* compiled patterns, tried in order against each line */
126
static pcre_extra **hints_list = NULL;   /* pcre_extra data passed to pcre_exec() with the matching pattern_list entry */
128
static char *include_pattern = NULL;   /* argument of --include */
129
static char *exclude_pattern = NULL;   /* argument of --exclude */
131
static pcre *include_compiled = NULL;   /* if non-NULL, run with pcre_exec() against each path built during a directory scan */
132
static pcre *exclude_compiled = NULL;   /* if non-NULL, run with pcre_exec() against each path built during a directory scan */
134
static int after_context = 0;   /* -A: number of following context lines */
135
static int before_context = 0;   /* -B: number of prior context lines */
136
static int both_context = 0;   /* -C: number of context lines, before and after */
137
static int dee_action = dee_READ;   /* what to do with a directory: dee_READ, dee_SKIP or dee_RECURSE (-r selects dee_RECURSE) */
138
static int DEE_action = DEE_READ;   /* what to do with a path that is neither a directory nor a regular file: DEE_READ or DEE_SKIP */
139
static int error_count = 0;   /* pcre_exec() errors seen so far; matching is abandoned once there are too many */
140
static int filenames = FN_DEFAULT;   /* file name output mode: one of the FN_ values */
141
static int process_options = 0;   /* PO_ flag bits set by -w, -x and -F; also the index into prefix[] and suffix[] */
143
static BOOL count_only = FALSE;   /* -c: print only a count of matching lines */
144
static BOOL do_colour = FALSE;   /* NOTE(review): set and read outside the visible code; presumably enables colouring of matched text - confirm */
145
static BOOL hyphenpending = FALSE;   /* set after context lines are output; cleared once a "--" separator has been printed */
146
static BOOL invert = FALSE;   /* -v: select non-matching lines */
147
static BOOL multiline = FALSE;   /* -M: run in multiline mode */
148
static BOOL number = FALSE;   /* -n: print line numbers with output lines */
149
static BOOL only_matching = FALSE;   /* -o: show only the part of the line that matched */
150
static BOOL quiet = FALSE;   /* -q: suppress output, just set the return code */
151
static BOOL silent = FALSE;   /* -s: suppress error messages */
152
static BOOL utf8 = FALSE;   /* -u: UTF-8 mode; the line-end scanners then decode multi-byte characters */
154
/* Structure for options and list of them */
156
enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
159
typedef struct option_item {
163
const char *long_name;
164
const char *help_text;
167
/* Options without a single-letter equivalent get a negative value. This can be
168
used to identify them. */
170
#define N_COLOUR (-1)
171
#define N_EXCLUDE (-2)
173
#define N_INCLUDE (-4)
175
#define N_LOCALE (-6)
178
static option_item optionlist[] = {
179
{ OP_NODATA, N_NULL, NULL, "", " terminate options" },
180
{ OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
181
{ OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
182
{ OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
183
{ OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
184
{ OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
185
{ OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
186
{ OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
187
{ OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
188
{ OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
189
{ OP_PATLIST, 'e', NULL, "regex(p)", "specify pattern (may be used more than once)" },
190
{ OP_NODATA, 'F', NULL, "fixed_strings", "patterns are sets of newline-separated strings" },
191
{ OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
192
{ OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
193
{ OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
194
{ OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
195
{ OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
196
{ OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
197
{ OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
198
{ OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
199
{ OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
200
{ OP_STRING, 'N', &newline, "newline=type", "specify newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
201
{ OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
202
{ OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
203
{ OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
204
{ OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
205
{ OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
206
{ OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
208
{ OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
210
{ OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
211
{ OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
212
{ OP_NODATA, 'V', NULL, "version", "print version information and exit" },
213
{ OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
214
{ OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
215
{ OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
216
{ OP_NODATA, 0, NULL, NULL, NULL }
219
/* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
220
options. These set the 1, 2, and 4 bits in process_options, respectively. Note
221
that the combination of -w and -x has the same effect as -x on its own, so we
222
can treat them as the same. */
224
/* Both tables are indexed directly by the value of process_options (0 to 7),
where bit 0x0001 is PO_WORD_MATCH (-w), bit 0x0002 is PO_LINE_MATCH (-x) and
bit 0x0004 is PO_FIXED_STRINGS (-F). When a pattern is compiled, entry i of
prefix[] is written in front of the pattern text and entry i of suffix[] is
written after it. */
static const char *prefix[] = {
225
"", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
227
static const char *suffix[] = {
228
"", "\\b", ")$", ")$", "\\E", "\\E\\b", "\\E)$", "\\E)$" };
230
/* UTF-8 tables - used only when the newline setting is "any". */
232
/* Masks that isolate the data bits in the first byte of a multi-byte UTF-8
character. Indexed by the number of additional bytes in the character, which
is the value looked up in utf8_table4. */
const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
234
/* Number of additional bytes that follow the first byte of a multi-byte UTF-8
character. Indexed by the low six bits of that first byte (c & 0x3f); the
callers consult it only when the byte is >= 0xc0. */
const char utf8_table4[] = {
235
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
236
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
237
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
238
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
242
/*************************************************
243
* OS-specific functions *
244
*************************************************/
246
/* These functions are defined so that they can be made system specific,
247
although at present the only ones are for Unix, Win32, and for "no support". */
250
/************* Directory scanning in Unix ***********/
252
#if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
253
#include <sys/types.h>
254
#include <sys/stat.h>
257
typedef DIR directory_type;
260
isdirectory(char *filename)
263
if (stat(filename, &statbuf) < 0)
264
return 0; /* In the expectation that opening as a file will fail */
265
return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
268
static directory_type *
269
opendirectory(char *filename)
271
return opendir(filename);
275
readdirectory(directory_type *dir)
279
struct dirent *dent = readdir(dir);
280
if (dent == NULL) return NULL;
281
if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
284
/* Control never reaches here */
288
closedirectory(directory_type *dir)
294
/************* Test for regular file in Unix **********/
297
isregfile(char *filename)
300
if (stat(filename, &statbuf) < 0)
301
return 1; /* In the expectation that opening as a file will fail */
302
return (statbuf.st_mode & S_IFMT) == S_IFREG;
306
/************* Test stdout for being a terminal in Unix **********/
311
return isatty(fileno(stdout));
315
/************* Directory scanning in Win32 ***********/
317
/* I (Philip Hazel) have no means of testing this code. It was contributed by
318
Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
319
when it did not exist. */
327
#ifndef WIN32_LEAN_AND_MEAN
328
# define WIN32_LEAN_AND_MEAN
330
#ifndef INVALID_FILE_ATTRIBUTES
331
#define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
336
typedef struct directory_type
340
WIN32_FIND_DATA data;
344
isdirectory(char *filename)
346
DWORD attr = GetFileAttributes(filename);
347
if (attr == INVALID_FILE_ATTRIBUTES)
349
return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
353
opendirectory(char *filename)
359
len = strlen(filename);
360
pattern = (char *) malloc(len + 3);
361
dir = (directory_type *) malloc(sizeof(*dir));
362
if ((pattern == NULL) || (dir == NULL))
364
fprintf(stderr, "pcregrep: malloc failed\n");
367
memcpy(pattern, filename, len);
368
memcpy(&(pattern[len]), "\\*", 3);
369
dir->handle = FindFirstFile(pattern, &(dir->data));
370
if (dir->handle != INVALID_HANDLE_VALUE)
376
err = GetLastError();
379
errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
384
readdirectory(directory_type *dir)
390
if (!FindNextFile(dir->handle, &(dir->data)))
397
if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
398
return dir->data.cFileName;
401
return NULL; /* Keep compiler happy; never executed */
406
closedirectory(directory_type *dir)
408
FindClose(dir->handle);
413
/************* Test for regular file in Win32 **********/
415
/* I don't know how to do this, or if it can be done; assume all paths are
416
regular if they are not directories. */
418
int isregfile(char *filename)
420
return !isdirectory(filename)
424
/************* Test stdout for being a terminal in Win32 **********/
426
/* I don't know how to do this; assume never */
435
/************* Directory scanning when we can't do it ***********/
437
/* The type is void, and apart from isdirectory(), the functions do nothing. */
441
typedef void directory_type;
443
/* Fallback for builds without directory-scanning support: no path is ever
reported as a directory, so the result is always 0. */
int isdirectory(char *filename)
{
  (void)filename;   /* the name is deliberately not examined */
  return 0;
}
444
/* Fallback for builds without directory-scanning support: nothing can be
opened, so a null directory pointer is always returned. */
directory_type *opendirectory(char *filename)
{
  (void)filename;   /* the name is deliberately not examined */
  return (directory_type *)0;
}
445
/* Fallback for builds without directory-scanning support: there are never any
entries to read, so a null name pointer is always returned. */
char *readdirectory(directory_type *dir)
{
  (void)dir;   /* no directory state exists in this configuration */
  return (char *)0;
}
446
/* Fallback for builds without directory-scanning support: there is nothing to
release, so this does nothing. */
void closedirectory(directory_type *dir)
{
  (void)dir;   /* no directory state exists in this configuration */
}
449
/************* Test for regular when we can't do it **********/
451
/* Assume all files are regular. */
453
/* Fallback for builds that cannot test the file type: every path is treated as
a regular file, so the result is always 1. */
int isregfile(char *filename)
{
  (void)filename;   /* the name is deliberately not examined */
  return 1;
}
456
/************* Test stdout for being a terminal when we can't do it **********/
469
#ifndef HAVE_STRERROR
470
/*************************************************
471
* Provide strerror() for non-ANSI libraries *
472
*************************************************/
474
/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
475
in their libraries, but can provide the same facility by this simple
476
alternative function. */
479
extern char *sys_errlist[];
484
if (n < 0 || n >= sys_nerr) return "unknown error number";
485
return sys_errlist[n];
487
#endif /* HAVE_STRERROR */
491
/*************************************************
493
*************************************************/
495
/* The length of the endline sequence that is found is set via lenptr. This may
496
be zero at the very end of the file if there is no line-ending sequence there.
499
p current position in line
500
endptr end of available data
501
lenptr where to put the length of the eol sequence
503
Returns: pointer to the last byte of the line
507
end_of_line(char *p, char *endptr, int *lenptr)
511
default: /* Just in case */
513
while (p < endptr && *p != '\n') p++;
523
while (p < endptr && *p != '\r') p++;
535
while (p < endptr && *p != '\r') p++;
553
register int c = *((unsigned char *)p);
555
if (utf8 && c >= 0xc0)
558
extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
560
c = (c & utf8_table3[extra]) << gcss;
561
for (gcii = 1; gcii <= extra; gcii++)
564
c |= (p[gcii] & 0x3f) << gcss;
577
if (p < endptr && *p == 0x0a)
588
} /* End of loop for ANYCRLF case */
590
*lenptr = 0; /* Must have hit the end */
597
register int c = *((unsigned char *)p);
599
if (utf8 && c >= 0xc0)
602
extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
604
c = (c & utf8_table3[extra]) << gcss;
605
for (gcii = 1; gcii <= extra; gcii++)
608
c |= (p[gcii] & 0x3f) << gcss;
623
if (p < endptr && *p == 0x0a)
632
*lenptr = utf8? 2 : 1;
635
case 0x2028: /* LS */
636
case 0x2029: /* PS */
643
} /* End of loop for ANY case */
645
*lenptr = 0; /* Must have hit the end */
647
} /* End of overall switch */
652
/*************************************************
653
* Find start of previous line *
654
*************************************************/
656
/* This is called when looking back for before lines to print.
659
p start of the subsequent line
660
startptr start of available data
662
Returns: pointer to the start of the previous line
666
previous_line(char *p, char *startptr)
670
default: /* Just in case */
673
while (p > startptr && p[-1] != '\n') p--;
678
while (p > startptr && p[-1] != '\n') p--;
685
while (p > startptr && p[-1] != '\n') p--;
686
if (p <= startptr + 1 || p[-2] == '\r') return p;
688
return p; /* But control should never get here */
692
if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
693
if (utf8) while ((*p & 0xc0) == 0x80) p--;
703
while ((*pp & 0xc0) == 0x80) pp--;
704
c = *((unsigned char *)pp);
708
extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
710
c = (c & utf8_table3[extra]) << gcss;
711
for (gcii = 1; gcii <= extra; gcii++)
714
c |= (pp[gcii] & 0x3f) << gcss;
718
else c = *((unsigned char *)pp);
720
if (endlinetype == EL_ANYCRLF) switch (c)
737
case 0x2028: /* LS */
738
case 0x2029: /* PS */
745
p = pp; /* Back one character */
746
} /* End of loop for ANY case */
748
return startptr; /* Hit start of data */
749
} /* End of overall switch */
756
/*************************************************
757
* Print the previous "after" lines *
758
*************************************************/
760
/* This is called if we are about to lose said lines because of buffer filling,
761
and at the end of the file. The data in the line is written using fwrite() so
762
that a binary zero does not terminate it.
765
lastmatchnumber the number of the last matching line, plus one
766
lastmatchrestart where we restarted after the last match
767
endptr end of available data
768
printname filename for printing
773
static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
774
char *endptr, char *printname)
776
if (after_context > 0 && lastmatchnumber > 0)
779
while (lastmatchrestart < endptr && count++ < after_context)
782
char *pp = lastmatchrestart;
783
if (printname != NULL) fprintf(stdout, "%s-", printname);
784
if (number) fprintf(stdout, "%d-", lastmatchnumber++);
785
pp = end_of_line(pp, endptr, &ellength);
786
fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
787
lastmatchrestart = pp;
789
hyphenpending = TRUE;
795
/*************************************************
796
* Grep an individual file *
797
*************************************************/
799
/* This is called from grep_or_recurse() below. It uses a buffer that is three
800
times the value of MBUFTHIRD. The matching point is never allowed to stray into
801
the top third of the buffer, thus keeping more of the file available for
802
context printing or for multiline scanning. For large files, the pointer will
803
be in the middle third most of the time, so the bottom third is available for
804
"before" context printing.
807
in the fopened FILE stream
808
printname the file name if it is to be printed for each match
809
or NULL if the file name is not to be printed
810
it cannot be NULL if filenames[_nomatch]_only is set
812
Returns: 0 if there was at least one match
813
1 otherwise (no matches)
817
pcregrep(FILE *in, char *printname)
821
int lastmatchnumber = 0;
824
char *lastmatchrestart = NULL;
825
char buffer[3*MBUFTHIRD];
829
BOOL endhyphenpending = FALSE;
831
/* Do the first read into the start of the buffer and set up the pointer to
832
end of what we have. */
834
bufflength = fread(buffer, 1, 3*MBUFTHIRD, in);
835
endptr = buffer + bufflength;
837
/* Loop while the current pointer is not at the end of the file. For large
838
files, endptr will be at the end of the buffer when we are in the middle of the
839
file, but ptr will never get there, because as soon as it gets over 2/3 of the
840
way, the buffer is shifted left and re-filled. */
844
int i, endlinelength;
848
size_t length, linelength;
850
/* At this point, ptr is at the start of a line. We need to find the length
851
of the subject string to pass to pcre_exec(). In multiline mode, it is the
852
length of the remainder of the data in the buffer. Otherwise, it is the length of
853
the next line. After matching, we always advance by the length of the next
854
line. In multiline mode the PCRE_FIRSTLINE option is used for compiling, so
855
that any match is constrained to be in the first line. */
857
t = end_of_line(t, endptr, &endlinelength);
858
linelength = t - ptr - endlinelength;
859
length = multiline? (size_t)(endptr - ptr) : linelength;
861
/* Extra processing for Jeffrey Friedl's debugging. */
864
if (jfriedl_XT || jfriedl_XR)
866
#include <sys/time.h>
868
struct timeval start_time, end_time;
869
struct timezone dummy;
873
unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
874
const char *orig = ptr;
875
ptr = malloc(newlen + 1);
877
printf("out of memory");
881
strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
882
for (i = 0; i < jfriedl_XT; i++) {
883
strncpy(endptr, orig, length);
886
strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
890
if (gettimeofday(&start_time, &dummy) != 0)
891
perror("bad gettimeofday");
894
for (i = 0; i < jfriedl_XR; i++)
895
match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0, 0, offsets, 99) >= 0);
897
if (gettimeofday(&end_time, &dummy) != 0)
898
perror("bad gettimeofday");
900
double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
902
(start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
904
printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
910
/* Run through all the patterns until one matches. Note that we don't include
911
the final newline in the subject string. */
913
for (i = 0; i < pattern_count; i++)
915
mrc = pcre_exec(pattern_list[i], hints_list[i], ptr, length, 0, 0,
917
if (mrc >= 0) { match = TRUE; break; }
918
if (mrc != PCRE_ERROR_NOMATCH)
920
fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", mrc);
921
if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
922
fprintf(stderr, "this line:\n");
923
fwrite(ptr, 1, linelength, stderr); /* In case binary zero included */
924
fprintf(stderr, "\n");
925
if (error_count == 0 &&
926
(mrc == PCRE_ERROR_MATCHLIMIT || mrc == PCRE_ERROR_RECURSIONLIMIT))
928
fprintf(stderr, "pcregrep: error %d means that a resource limit "
929
"was exceeded\n", mrc);
930
fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
932
if (error_count++ > 20)
934
fprintf(stderr, "pcregrep: too many errors - abandoned\n");
937
match = invert; /* No more matching; don't show the line again */
942
/* If it's a match or a not-match (as required), do what's wanted. */
946
BOOL hyphenprinted = FALSE;
948
/* We've failed if we want a file that doesn't have any matches. */
950
if (filenames == FN_NOMATCH_ONLY) return 1;
952
/* Just count if just counting is wanted. */
954
if (count_only) count++;
956
/* If all we want is a file name, there is no need to scan any more lines
959
else if (filenames == FN_ONLY)
961
fprintf(stdout, "%s\n", printname);
965
/* Likewise, if all we want is a yes/no answer. */
967
else if (quiet) return 0;
969
/* The --only-matching option prints just the substring that matched, and
970
does not print any context. */
972
else if (only_matching)
974
if (printname != NULL) fprintf(stdout, "%s:", printname);
975
if (number) fprintf(stdout, "%d:", linenumber);
976
fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
977
fprintf(stdout, "\n");
980
/* This is the default case when none of the above options is set. We print
981
the matching line(s), possibly preceded and/or followed by other lines of
986
/* See if there is a requirement to print some "after" lines from a
987
previous match. We never print any overlaps. */
989
if (after_context > 0 && lastmatchnumber > 0)
993
char *p = lastmatchrestart;
995
while (p < ptr && linecount < after_context)
997
p = end_of_line(p, ptr, &ellength);
1001
/* It is important to advance lastmatchrestart during this printing so
1002
that it interacts correctly with any "before" printing below. Print
1003
each line's data using fwrite() in case there are binary zeroes. */
1005
while (lastmatchrestart < p)
1007
char *pp = lastmatchrestart;
1008
if (printname != NULL) fprintf(stdout, "%s-", printname);
1009
if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1010
pp = end_of_line(pp, endptr, &ellength);
1011
fwrite(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1012
lastmatchrestart = pp;
1014
if (lastmatchrestart != ptr) hyphenpending = TRUE;
1017
/* If there were non-contiguous lines printed above, insert hyphens. */
1021
fprintf(stdout, "--\n");
1022
hyphenpending = FALSE;
1023
hyphenprinted = TRUE;
1026
/* See if there is a requirement to print some "before" lines for this
1027
match. Again, don't print overlaps. */
1029
if (before_context > 0)
1034
while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1035
linecount < before_context)
1038
p = previous_line(p, buffer);
1041
if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1042
fprintf(stdout, "--\n");
1048
if (printname != NULL) fprintf(stdout, "%s-", printname);
1049
if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1050
pp = end_of_line(pp, endptr, &ellength);
1051
fwrite(p, 1, pp - p, stdout);
1056
/* Now print the matching line(s); ensure we set hyphenpending at the end
1057
of the file if any context lines are being output. */
1059
if (after_context > 0 || before_context > 0)
1060
endhyphenpending = TRUE;
1062
if (printname != NULL) fprintf(stdout, "%s:", printname);
1063
if (number) fprintf(stdout, "%d:", linenumber);
1065
/* In multiline mode, we want to print to the end of the line in which
1066
the end of the matched string is found, so we adjust linelength and the
1067
line number appropriately, but only when there actually was a match
1068
(invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1069
the match will always be before the first newline sequence. */
1074
char *endmatch = ptr;
1077
endmatch += offsets[1];
1079
while (t < endmatch)
1081
t = end_of_line(t, endptr, &ellength);
1082
if (t <= endmatch) linenumber++; else break;
1085
endmatch = end_of_line(endmatch, endptr, &ellength);
1086
linelength = endmatch - ptr - ellength;
1089
/*** NOTE: Use only fwrite() to output the data line, so that binary
1090
zeroes are treated as just another data character. */
1092
/* This extra option, for Jeffrey Friedl's debugging requirements,
1093
replaces the matched string, or a specific captured string if it exists,
1094
with X. When this happens, colouring is ignored. */
1096
#ifdef JFRIEDL_DEBUG
1097
if (S_arg >= 0 && S_arg < mrc)
1099
int first = S_arg * 2;
1100
int last = first + 1;
1101
fwrite(ptr, 1, offsets[first], stdout);
1102
fprintf(stdout, "X");
1103
fwrite(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1108
/* We have to split the line(s) up if colouring. */
1112
fwrite(ptr, 1, offsets[0], stdout);
1113
fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1114
fwrite(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1115
fprintf(stdout, "%c[00m", 0x1b);
1116
fwrite(ptr + offsets[1], 1, (linelength + endlinelength) - offsets[1],
1119
else fwrite(ptr, 1, linelength + endlinelength, stdout);
1122
/* End of doing what has to be done for a match */
1124
rc = 0; /* Had some success */
1126
/* Remember where the last match happened for after_context. We remember
1127
where we are about to restart, and that line's number. */
1129
lastmatchrestart = ptr + linelength + endlinelength;
1130
lastmatchnumber = linenumber + 1;
1133
/* For a match in multiline inverted mode (which of course did not cause
1134
anything to be printed), we have to move on to the end of the match before
1137
if (multiline && invert && match)
1140
char *endmatch = ptr + offsets[1];
1142
while (t < endmatch)
1144
t = end_of_line(t, endptr, &ellength);
1145
if (t <= endmatch) linenumber++; else break;
1147
endmatch = end_of_line(endmatch, endptr, &ellength);
1148
linelength = endmatch - ptr - ellength;
1151
/* Advance to after the newline and increment the line number. */
1153
ptr += linelength + endlinelength;
1156
/* If we haven't yet reached the end of the file (the buffer is full), and
1157
the current point is in the top 1/3 of the buffer, slide the buffer down by
1158
1/3 and refill it. Before we do this, if some unprinted "after" lines are
1159
about to be lost, print them. */
1161
if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1163
if (after_context > 0 &&
1164
lastmatchnumber > 0 &&
1165
lastmatchrestart < buffer + MBUFTHIRD)
1167
do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1168
lastmatchnumber = 0;
1171
/* Now do the shuffle */
1173
memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1175
bufflength = 2*MBUFTHIRD + fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in);
1176
endptr = buffer + bufflength;
1178
/* Adjust any last match point */
1180
if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1182
} /* Loop through the whole file */
1184
/* End of file; print final "after" lines if wanted; do_after_lines sets
1185
hyphenpending if it prints something. */
1187
if (!only_matching && !count_only)
1189
do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1190
hyphenpending |= endhyphenpending;
1193
/* Print the file name if we are looking for those without matches and there
1194
were none. If we found a match, we won't have got this far. */
1196
if (filenames == FN_NOMATCH_ONLY)
1198
fprintf(stdout, "%s\n", printname);
1202
/* Print the match count if wanted */
1206
if (printname != NULL) fprintf(stdout, "%s:", printname);
1207
fprintf(stdout, "%d\n", count);
1215
/*************************************************
1216
* Grep a file or recurse into a directory *
1217
*************************************************/
1219
/* Given a path name, if it's a directory, scan all the files if we are
1220
recursing; if it's a file, grep it.
1223
pathname the path to investigate
1224
dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1225
only_one_at_top TRUE if the path is the only one at toplevel
1227
Returns: 0 if there was at least one match
1228
1 if there were no matches
1229
2 there was some kind of error
1231
However, file opening failures are suppressed if "silent" is set.
1235
grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1241
/* If the file name is "-" we scan stdin */
1243
if (strcmp(pathname, "-") == 0)
1245
return pcregrep(stdin,
1246
(filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1251
/* If the file is a directory, skip if skipping or if we are recursing, scan
1252
each file within it, subject to any include or exclude patterns that were set.
1253
The scanning code is localized so it can be made system-specific. */
1255
if ((sep = isdirectory(pathname)) != 0)
1257
if (dee_action == dee_SKIP) return 1;
1258
if (dee_action == dee_RECURSE)
1262
directory_type *dir = opendirectory(pathname);
1267
fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1272
while ((nextfile = readdirectory(dir)) != NULL)
1275
sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1276
blen = strlen(buffer);
1278
if (exclude_compiled != NULL &&
1279
pcre_exec(exclude_compiled, NULL, buffer, blen, 0, 0, NULL, 0) >= 0)
1282
if (include_compiled != NULL &&
1283
pcre_exec(include_compiled, NULL, buffer, blen, 0, 0, NULL, 0) < 0)
1286
frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1287
if (frc > 1) rc = frc;
1288
else if (frc == 0 && rc == 1) rc = 0;
1291
closedirectory(dir);
1296
/* If the file is not a directory and not a regular file, skip it if that's
1299
else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1301
/* Control reaches here if we have a regular file, or if we have a directory
1302
and recursion or skipping was not requested, or if we have anything else and
1303
skipping was not requested. The scan proceeds. If this is the first and only
1304
argument at top level, we don't show the file name, unless we are only showing
1305
the file name, or the filename was forced (-H). */
1307
in = fopen(pathname, "r");
1311
fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1316
rc = pcregrep(in, (filenames > FN_DEFAULT ||
1317
(filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1326
/*************************************************
1328
*************************************************/
1334
fprintf(stderr, "Usage: pcregrep [-");
1335
for (op = optionlist; op->one_char != 0; op++)
1337
if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1339
fprintf(stderr, "] [long options] [pattern] [files]\n");
1340
fprintf(stderr, "Type `pcregrep --help' for more information.\n");
1347
/*************************************************
1349
*************************************************/
1356
printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1357
printf("Search for PATTERN in each FILE or standard input.\n");
1358
printf("PATTERN must be present if neither -e nor -f is used.\n");
1359
printf("\"-\" can be used as a file name to mean STDIN.\n\n");
1360
printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1362
printf("Options:\n");
1364
for (op = optionlist; op->one_char != 0; op++)
1368
if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
1369
printf(" %s --%s%n", s, op->long_name, &n);
1372
printf("%.*s%s\n", n, " ", op->help_text);
1375
printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1376
printf("trailing white space is removed and blank lines are ignored.\n");
1377
printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1379
printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1380
printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1386
/*************************************************
1387
* Handle a single-letter, no data option *
1388
*************************************************/
1391
handle_option(int letter, int options)
1395
case N_HELP: help(); exit(0);
1396
case 'c': count_only = TRUE; break;
1397
case 'F': process_options |= PO_FIXED_STRINGS; break;
1398
case 'H': filenames = FN_FORCE; break;
1399
case 'h': filenames = FN_NONE; break;
1400
case 'i': options |= PCRE_CASELESS; break;
1401
case 'l': filenames = FN_ONLY; break;
1402
case 'L': filenames = FN_NOMATCH_ONLY; break;
1403
case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1404
case 'n': number = TRUE; break;
1405
case 'o': only_matching = TRUE; break;
1406
case 'q': quiet = TRUE; break;
1407
case 'r': dee_action = dee_RECURSE; break;
1408
case 's': silent = TRUE; break;
1409
case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1410
case 'v': invert = TRUE; break;
1411
case 'w': process_options |= PO_WORD_MATCH; break;
1412
case 'x': process_options |= PO_LINE_MATCH; break;
1415
fprintf(stderr, "pcregrep version %s\n", pcre_version());
1420
fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1430
/*************************************************
1431
* Construct printed ordinal *
1432
*************************************************/
1434
/* This turns a number into "1st", "3rd", etc. */
1439
static char buffer[8];
1441
sprintf(p, "%d", n);
1442
while (*p != 0) p++;
1445
case 1: strcpy(p, "st"); break;
1446
case 2: strcpy(p, "nd"); break;
1447
case 3: strcpy(p, "rd"); break;
1448
default: strcpy(p, "th"); break;
1455
/*************************************************
1456
* Compile a single pattern *
1457
*************************************************/
1459
/* When the -F option has been used, this is called for each substring.
1460
Otherwise it's called for each supplied pattern.
1463
pattern the pattern string
1464
options the PCRE options
1465
filename the file name, or NULL for a command-line pattern
1466
count 0 if this is the only command line pattern, or
1467
number of the command line pattern, or
1468
linenumber for a pattern from a file
1470
Returns: TRUE on success, FALSE after an error
1474
compile_single_pattern(char *pattern, int options, char *filename, int count)
1476
char buffer[MBUFTHIRD + 16];
1480
if (pattern_count >= MAX_PATTERN_COUNT)
1482
fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1483
(filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1487
sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1488
suffix[process_options]);
1489
pattern_list[pattern_count] =
1490
pcre_compile(buffer, options, &error, &errptr, pcretables);
1491
if (pattern_list[pattern_count] != NULL)
1497
/* Handle compile errors */
1499
errptr -= (int)strlen(prefix[process_options]);
1500
if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1502
if (filename == NULL)
1505
fprintf(stderr, "pcregrep: Error in command-line regex "
1506
"at offset %d: %s\n", errptr, error);
1508
fprintf(stderr, "pcregrep: Error in %s command-line regex "
1509
"at offset %d: %s\n", ordin(count), errptr, error);
1513
fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1514
"at offset %d: %s\n", count, filename, errptr, error);
1522
/*************************************************
1523
* Compile one supplied pattern *
1524
*************************************************/
1526
/* When the -F option has been used, each string may be a list of strings,
1527
separated by line breaks. They will be matched literally.
1530
pattern the pattern string
1531
options the PCRE options
1532
filename the file name, or NULL for a command-line pattern
1533
count 0 if this is the only command line pattern, or
1534
number of the command line pattern, or
1535
linenumber for a pattern from a file
1537
Returns: TRUE on success, FALSE after an error
1541
compile_pattern(char *pattern, int options, char *filename, int count)
1543
if ((process_options & PO_FIXED_STRINGS) != 0)
1545
char *eop = pattern + strlen(pattern);
1546
char buffer[MBUFTHIRD];
1550
char *p = end_of_line(pattern, eop, &ellength);
1552
return compile_single_pattern(pattern, options, filename, count);
1553
sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1555
if (!compile_single_pattern(buffer, options, filename, count))
1559
else return compile_single_pattern(pattern, options, filename, count);
1564
/*************************************************
1566
*************************************************/
1568
/* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
1571
main(int argc, char **argv)
1575
int pcre_options = 0;
1576
int cmd_pattern_count = 0;
1579
BOOL only_one_at_top;
1580
char *patterns[MAX_PATTERN_COUNT];
1581
const char *locale_from = "--locale";
1584
/* Set the default line ending value from the default in the PCRE library;
1585
"lf", "cr", "crlf", "any", and "anycrlf" are supported. Anything else is
treated as "lf".
1588
(void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1591
default: newline = (char *)"lf"; break;
1592
case '\r': newline = (char *)"cr"; break;
1593
case ('\r' << 8) | '\n': newline = (char *)"crlf"; break;
1594
case -1: newline = (char *)"any"; break;
1595
case -2: newline = (char *)"anycrlf"; break;
1598
/* Process the options */
1600
for (i = 1; i < argc; i++)
1602
option_item *op = NULL;
1603
char *option_data = (char *)""; /* default to keep compiler happy */
1605
BOOL longopwasequals = FALSE;
1607
if (argv[i][0] != '-') break;
1609
/* If we hit an argument that is just "-", it may be a reference to STDIN,
1610
but only if we have previously had -e or -f to define the patterns. */
1612
if (argv[i][1] == 0)
1614
if (pattern_filename != NULL || pattern_count > 0) break;
1615
else exit(usage(2));
1618
/* Handle a long name option, or -- to terminate the options */
1620
if (argv[i][1] == '-')
1622
char *arg = argv[i] + 2;
1623
char *argequals = strchr(arg, '=');
1625
if (*arg == 0) /* -- terminates options */
1628
break; /* out of the options-handling loop */
1633
/* Some long options have data that follows after =, for example file=name.
1634
Some options have variations in the long name spelling: specifically, we
1635
allow "regexp" because GNU grep allows it, though I personally go along
1636
with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
1637
These options are entered in the table as "regex(p)". No option is in both
1638
these categories, fortunately. */
1640
for (op = optionlist; op->one_char != 0; op++)
1642
char *opbra = strchr(op->long_name, '(');
1643
char *equals = strchr(op->long_name, '=');
1644
if (opbra == NULL) /* Not a (p) case */
1646
if (equals == NULL) /* Not thing=data case */
1648
if (strcmp(arg, op->long_name) == 0) break;
1650
else /* Special case xxx=data */
1652
int oplen = equals - op->long_name;
1653
int arglen = (argequals == NULL)? (int)strlen(arg) : argequals - arg;
1654
if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
1656
option_data = arg + arglen;
1657
if (*option_data == '=')
1660
longopwasequals = TRUE;
1666
else /* Special case xxxx(p) */
1670
int baselen = opbra - op->long_name;
1671
sprintf(buff1, "%.*s", baselen, op->long_name);
1672
sprintf(buff2, "%s%.*s", buff1,
1673
(int)strlen(op->long_name) - baselen - 2, opbra + 1);
1674
if (strcmp(arg, buff1) == 0 || strcmp(arg, buff2) == 0)
1679
if (op->one_char == 0)
1681
fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
1687
/* Jeffrey Friedl's debugging harness uses these additional options which
1688
are not in the right form for putting in the option table because they use
1689
only one hyphen, yet are more than one character long. By putting them
1690
separately here, they will not get displayed as part of the help() output,
1691
but I don't think Jeffrey will care about that. */
1693
#ifdef JFRIEDL_DEBUG
1694
else if (strcmp(argv[i], "-pre") == 0) {
1695
jfriedl_prefix = argv[++i];
1697
} else if (strcmp(argv[i], "-post") == 0) {
1698
jfriedl_postfix = argv[++i];
1700
} else if (strcmp(argv[i], "-XT") == 0) {
1701
sscanf(argv[++i], "%d", &jfriedl_XT);
1703
} else if (strcmp(argv[i], "-XR") == 0) {
1704
sscanf(argv[++i], "%d", &jfriedl_XR);
1710
/* One-char options; many that have no data may be in a single argument; we
1711
continue till we hit the last one or one that needs data. */
1715
char *s = argv[i] + 1;
1719
for (op = optionlist; op->one_char != 0; op++)
1720
{ if (*s == op->one_char) break; }
1721
if (op->one_char == 0)
1723
fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
1727
if (op->type != OP_NODATA || s[1] == 0)
1732
pcre_options = handle_option(*s++, pcre_options);
1736
/* At this point we should have op pointing to a matched option. If the type
1737
is OP_NODATA, it means that there is no data, and the option might set
1738
something in the PCRE options. */
1740
if (op->type == OP_NODATA)
1742
pcre_options = handle_option(op->one_char, pcre_options);
1746
/* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
1747
either has a value or defaults to something. It cannot have data in a
1748
separate item. At the moment, the only such options are "colo(u)r" and
1749
Jeffrey Friedl's special -S debugging option. */
1751
if (*option_data == 0 &&
1752
(op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
1754
switch (op->one_char)
1757
colour_option = (char *)"auto";
1759
#ifdef JFRIEDL_DEBUG
1768
/* Otherwise, find the data string for the option. */
1770
if (*option_data == 0)
1772
if (i >= argc - 1 || longopwasequals)
1774
fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
1777
option_data = argv[++i];
1780
/* If the option type is OP_PATLIST, it's the -e option, which can be called
1781
multiple times to create a list of patterns. */
1783
if (op->type == OP_PATLIST)
1785
if (cmd_pattern_count >= MAX_PATTERN_COUNT)
1787
fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
1791
patterns[cmd_pattern_count++] = option_data;
1794
/* Otherwise, deal with single string or numeric data values. */
1796
else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
1798
*((char **)op->dataptr) = option_data;
1803
int n = strtoul(option_data, &endptr, 10);
1808
char *equals = strchr(op->long_name, '=');
1809
int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1810
equals - op->long_name;
1811
fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1812
option_data, nlen, op->long_name);
1815
fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1816
option_data, op->one_char);
1819
*((int *)op->dataptr) = n;
1823
/* Options have been decoded. If -C was used, its value is used as a default
1826
if (both_context > 0)
1828
if (after_context == 0) after_context = both_context;
1829
if (before_context == 0) before_context = both_context;
1832
/* If a locale has not been provided as an option, see if the LC_CTYPE or
1833
LC_ALL environment variable is set, and if so, use it. */
1837
locale = getenv("LC_ALL");
1838
/* Name the variable the locale actually came from (LC_ALL); this label is
printed in the "Failed to set locale" diagnostic. */
locale_from = "LC_ALL";
1843
locale = getenv("LC_CTYPE");
1844
locale_from = "LC_CTYPE";
1847
/* If a locale has been provided, set it, and generate the tables that PCRE
1848
needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
1852
if (setlocale(LC_CTYPE, locale) == NULL)
1854
fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
1855
locale, locale_from);
1858
pcretables = pcre_maketables();
1861
/* Sort out colouring */
1863
if (colour_option != NULL && strcmp(colour_option, "never") != 0)
1865
if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
1866
else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
1869
fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
1875
char *cs = getenv("PCREGREP_COLOUR");
1876
if (cs == NULL) cs = getenv("PCREGREP_COLOR");
1877
if (cs != NULL) colour_string = cs;
1881
/* Interpret the newline type; the default settings are Unix-like. */
1883
if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
1885
pcre_options |= PCRE_NEWLINE_CR;
1886
endlinetype = EL_CR;
1888
else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
1890
pcre_options |= PCRE_NEWLINE_LF;
1891
endlinetype = EL_LF;
1893
else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
1895
pcre_options |= PCRE_NEWLINE_CRLF;
1896
endlinetype = EL_CRLF;
1898
else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
1900
pcre_options |= PCRE_NEWLINE_ANY;
1901
endlinetype = EL_ANY;
1903
else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
1905
pcre_options |= PCRE_NEWLINE_ANYCRLF;
1906
endlinetype = EL_ANYCRLF;
1910
fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
1914
/* Interpret the text values for -d and -D */
1916
if (dee_option != NULL)
1918
if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
1919
else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
1920
else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
1923
fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
1928
if (DEE_option != NULL)
1930
if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
1931
else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
1934
fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
1939
/* Check the values for Jeffrey Friedl's debugging options. */
1941
#ifdef JFRIEDL_DEBUG
1944
fprintf(stderr, "pcregrep: bad value for -S option\n");
1947
if (jfriedl_XT != 0 || jfriedl_XR != 0)
1949
if (jfriedl_XT == 0) jfriedl_XT = 1;
1950
if (jfriedl_XR == 0) jfriedl_XR = 1;
1954
/* Get memory to store the pattern and hints lists. */
1956
pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
1957
hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
1959
if (pattern_list == NULL || hints_list == NULL)
1961
fprintf(stderr, "pcregrep: malloc failed\n");
1965
/* If no patterns were provided by -e, and there is no file provided by -f,
1966
the first argument is the one and only pattern, and it must exist. */
1968
if (cmd_pattern_count == 0 && pattern_filename == NULL)
1970
if (i >= argc) return usage(2);
1971
patterns[cmd_pattern_count++] = argv[i++];
1974
/* Compile the patterns that were provided on the command line, either by
1975
multiple uses of -e or as a single unkeyed pattern. */
1977
for (j = 0; j < cmd_pattern_count; j++)
1979
if (!compile_pattern(patterns[j], pcre_options, NULL,
1980
(j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
1984
/* Compile the regular expressions that are provided in a file. */
1986
if (pattern_filename != NULL)
1991
char buffer[MBUFTHIRD];
1993
if (strcmp(pattern_filename, "-") == 0)
1996
filename = stdin_name;
2000
f = fopen(pattern_filename, "r");
2003
fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2007
filename = pattern_filename;
2010
while (fgets(buffer, MBUFTHIRD, f) != NULL)
2012
char *s = buffer + (int)strlen(buffer);
2013
while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2016
if (buffer[0] == 0) continue; /* Skip blank lines */
2017
if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2021
if (f != stdin) fclose(f);
2024
/* Study the regular expressions, as we will be running them many times */
2026
for (j = 0; j < pattern_count; j++)
2028
hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2032
if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2033
fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2039
/* If there are include or exclude patterns, compile them. */
2041
if (exclude_pattern != NULL)
2043
exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2045
if (exclude_compiled == NULL)
2047
fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2053
if (include_pattern != NULL)
2055
include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2057
if (include_compiled == NULL)
2059
fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2065
/* If there are no further arguments, do the business on stdin and exit. */
2069
rc = pcregrep(stdin, (filenames > FN_DEFAULT)? stdin_name : NULL);
2073
/* Otherwise, work through the remaining arguments as files or directories.
2074
Pass in the fact that there is only one argument at top level - this suppresses
2075
the file name if the argument is not a directory and filenames are not
2076
otherwise forced. */
2078
only_one_at_top = i == argc - 1; /* Catch initial value of i */
2080
for (; i < argc; i++)
2082
int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2084
if (frc > 1) rc = frc;
2085
else if (frc == 0 && rc == 1) rc = 0;
2089
if (pattern_list != NULL)
2091
for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2094
if (hints_list != NULL)
2096
for (i = 0; i < hint_count; i++) free(hints_list[i]);
2106
/* End of pcregrep */