1
/*************************************************
2
* PCRE testing program *
3
*************************************************/
5
/* This program was hacked up as a tester for PCRE. I really should have
6
written it more tidily in the first place. Will I ever learn? It has grown and
7
been extended and consequently is now rather, er, *very* untidy in places.
9
-----------------------------------------------------------------------------
10
Redistribution and use in source and binary forms, with or without
11
modification, are permitted provided that the following conditions are met:
13
* Redistributions of source code must retain the above copyright notice,
14
this list of conditions and the following disclaimer.
16
* Redistributions in binary form must reproduce the above copyright
17
notice, this list of conditions and the following disclaimer in the
18
documentation and/or other materials provided with the distribution.
20
* Neither the name of the University of Cambridge nor the names of its
21
contributors may be used to endorse or promote products derived from
22
this software without specific prior written permission.
24
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34
POSSIBILITY OF SUCH DAMAGE.
35
-----------------------------------------------------------------------------
52
/* A number of things vary for Windows builds. Originally, pcretest opened its
53
input and output without "b"; then I was told that "b" was needed in some
54
environments, so it was added for release 5.0 to both the input and output. (It
55
makes no difference on Unix-like systems.) Later I was told that it is wrong
56
for the input on Windows. I've now abstracted the modes into two macros that
57
are set here, to make it easier to fiddle with them, and removed "b" from the
58
input mode under Windows. */
60
#if defined(_WIN32) || defined(WIN32)
61
#include <io.h> /* For _setmode() */
62
#include <fcntl.h> /* For _O_BINARY */
63
#define INPUT_MODE "r"
64
#define OUTPUT_MODE "wb"
67
#include <sys/time.h> /* These two includes are needed */
68
#include <sys/resource.h> /* for setrlimit(). */
69
#define INPUT_MODE "rb"
70
#define OUTPUT_MODE "wb"
74
/* We have to include pcre_internal.h because we need the internal info for
75
displaying the results of pcre_study() and we also need to know about the
76
internal macros, structures, and other internal data values; pcretest has
77
"inside information" compared to a program that strictly follows the PCRE API.
79
Although pcre_internal.h does itself include pcre.h, we explicitly include it
80
here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81
appropriately for an application, not for building PCRE. */
84
#include <limits.h> /* For INT_MAX */
#include "pcre_internal.h"
86
/* We need access to the data tables that PCRE uses. So as not to have to keep
87
two copies, we include the source file here, changing the names of the external
88
symbols to prevent clashes. */
90
#define _pcre_utf8_table1 utf8_table1
91
#define _pcre_utf8_table1_size utf8_table1_size
92
#define _pcre_utf8_table2 utf8_table2
93
#define _pcre_utf8_table3 utf8_table3
94
#define _pcre_utf8_table4 utf8_table4
96
#define _pcre_utt_size utt_size
97
#define _pcre_utt_names utt_names
98
#define _pcre_OP_lengths OP_lengths
100
#include "pcre_tables.c"
102
/* We also need the pcre_printint() function for printing out compiled
103
patterns. This function is in a separate file so that it can be included in
104
pcre_compile.c when that module is compiled with debugging enabled.
106
The definition of the macro PRINTABLE, which determines whether to print an
107
output character as-is or as a hex value when showing compiled patterns, is
108
contained in this file. We use it here also, in cases when the locale has not
109
been explicitly changed, so as to get consistent output from systems that
110
differ in their output from isprint() even in the "C" locale. */
112
#include "pcre_printint.src"
114
#define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
117
/* It is possible to compile this test program without including support for
118
testing the POSIX interface, though this is not available via the standard
122
#include "pcreposix.h"
125
/* It is also possible, for the benefit of the version currently imported into
126
Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
127
interface to the DFA matcher (NODFA), and without the doublecheck of the old
128
"info" function (define NOINFOCHECK). In fact, we automatically cut out the
129
UTF8 support if PCRE is built without it. */
138
/* Other parameters */
140
#ifndef CLOCKS_PER_SEC
142
#define CLOCKS_PER_SEC CLK_TCK
144
#define CLOCKS_PER_SEC 100
148
/* This is the default loop count for timing. */
150
#define LOOPREPEAT 500000
152
/* Static variables */
154
static FILE *outfile;
155
static int log_store = 0;
156
static int callout_count;
157
static int callout_extra;
158
static int callout_fail_count;
159
static int callout_fail_id;
160
static int debug_lengths;
161
static int first_callout;
162
static int locale_set = 0;
163
static int show_malloc;
165
static size_t gotten_store;
167
/* The buffers grow automatically if very long input lines are encountered. */
169
static int buffer_size = 50000;
170
static uschar *buffer = NULL;
171
static uschar *dbuffer = NULL;
172
static uschar *pbuffer = NULL;
176
/*************************************************
177
* Read or extend an input line *
178
*************************************************/
180
/* Input lines are read into buffer, but both patterns and data lines can be
181
continued over multiple input lines. In addition, if the buffer fills up, we
182
want to automatically expand it so as to be able to handle extremely large
183
lines that are needed for certain stress tests. When the input buffer is
184
expanded, the other two buffers must also be expanded likewise, and the
185
contents of pbuffer, which are a copy of the input for callouts, must be
186
preserved (for when expansion happens for a data line). This is not the most
187
optimal way of handling this, but hey, this is just a test program!
191
start where in buffer to start (this *must* be within buffer)
193
Returns: pointer to the start of new data
194
could be a copy of start, or could be moved
195
NULL if no data read and EOF reached
199
extend_inputline(FILE *f, uschar *start)
201
uschar *here = start;
205
int rlen = buffer_size - (here - buffer);
210
if (fgets((char *)here, rlen, f) == NULL)
211
return (here == start)? NULL : start;
212
dlen = (int)strlen((char *)here);
213
if (dlen > 0 && here[dlen - 1] == '\n') return start;
219
int new_buffer_size = 2*buffer_size;
220
uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
221
uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
222
uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
224
if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
226
fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
230
memcpy(new_buffer, buffer, buffer_size);
231
memcpy(new_pbuffer, pbuffer, buffer_size);
233
buffer_size = new_buffer_size;
235
start = new_buffer + (start - buffer);
236
here = new_buffer + (here - buffer);
243
dbuffer = new_dbuffer;
244
pbuffer = new_pbuffer;
248
return NULL; /* Control never gets here */
257
/*************************************************
258
* Read number from string *
259
*************************************************/
261
/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped, then a run of decimal digits is converted. A
digit string that is too large for an int yields INT_MAX rather than
overflowing (signed overflow is undefined behaviour); the whole run of digits
is still consumed so that the caller's end-of-argument check keeps working.

Arguments:
  str           string to be converted
  endptr        where to put the end pointer (the first character that was
                  not consumed)

Returns:        the value as a non-negative int; 0 if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;

while (*str != 0 && isspace(*str)) str++;

while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');

  /* result * 10 + digit would exceed INT_MAX: stick at the maximum. */

  if (result > (INT_MAX - digit) / 10) result = INT_MAX;
    else result = result * 10 + digit;
  }

*endptr = str;
return result;
}
285
/*************************************************
286
* Convert UTF-8 string to value *
287
*************************************************/
289
/* This function takes one or more bytes that represents a UTF-8 character,
290
and returns the value of the character.
293
utf8bytes a pointer to the byte vector
294
vptr a pointer to an int to receive the value
296
Returns: > 0 => the number of bytes consumed
297
-6 to 0 => malformed UTF-8 character at offset = (-return)
303
utf82ord(unsigned char *utf8bytes, int *vptr)
305
int c = *utf8bytes++;
309
for (i = -1; i < 6; i++) /* i is number of additional bytes */
311
if ((d & 0x80) == 0) break;
315
if (i == -1) { *vptr = c; return 1; } /* ascii character */
316
if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
318
/* i now has a value in the range 1-5 */
321
d = (c & utf8_table3[i]) << s;
323
for (j = 0; j < i; j++)
326
if ((c & 0xc0) != 0x80) return -(j+1);
328
d |= (c & 0x3f) << s;
331
/* Check that encoding was the correct unique one */
333
for (j = 0; j < utf8_table1_size; j++)
334
if (d <= utf8_table1[j]) break;
335
if (j != i) return -(i+1);
347
/*************************************************
348
* Convert character value to UTF-8 *
349
*************************************************/
351
/* This function takes an integer value in the range 0 - 0x7fffffff
352
and encodes it as a UTF-8 character in 0 to 6 bytes.
355
cvalue the character value
356
utf8bytes pointer to buffer for result - at least 6 bytes long
358
Returns: number of characters placed in the buffer
364
ord2utf8(int cvalue, uschar *utf8bytes)
367
for (i = 0; i < utf8_table1_size; i++)
368
if (cvalue <= utf8_table1[i]) break;
370
for (j = i; j > 0; j--)
372
*utf8bytes-- = 0x80 | (cvalue & 0x3f);
375
*utf8bytes = utf8_table2[i] | cvalue;
383
/*************************************************
384
* Print character string *
385
*************************************************/
387
/* Character string printing function. Must handle UTF-8 strings in utf8
388
mode. Yields number of characters printed. If handed a NULL file, just counts
389
chars without printing. */
391
static int pchars(unsigned char *p, int length, FILE *f)
401
int rc = utf82ord(p, &c);
403
if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
409
if (f != NULL) fprintf(f, "%c", c);
415
if (f != NULL) fprintf(f, "\\x{%02x}", c);
416
yield += (n <= 0x000000ff)? 2 :
417
(n <= 0x00000fff)? 3 :
418
(n <= 0x0000ffff)? 4 :
419
(n <= 0x000fffff)? 5 : 6;
426
/* Not UTF-8, or malformed UTF-8 */
431
if (f != NULL) fprintf(f, "%c", c);
436
if (f != NULL) fprintf(f, "\\x%02x", c);
446
/*************************************************
448
*************************************************/
450
/* Called from PCRE as a result of the (?C) item. We print out where we are in
451
the match. Yield zero unless more callouts than the fail count, or the callout
454
static int callout(pcre_callout_block *cb)
456
FILE *f = (first_callout | callout_extra)? outfile : NULL;
457
int i, pre_start, post_start, subject_length;
461
fprintf(f, "Callout %d: last capture = %d\n",
462
cb->callout_number, cb->capture_last);
464
for (i = 0; i < cb->capture_top * 2; i += 2)
466
if (cb->offset_vector[i] < 0)
467
fprintf(f, "%2d: <unset>\n", i/2);
470
fprintf(f, "%2d: ", i/2);
471
(void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
472
cb->offset_vector[i+1] - cb->offset_vector[i], f);
478
/* Re-print the subject in canonical form, the first time or if giving full
479
details. On subsequent calls in the same match, we use pchars just to find the
480
printed lengths of the substrings. */
482
if (f != NULL) fprintf(f, "--->");
484
pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
485
post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
486
cb->current_position - cb->start_match, f);
488
subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
490
(void)pchars((unsigned char *)(cb->subject + cb->current_position),
491
cb->subject_length - cb->current_position, f);
493
if (f != NULL) fprintf(f, "\n");
495
/* Always print appropriate indicators, with callout number if not already
496
shown. For automatic callouts, show the pattern offset. */
498
if (cb->callout_number == 255)
500
fprintf(outfile, "%+3d ", cb->pattern_position);
501
if (cb->pattern_position > 99) fprintf(outfile, "\n ");
505
if (callout_extra) fprintf(outfile, " ");
506
else fprintf(outfile, "%3d ", cb->callout_number);
509
for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
510
fprintf(outfile, "^");
514
for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
515
fprintf(outfile, "^");
518
for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
519
fprintf(outfile, " ");
521
fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
522
pbuffer + cb->pattern_position);
524
fprintf(outfile, "\n");
527
if (cb->callout_data != NULL)
529
int callout_data = *((int *)(cb->callout_data));
530
if (callout_data != 0)
532
fprintf(outfile, "Callout data = %d\n", callout_data);
537
return (cb->callout_number != callout_fail_id)? 0 :
538
(++callout_count >= callout_fail_count)? 1 : 0;
542
/*************************************************
543
* Local malloc functions *
544
*************************************************/
546
/* Alternative malloc function, to test functionality and show the size of the
549
static void *new_malloc(size_t size)
551
void *block = malloc(size);
554
fprintf(outfile, "malloc %3d %p\n", (int)size, block);
558
static void new_free(void *block)
561
fprintf(outfile, "free %p\n", block);
566
/* For recursion malloc/free, to test stacking calls */
568
static void *stack_malloc(size_t size)
570
void *block = malloc(size);
572
fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
576
static void stack_free(void *block)
579
fprintf(outfile, "stack_free %p\n", block);
584
/*************************************************
585
* Call pcre_fullinfo() *
586
*************************************************/
588
/* Get one piece of information from the pcre_fullinfo() function */
590
static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
{
/* pcre_fullinfo() writes the requested item through ptr; a negative return
is treated as an error, which is reported on the output file and otherwise
ignored (nothing is returned to the caller). */

int rc = pcre_fullinfo(re, study, option, ptr);
if (rc < 0)
  fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
}
599
/*************************************************
600
* Byte flipping function *
601
*************************************************/
603
/* Reverse the byte order of an integer field.

Arguments:
  value       the value whose bytes are to be swapped
  n           size of the field in bytes: 2 selects a 16-bit swap; any other
                value (callers pass sizeof() of the field) is handled as a
                32-bit field

Returns:      the low 2 or 4 bytes of value in reverse order; any higher-order
                bits of value are discarded
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);

return ((value & 0x000000ff) << 24) |
       ((value & 0x0000ff00) <<  8) |
       ((value & 0x00ff0000) >>  8) |
       ((value & 0xff000000) >> 24);
}
616
/*************************************************
617
* Check match or recursion limit *
618
*************************************************/
621
check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
622
int start_offset, int options, int *use_offsets, int use_size_offsets,
623
int flag, unsigned long int *limit, int errnumber, const char *msg)
630
extra->flags |= flag;
636
count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
637
use_offsets, use_size_offsets);
639
if (count == errnumber)
641
/* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
643
mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
646
else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
647
count == PCRE_ERROR_PARTIAL)
651
fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
654
/* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
658
else break; /* Some other error */
661
extra->flags &= ~flag;
667
/*************************************************
668
* Case-independent strncmp() function *
669
*************************************************/
675
n number of characters to compare
677
Returns: < 0, = 0, or > 0, according to the comparison
681
strncmpic(uschar *s, uschar *t, int n)
685
int c = tolower(*s++) - tolower(*t++);
693
/*************************************************
694
* Check newline indicator *
695
*************************************************/
697
/* This is used both at compile and run-time to check for <xxx> escapes, where
698
xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
702
p points after the leading '<'
703
f file for error message
705
Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
709
check_newline(uschar *p, FILE *f)
711
if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
712
if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
713
if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
714
if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
715
if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
716
if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
717
if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
718
fprintf(f, "Unknown newline type at: <%s\n", p);
724
/*************************************************
726
*************************************************/
731
printf("Usage: pcretest [options] [<input> [<output>]]\n");
732
printf(" -b show compiled code (bytecode)\n");
733
printf(" -C show PCRE compile-time options and exit\n");
734
printf(" -d debug: show compiled code and information (-b and -i)\n");
736
printf(" -dfa force DFA matching for all subjects\n");
738
printf(" -help show usage information\n");
739
printf(" -i show information about compiled patterns\n"
740
" -m output memory used information\n"
741
" -o <n> set size of offsets vector to <n>\n");
743
printf(" -p use POSIX interface\n");
745
printf(" -q quiet: do not output PCRE version number at start\n");
746
printf(" -S <n> set stack size to <n> megabytes\n");
747
printf(" -s output store (memory) used information\n"
748
" -t time compilation and execution\n");
749
printf(" -t <n> time compilation and execution, repeating <n> times\n");
750
printf(" -tm time execution (matching) only\n");
751
printf(" -tm <n> time execution (matching) only, repeating <n> times\n");
756
/*************************************************
758
*************************************************/
760
/* Read lines from named file or stdin and write to named file or stdout; lines
761
consist of a regular expression, in delimiters and optionally followed by
762
options, followed by a set of test data, terminated by an empty line. */
764
int main(int argc, char **argv)
766
FILE *infile = stdin;
768
int study_options = 0;
775
int size_offsets = 45;
776
int size_offsets_max;
787
/* These vectors store, end-to-end, a list of captured substring names. Assume
788
that 1024 is plenty long enough for the few names we'll be testing. */
790
uschar copynames[1024];
791
uschar getnames[1024];
793
uschar *copynamesptr;
796
/* Get buffers from malloc() so that Electric Fence will check their misuse
797
when I am debugging. They grow automatically when very long lines are read. */
799
buffer = (unsigned char *)malloc(buffer_size);
800
dbuffer = (unsigned char *)malloc(buffer_size);
801
pbuffer = (unsigned char *)malloc(buffer_size);
803
/* The outfile variable is static so that new_malloc can use it. */
807
/* The following _setmode() stuff is some Windows magic that tells its runtime
808
library to translate CRLF into a single LF character. At least, that's what
809
I've been told: never having used Windows I take this all on trust. Originally
810
it set 0x8000, but then I was advised that _O_BINARY was better. */
812
#if defined(_WIN32) || defined(WIN32)
813
_setmode( _fileno( stdout ), _O_BINARY );
818
while (argc > 1 && argv[op][0] == '-')
820
unsigned char *endptr;
822
if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
824
else if (strcmp(argv[op], "-q") == 0) quiet = 1;
825
else if (strcmp(argv[op], "-b") == 0) debug = 1;
826
else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
827
else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
829
else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
831
else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
832
((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
838
else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
840
int both = argv[op][2] == 0;
842
if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
849
else timeitm = LOOPREPEAT;
850
if (both) timeit = timeitm;
852
else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
853
((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
856
#if defined(_WIN32) || defined(WIN32)
857
printf("PCRE: -S not supported on this OS\n");
862
getrlimit(RLIMIT_STACK, &rlim);
863
rlim.rlim_cur = stack_size * 1024 * 1024;
864
rc = setrlimit(RLIMIT_STACK, &rlim);
867
printf("PCRE: setrlimit() failed with error %d\n", rc);
875
else if (strcmp(argv[op], "-p") == 0) posix = 1;
877
else if (strcmp(argv[op], "-C") == 0)
880
printf("PCRE version %s\n", pcre_version());
881
printf("Compiled with\n");
882
(void)pcre_config(PCRE_CONFIG_UTF8, &rc);
883
printf(" %sUTF-8 support\n", rc? "" : "No ");
884
(void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
885
printf(" %sUnicode properties support\n", rc? "" : "No ");
886
(void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
887
printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
888
(rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
889
(rc == -2)? "ANYCRLF" :
890
(rc == -1)? "ANY" : "???");
891
(void)pcre_config(PCRE_CONFIG_BSR, &rc);
892
printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
893
"all Unicode newlines");
894
(void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
895
printf(" Internal link size = %d\n", rc);
896
(void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
897
printf(" POSIX malloc threshold = %d\n", rc);
898
(void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
899
printf(" Default match limit = %d\n", rc);
900
(void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
901
printf(" Default recursion depth limit = %d\n", rc);
902
(void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
903
printf(" Match recursion uses %s\n", rc? "stack" : "heap");
906
else if (strcmp(argv[op], "-help") == 0 ||
907
strcmp(argv[op], "--help") == 0)
914
printf("** Unknown or malformed option %s\n", argv[op]);
923
/* Get the store for the offsets vector, and remember what it was */
925
size_offsets_max = size_offsets;
926
offsets = (int *)malloc(size_offsets_max * sizeof(int));
929
printf("** Failed to get %d bytes of memory for offsets vector\n",
930
(int)(size_offsets_max * sizeof(int)));
935
/* Sort out the input and output files */
939
infile = fopen(argv[op], INPUT_MODE);
942
printf("** Failed to open %s\n", argv[op]);
950
outfile = fopen(argv[op+1], OUTPUT_MODE);
953
printf("** Failed to open %s\n", argv[op+1]);
959
/* Set alternative malloc function */
961
pcre_malloc = new_malloc;
962
pcre_free = new_free;
963
pcre_stack_malloc = stack_malloc;
964
pcre_stack_free = stack_free;
966
/* Heading line unless quiet, then prompt for first regex if stdin */
968
if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
975
pcre_extra *extra = NULL;
977
#if !defined NOPOSIX /* There are still compilers that require no indent */
983
unsigned char *p, *pp, *ppp;
984
unsigned char *to_file = NULL;
985
const unsigned char *tables = NULL;
986
unsigned long int true_size, true_study_size = 0;
987
size_t size, regex_gotten_store;
989
int do_debug = debug;
992
int do_showinfo = showinfo;
995
int erroroffset, len, delimiter, poffset;
1000
if (infile == stdin) printf(" re> ");
1001
if (extend_inputline(infile, buffer) == NULL) break;
1002
if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1006
while (isspace(*p)) p++;
1007
if (*p == 0) continue;
1009
/* See if the pattern is to be loaded pre-compiled from a file. */
1011
if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1013
unsigned long int magic, get_options;
1018
pp = p + (int)strlen((char *)p);
1019
while (isspace(pp[-1])) pp--;
1022
f = fopen((char *)p, "rb");
1025
fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1029
if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1032
(sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1034
(sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1036
re = (real_pcre *)new_malloc(true_size);
1037
regex_gotten_store = gotten_store;
1039
if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1041
magic = ((real_pcre *)re)->magic_number;
1042
if (magic != MAGIC_NUMBER)
1044
if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1050
fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1056
fprintf(outfile, "Compiled regex%s loaded from %s\n",
1057
do_flip? " (byte-inverted)" : "", p);
1059
/* Need to know if UTF-8 for printing data strings */
1061
new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1062
use_utf8 = (get_options & PCRE_UTF8) != 0;
1064
/* Now see if there is any following study data */
1066
if (true_study_size != 0)
1068
pcre_study_data *psd;
1070
extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1071
extra->flags = PCRE_EXTRA_STUDY_DATA;
1073
psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1074
extra->study_data = psd;
1076
if (fread(psd, 1, true_study_size, f) != true_study_size)
1079
fprintf(outfile, "Failed to read data from %s\n", p);
1080
if (extra != NULL) new_free(extra);
1081
if (re != NULL) new_free(re);
1085
fprintf(outfile, "Study data loaded from %s\n", p);
1086
do_study = 1; /* To get the data output if requested */
1088
else fprintf(outfile, "No study data\n");
1094
/* In-line pattern (the usual case). Get the delimiter and seek the end of
1095
the pattern; if it isn't complete, read more. */
1099
if (isalnum(delimiter) || delimiter == '\\')
1101
fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1106
poffset = p - buffer;
1112
if (*pp == '\\' && pp[1] != 0) pp++;
1113
else if (*pp == delimiter) break;
1116
if (*pp != 0) break;
1117
if (infile == stdin) printf(" > ");
1118
if ((pp = extend_inputline(infile, pp)) == NULL)
1120
fprintf(outfile, "** Unexpected EOF\n");
1124
if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1127
/* The buffer may have moved while being extended; reset the start of data
1128
pointer to the correct relative point in the buffer. */
1130
p = buffer + poffset;
1132
/* If the first character after the delimiter is backslash, make
1133
the pattern end with backslash. This is purely to provide a way
1134
of testing for the error message when a pattern ends with backslash. */
1136
if (pp[1] == '\\') *pp++ = '\\';
1138
/* Terminate the pattern at the delimiter, and save a copy of the pattern
1142
strcpy((char *)pbuffer, (char *)p);
1144
/* Look for options after final delimiter */
1148
log_store = showstore; /* default from command line */
1154
case 'f': options |= PCRE_FIRSTLINE; break;
1155
case 'g': do_g = 1; break;
1156
case 'i': options |= PCRE_CASELESS; break;
1157
case 'm': options |= PCRE_MULTILINE; break;
1158
case 's': options |= PCRE_DOTALL; break;
1159
case 'x': options |= PCRE_EXTENDED; break;
1161
case '+': do_showrest = 1; break;
1162
case 'A': options |= PCRE_ANCHORED; break;
1163
case 'B': do_debug = 1; break;
1164
case 'C': options |= PCRE_AUTO_CALLOUT; break;
1165
case 'D': do_debug = do_showinfo = 1; break;
1166
case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1167
case 'F': do_flip = 1; break;
1168
case 'G': do_G = 1; break;
1169
case 'I': do_showinfo = 1; break;
1170
case 'J': options |= PCRE_DUPNAMES; break;
1171
case 'M': log_store = 1; break;
1172
case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1174
#if !defined NOPOSIX
1175
case 'P': do_posix = 1; break;
1178
case 'S': do_study = 1; break;
1179
case 'U': options |= PCRE_UNGREEDY; break;
1180
case 'X': options |= PCRE_EXTRA; break;
1181
case 'Z': debug_lengths = 0; break;
1182
case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1183
case '?': options |= PCRE_NO_UTF8_CHECK; break;
1187
/* The '\r' test here is so that it works on Windows. */
1188
/* The '0' test is just in case this is an unterminated line. */
1189
while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1191
if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1193
fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1197
tables = pcre_maketables();
1203
while (*pp != 0) pp++;
1204
while (isspace(pp[-1])) pp--;
1210
int x = check_newline(pp, outfile);
1211
if (x == 0) goto SKIP_DATA;
1213
while (*pp++ != '>');
1217
case '\r': /* So that it works in Windows */
1223
fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1228
/* Handle compiling via the POSIX interface, which doesn't support the
1229
timing, showing, or debugging options, nor the ability to pass over
1230
local character tables. */
1232
#if !defined NOPOSIX
1233
if (posix || do_posix)
1238
if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1239
if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1240
if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1241
if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1242
if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1244
rc = regcomp(&preg, (char *)p, cflags);
1246
/* Compilation failed; go back for another re, skipping to blank line
1247
if non-interactive. */
1251
(void)regerror(rc, &preg, (char *)buffer, buffer_size);
1252
fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1257
/* Handle compiling via the native interface */
1260
#endif /* !defined NOPOSIX */
1267
clock_t start_time = clock();
1268
for (i = 0; i < timeit; i++)
1270
re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1271
if (re != NULL) free(re);
1273
time_taken = clock() - start_time;
1274
fprintf(outfile, "Compile time %.4f milliseconds\n",
1275
(((double)time_taken * 1000.0) / (double)timeit) /
1276
(double)CLOCKS_PER_SEC);
1279
re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1281
/* Compilation failed; go back for another re, skipping to blank line
1282
if non-interactive. */
1286
fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1288
if (infile != stdin)
1292
if (extend_inputline(infile, buffer) == NULL)
1297
len = (int)strlen((char *)buffer);
1298
while (len > 0 && isspace(buffer[len-1])) len--;
1299
if (len == 0) break;
1301
fprintf(outfile, "\n");
1306
/* Compilation succeeded; print data if required. There are now two
1307
info-returning functions. The old one has a limited interface and
1308
returns only limited data. Check that it agrees with the newer one. */
1311
fprintf(outfile, "Memory allocation (code space): %d\n",
1312
(int)(gotten_store -
1314
((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1316
/* Extract the size for possible writing before possibly flipping it,
1317
and remember the store that was got. */
1319
true_size = ((real_pcre *)re)->size;
1320
regex_gotten_store = gotten_store;
1322
/* If /S was present, study the regexp to generate additional info to
1323
help with the matching. */
1331
clock_t start_time = clock();
1332
for (i = 0; i < timeit; i++)
1333
extra = pcre_study(re, study_options, &error);
1334
time_taken = clock() - start_time;
1335
if (extra != NULL) free(extra);
1336
fprintf(outfile, " Study time %.4f milliseconds\n",
1337
(((double)time_taken * 1000.0) / (double)timeit) /
1338
(double)CLOCKS_PER_SEC);
1340
extra = pcre_study(re, study_options, &error);
1342
fprintf(outfile, "Failed to study: %s\n", error);
1343
else if (extra != NULL)
1344
true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1347
/* If the 'F' option was present, we flip the bytes of all the integer
1348
fields in the regex data block and the study block. This is to make it
1349
possible to test PCRE's handling of byte-flipped patterns, e.g. those
1350
compiled on a different architecture. */
1354
real_pcre *rre = (real_pcre *)re;
1356
byteflip(rre->magic_number, sizeof(rre->magic_number));
1357
rre->size = byteflip(rre->size, sizeof(rre->size));
1358
rre->options = byteflip(rre->options, sizeof(rre->options));
1359
rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1361
(pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1363
(pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1365
(pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1367
(pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1368
rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1369
sizeof(rre->name_table_offset));
1370
rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1371
sizeof(rre->name_entry_size));
1372
rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1373
sizeof(rre->name_count));
1377
pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1378
rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1379
rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1383
/* Extract information from the compiled data if required */
1389
fprintf(outfile, "------------------------------------------------------------------\n");
1390
pcre_printint(re, outfile, debug_lengths);
1395
unsigned long int get_options, all_options;
1396
#if !defined NOINFOCHECK
1397
int old_first_char, old_options, old_count;
1399
int count, backrefmax, first_char, need_char, okpartial, jchanged,
1401
int nameentrysize, namecount;
1402
const uschar *nametable;
1404
new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1405
new_info(re, NULL, PCRE_INFO_SIZE, &size);
1406
new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1407
new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1408
new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1409
new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1410
new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1411
new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1412
new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1413
new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1414
new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1415
new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1417
#if !defined NOINFOCHECK
1418
old_count = pcre_info(re, &old_options, &old_first_char);
1419
if (count < 0) fprintf(outfile,
1420
"Error %d from pcre_info()\n", count);
1423
if (old_count != count) fprintf(outfile,
1424
"Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1427
if (old_first_char != first_char) fprintf(outfile,
1428
"First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1429
first_char, old_first_char);
1431
if (old_options != (int)get_options) fprintf(outfile,
1432
"Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1433
get_options, old_options);
1437
if (size != regex_gotten_store) fprintf(outfile,
1438
"Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1439
(int)size, (int)regex_gotten_store);
1441
fprintf(outfile, "Capturing subpattern count = %d\n", count);
1443
fprintf(outfile, "Max back reference = %d\n", backrefmax);
1447
fprintf(outfile, "Named capturing subpatterns:\n");
1448
while (namecount-- > 0)
1450
fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1451
nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1452
GET2(nametable, 0));
1453
nametable += nameentrysize;
1457
if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1458
if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1460
all_options = ((real_pcre *)re)->options;
1461
if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1463
if (get_options == 0) fprintf(outfile, "No options\n");
1464
else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1465
((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1466
((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1467
((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1468
((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1469
((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1470
((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1471
((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1472
((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1473
((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1474
((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1475
((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1476
((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1477
((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1478
((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1479
((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1481
if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1483
switch (get_options & PCRE_NEWLINE_BITS)
1485
case PCRE_NEWLINE_CR:
1486
fprintf(outfile, "Forced newline sequence: CR\n");
1489
case PCRE_NEWLINE_LF:
1490
fprintf(outfile, "Forced newline sequence: LF\n");
1493
case PCRE_NEWLINE_CRLF:
1494
fprintf(outfile, "Forced newline sequence: CRLF\n");
1497
case PCRE_NEWLINE_ANYCRLF:
1498
fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1501
case PCRE_NEWLINE_ANY:
1502
fprintf(outfile, "Forced newline sequence: ANY\n");
1509
if (first_char == -1)
1511
fprintf(outfile, "First char at start or follows newline\n");
1513
else if (first_char < 0)
1515
fprintf(outfile, "No first char\n");
1519
int ch = first_char & 255;
1520
const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1523
fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1525
fprintf(outfile, "First char = %d%s\n", ch, caseless);
1530
fprintf(outfile, "No need char\n");
1534
int ch = need_char & 255;
1535
const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1538
fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1540
fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1543
/* Don't output study size; at present it is in any case a fixed
1544
value, but it varies, depending on the computer architecture, and
1545
so messes up the test suite. (And with the /F option, it might be
1551
fprintf(outfile, "Study returned NULL\n");
1554
uschar *start_bits = NULL;
1555
new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1557
if (start_bits == NULL)
1558
fprintf(outfile, "No starting byte set\n");
1563
fprintf(outfile, "Starting byte set: ");
1564
for (i = 0; i < 256; i++)
1566
if ((start_bits[i/8] & (1<<(i&7))) != 0)
1570
fprintf(outfile, "\n ");
1573
if (PRINTHEX(i) && i != ' ')
1575
fprintf(outfile, "%c ", i);
1580
fprintf(outfile, "\\x%02x ", i);
1585
fprintf(outfile, "\n");
1591
/* If the '>' option was present, we write out the regex to a file, and
1592
that is all. The first 8 bytes of the file are the regex length and then
1593
the study length, in big-endian order. */
1595
if (to_file != NULL)
1597
FILE *f = fopen((char *)to_file, "wb");
1600
fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1605
sbuf[0] = (uschar)((true_size >> 24) & 255);
1606
sbuf[1] = (uschar)((true_size >> 16) & 255);
1607
sbuf[2] = (uschar)((true_size >> 8) & 255);
1608
sbuf[3] = (uschar)((true_size) & 255);
1610
sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1611
sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1612
sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1613
sbuf[7] = (uschar)((true_study_size) & 255);
1615
if (fwrite(sbuf, 1, 8, f) < 8 ||
1616
fwrite(re, 1, true_size, f) < true_size)
1618
fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1622
fprintf(outfile, "Compiled regex written to %s\n", to_file);
1625
if (fwrite(extra->study_data, 1, true_study_size, f) <
1628
fprintf(outfile, "Write error on %s: %s\n", to_file,
1631
else fprintf(outfile, "Study data written to %s\n", to_file);
1639
if (extra != NULL) new_free(extra);
1640
if (tables != NULL) new_free((void *)tables);
1641
continue; /* With next regex */
1643
} /* End of non-POSIX compile */
1645
/* Read data lines and test them */
1651
int *use_offsets = offsets;
1652
int use_size_offsets = size_offsets;
1653
int callout_data = 0;
1654
int callout_data_set = 0;
1656
int copystrings = 0;
1657
int find_match_limit = 0;
1661
int start_offset = 0;
1670
copynamesptr = copynames;
1671
getnamesptr = getnames;
1673
pcre_callout = callout;
1677
callout_fail_count = 999999;
1678
callout_fail_id = -1;
1681
if (extra != NULL) extra->flags &=
1682
~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1687
if (infile == stdin) printf("data> ");
1688
if (extend_inputline(infile, buffer + len) == NULL)
1694
if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1695
len = (int)strlen((char *)buffer);
1696
if (buffer[len-1] == '\n') break;
1699
while (len > 0 && isspace(buffer[len-1])) len--;
1701
if (len == 0) break;
1704
while (isspace(*p)) p++;
1707
while ((c = *p++) != 0)
1712
if (c == '\\') switch ((c = *p++))
1714
case 'a': c = 7; break;
1715
case 'b': c = '\b'; break;
1716
case 'e': c = 27; break;
1717
case 'f': c = '\f'; break;
1718
case 'n': c = '\n'; break;
1719
case 'r': c = '\r'; break;
1720
case 't': c = '\t'; break;
1721
case 'v': c = '\v'; break;
1723
case '0': case '1': case '2': case '3':
1724
case '4': case '5': case '6': case '7':
1726
while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1727
c = c * 8 + *p++ - '0';
1730
if (use_utf8 && c > 255)
1732
unsigned char buff8[8];
1734
utn = ord2utf8(c, buff8);
1735
for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1736
c = buff8[ii]; /* Last byte */
1743
/* Handle \x{..} specially - new Perl thing for utf8 */
1748
unsigned char *pt = p;
1750
while (isxdigit(*(++pt)))
1751
c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1754
unsigned char buff8[8];
1756
utn = ord2utf8(c, buff8);
1757
for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1758
c = buff8[ii]; /* Last byte */
1762
/* Not correct form; fall through */
1769
while (i++ < 2 && isxdigit(*p))
1771
c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1776
case 0: /* \ followed by EOF allows for an empty line */
1781
while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1784
case 'A': /* Option setting */
1785
options |= PCRE_ANCHORED;
1789
options |= PCRE_NOTBOL;
1793
if (isdigit(*p)) /* Set copy string */
1795
while(isdigit(*p)) n = n * 10 + *p++ - '0';
1796
copystrings |= 1 << n;
1798
else if (isalnum(*p))
1800
uschar *npp = copynamesptr;
1801
while (isalnum(*p)) *npp++ = *p++;
1804
n = pcre_get_stringnumber(re, (char *)copynamesptr);
1806
fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1816
pcre_callout = NULL;
1821
callout_fail_id = 0;
1824
callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1825
callout_fail_count = 0;
1830
callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1837
if (*(++p) == '-') { sign = -1; p++; }
1839
callout_data = callout_data * 10 + *p++ - '0';
1840
callout_data *= sign;
1841
callout_data_set = 1;
1847
#if !defined NOPOSIX
1848
if (posix || do_posix)
1849
printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1856
options |= PCRE_DFA_SHORTEST;
1863
while(isdigit(*p)) n = n * 10 + *p++ - '0';
1864
getstrings |= 1 << n;
1866
else if (isalnum(*p))
1868
uschar *npp = getnamesptr;
1869
while (isalnum(*p)) *npp++ = *p++;
1872
n = pcre_get_stringnumber(re, (char *)getnamesptr);
1874
fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1884
find_match_limit = 1;
1888
options |= PCRE_NOTEMPTY;
1892
while(isdigit(*p)) n = n * 10 + *p++ - '0';
1893
if (n > size_offsets_max)
1895
size_offsets_max = n;
1897
use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1898
if (offsets == NULL)
1900
printf("** Failed to get %d bytes of memory for offsets vector\n",
1901
(int)(size_offsets_max * sizeof(int)));
1906
use_size_offsets = n;
1907
if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1911
options |= PCRE_PARTIAL;
1915
while(isdigit(*p)) n = n * 10 + *p++ - '0';
1918
extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1921
extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1922
extra->match_limit_recursion = n;
1926
while(isdigit(*p)) n = n * 10 + *p++ - '0';
1929
extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1932
extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1933
extra->match_limit = n;
1938
options |= PCRE_DFA_RESTART;
1947
options |= PCRE_NOTEOL;
1951
options |= PCRE_NO_UTF8_CHECK;
1956
int x = check_newline(p, outfile);
1957
if (x == 0) goto NEXT_DATA;
1959
while (*p++ != '>');
1968
if ((all_use_dfa || use_dfa) && find_match_limit)
1970
printf("**Match limit not relevant for DFA matching: ignored\n");
1971
find_match_limit = 0;
1974
/* Handle matching via the POSIX interface, which does not
1975
support timing or playing with the match limit or callout data. */
1977
#if !defined NOPOSIX
1978
if (posix || do_posix)
1982
regmatch_t *pmatch = NULL;
1983
if (use_size_offsets > 0)
1984
pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1985
if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1986
if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1988
rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1992
(void)regerror(rc, &preg, (char *)buffer, buffer_size);
1993
fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1995
else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1998
fprintf(outfile, "Matched with REG_NOSUB\n");
2003
for (i = 0; i < (size_t)use_size_offsets; i++)
2005
if (pmatch[i].rm_so >= 0)
2007
fprintf(outfile, "%2d: ", (int)i);
2008
(void)pchars(dbuffer + pmatch[i].rm_so,
2009
pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2010
fprintf(outfile, "\n");
2011
if (i == 0 && do_showrest)
2013
fprintf(outfile, " 0+ ");
2014
(void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2016
fprintf(outfile, "\n");
2024
/* Handle matching via the native interface - repeats for /g and /G */
2027
#endif /* !defined NOPOSIX */
2029
for (;; gmatched++) /* Loop for /g or /G */
2035
clock_t start_time = clock();
2038
if (all_use_dfa || use_dfa)
2040
int workspace[1000];
2041
for (i = 0; i < timeitm; i++)
2042
count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2043
options | g_notempty, use_offsets, use_size_offsets, workspace,
2044
sizeof(workspace)/sizeof(int));
2049
for (i = 0; i < timeitm; i++)
2050
count = pcre_exec(re, extra, (char *)bptr, len,
2051
start_offset, options | g_notempty, use_offsets, use_size_offsets);
2053
time_taken = clock() - start_time;
2054
fprintf(outfile, "Execute time %.4f milliseconds\n",
2055
(((double)time_taken * 1000.0) / (double)timeitm) /
2056
(double)CLOCKS_PER_SEC);
2059
/* If find_match_limit is set, we want to do repeated matches with
2060
varying limits in order to find the minimum value for the match limit and
2061
for the recursion limit. */
2063
if (find_match_limit)
2067
extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2071
(void)check_match_limit(re, extra, bptr, len, start_offset,
2072
options|g_notempty, use_offsets, use_size_offsets,
2073
PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2074
PCRE_ERROR_MATCHLIMIT, "match()");
2076
count = check_match_limit(re, extra, bptr, len, start_offset,
2077
options|g_notempty, use_offsets, use_size_offsets,
2078
PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2079
PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2082
/* If callout_data is set, use the interface with additional data */
2084
else if (callout_data_set)
2088
extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2091
extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2092
extra->callout_data = &callout_data;
2093
count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2094
options | g_notempty, use_offsets, use_size_offsets);
2095
extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2098
/* The normal case is just to do the match once, with the default
2099
value of match_limit. */
2102
else if (all_use_dfa || use_dfa)
2104
int workspace[1000];
2105
count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2106
options | g_notempty, use_offsets, use_size_offsets, workspace,
2107
sizeof(workspace)/sizeof(int));
2110
fprintf(outfile, "Matched, but too many subsidiary matches\n");
2111
count = use_size_offsets/2;
2118
count = pcre_exec(re, extra, (char *)bptr, len,
2119
start_offset, options | g_notempty, use_offsets, use_size_offsets);
2122
fprintf(outfile, "Matched, but too many substrings\n");
2123
count = use_size_offsets/3;
2134
if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2136
maxcount = use_size_offsets/3;
2138
/* This is a check against a lunatic return value. */
2140
if (count > maxcount)
2143
"** PCRE error: returned count %d is too big for offset size %d\n",
2144
count, use_size_offsets);
2145
count = use_size_offsets/3;
2148
fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2149
do_g = do_G = FALSE; /* Break g/G loop */
2153
for (i = 0; i < count * 2; i += 2)
2155
if (use_offsets[i] < 0)
2156
fprintf(outfile, "%2d: <unset>\n", i/2);
2159
fprintf(outfile, "%2d: ", i/2);
2160
(void)pchars(bptr + use_offsets[i],
2161
use_offsets[i+1] - use_offsets[i], outfile);
2162
fprintf(outfile, "\n");
2167
fprintf(outfile, " 0+ ");
2168
(void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2170
fprintf(outfile, "\n");
2176
for (i = 0; i < 32; i++)
2178
if ((copystrings & (1 << i)) != 0)
2180
char copybuffer[256];
2181
int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2182
i, copybuffer, sizeof(copybuffer));
2184
fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2186
fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2190
for (copynamesptr = copynames;
2192
copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2194
char copybuffer[256];
2195
int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2196
count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2198
fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2200
fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2203
for (i = 0; i < 32; i++)
2205
if ((getstrings & (1 << i)) != 0)
2207
const char *substring;
2208
int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2211
fprintf(outfile, "get substring %d failed %d\n", i, rc);
2214
fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2215
pcre_free_substring(substring);
2220
for (getnamesptr = getnames;
2222
getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2224
const char *substring;
2225
int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2226
count, (char *)getnamesptr, &substring);
2228
fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2231
fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2232
pcre_free_substring(substring);
2238
const char **stringlist;
2239
int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2242
fprintf(outfile, "get substring list failed %d\n", rc);
2245
for (i = 0; i < count; i++)
2246
fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2247
if (stringlist[i] != NULL)
2248
fprintf(outfile, "string list not terminated by NULL\n");
2249
/* free((void *)stringlist); */
2250
pcre_free_substring_list(stringlist);
2255
/* There was a partial match */
2257
else if (count == PCRE_ERROR_PARTIAL)
2259
fprintf(outfile, "Partial match");
2261
if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2262
fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2263
bptr + use_offsets[0]);
2265
fprintf(outfile, "\n");
2266
break; /* Out of the /g loop */
2269
/* Failed to match. If this is a /g or /G loop and we previously set
2270
g_notempty after a null match, this is not necessarily the end. We want
2271
to advance the start offset, and continue. We won't be at the end of the
2272
string - that was checked before setting g_notempty.
2274
Complication arises in the case when the newline option is "any" or
2275
"anycrlf". If the previous match was at the end of a line terminated by
2276
CRLF, an advance of one character just passes the \r, whereas we should
2277
prefer the longer newline sequence, as does the code in pcre_exec().
2278
Fudge the offset value to achieve this.
2280
Otherwise, in the case of UTF-8 matching, the advance must be one
2281
character, not one byte. */
2285
if (g_notempty != 0)
2288
unsigned int obits = ((real_pcre *)re)->options;
2289
use_offsets[0] = start_offset;
2290
if ((obits & PCRE_NEWLINE_BITS) == 0)
2293
(void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2294
obits = (d == '\r')? PCRE_NEWLINE_CR :
2295
(d == '\n')? PCRE_NEWLINE_LF :
2296
(d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2297
(d == -2)? PCRE_NEWLINE_ANYCRLF :
2298
(d == -1)? PCRE_NEWLINE_ANY : 0;
2300
if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2301
(obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2303
start_offset < len - 1 &&
2304
bptr[start_offset] == '\r' &&
2305
bptr[start_offset+1] == '\n')
2309
while (start_offset + onechar < len)
2311
int tb = bptr[start_offset+onechar];
2312
if (tb <= 127) break;
2314
if (tb != 0 && tb != 0xc0) onechar++;
2317
use_offsets[1] = start_offset + onechar;
2321
if (count == PCRE_ERROR_NOMATCH)
2323
if (gmatched == 0) fprintf(outfile, "No match\n");
2325
else fprintf(outfile, "Error %d\n", count);
2326
break; /* Out of the /g loop */
2330
/* If not /g or /G we are done */
2332
if (!do_g && !do_G) break;
2334
/* If we have matched an empty string, first check to see if we are at
2335
the end of the subject. If so, the /g loop is over. Otherwise, mimic
2336
what Perl's /g option does. This turns out to be rather cunning. First
2337
we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2338
same point. If this fails (picked up above) we advance to the next
2343
if (use_offsets[0] == use_offsets[1])
2345
if (use_offsets[0] == len) break;
2346
g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2349
/* For /g, update the start offset, leaving the rest alone */
2351
if (do_g) start_offset = use_offsets[1];
2353
/* For /G, update the pointer and length */
2357
bptr += use_offsets[1];
2358
len -= use_offsets[1];
2360
} /* End of loop for /g and /G */
2362
NEXT_DATA: continue;
2363
} /* End of loop for data lines */
2367
#if !defined NOPOSIX
2368
if (posix || do_posix) regfree(&preg);
2371
if (re != NULL) new_free(re);
2372
if (extra != NULL) new_free(extra);
2375
new_free((void *)tables);
2376
setlocale(LC_CTYPE, "C");
2381
if (infile == stdin) fprintf(outfile, "\n");
2385
if (infile != NULL && infile != stdin) fclose(infile);
2386
if (outfile != NULL && outfile != stdout) fclose(outfile);
2396
/* End of pcretest.c */