1
/*************************************************
2
* Perl-Compatible Regular Expressions *
3
*************************************************/
6
This is a library of functions to support regular expressions whose syntax
7
and semantics are as close as possible to those of the Perl 5 language. See
8
the file Tech.Notes for some information on the internals.
10
Written by: Philip Hazel <ph10@cam.ac.uk>
12
Copyright (c) 1997-2004 University of Cambridge
14
-----------------------------------------------------------------------------
15
Redistribution and use in source and binary forms, with or without
16
modification, are permitted provided that the following conditions are met:
18
* Redistributions of source code must retain the above copyright notice,
19
this list of conditions and the following disclaimer.
21
* Redistributions in binary form must reproduce the above copyright
22
notice, this list of conditions and the following disclaimer in the
23
documentation and/or other materials provided with the distribution.
25
* Neither the name of the University of Cambridge nor the names of its
26
contributors may be used to endorse or promote products derived from
27
this software without specific prior written permission.
29
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
30
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
33
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
34
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
35
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
36
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
37
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
38
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
39
POSSIBILITY OF SUCH DAMAGE.
40
-----------------------------------------------------------------------------
44
/* This module contains a debugging function for printing out the internal form
45
of a compiled regular expression. It is kept in a separate file so that it can
46
be #included both in the pcretest program, and in the library itself when
47
compiled with the debugging switch. */
50
/* Text names of the opcodes, indexed by opcode value. The entries are supplied
by the OP_NAME_LIST macro, which is defined outside this file. */
static const char *OP_names[] = { OP_NAME_LIST };
53
/*************************************************
54
* Print single- or multi-byte character *
55
*************************************************/
57
/* These tables are actually copies of ones in pcre.c. If we compile the
58
library with debugging, they are included twice, but that isn't really a
59
problem - compiling with debugging is pretty rare and these are very small. */
61
/* Mask applied to the first byte of a multi-byte UTF-8 character, indexed by
the number of additional bytes that follow it (see its use in print_char). */
static const int utf8_t3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
63
/* Number of additional bytes in a multi-byte UTF-8 character, indexed by the
low six bits (c & 0x3f) of its first byte. */
static const uschar utf8_t4[] = {
64
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
65
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
66
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
67
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
70
print_char(FILE *f, uschar *ptr, BOOL utf8)
74
if (!utf8 || (c & 0xc0) != 0xc0)
76
if (isprint(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
82
int a = utf8_t4[c & 0x3f]; /* Number of additional bytes */
84
c = (c & utf8_t3[a]) << s;
85
for (i = 1; i <= a; i++)
87
/* This is a check for malformed UTF-8; it should only occur if the sanity
88
check has been turned off. Rather than swallow random bytes, just stop if
89
we hit a bad one. Print it with \X instead of \x as an indication. */
91
if ((ptr[i] & 0xc0) != 0x80)
93
fprintf(f, "\\X{%x}", c);
100
c |= (ptr[i] & 0x3f) << s;
102
if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
110
/*************************************************
111
* Find Unicode property name *
112
*************************************************/
115
get_ucpname(int property)
118
/* Search the property table from its last entry downwards.
sizeof(utt)/sizeof(ucp_type_table) is the number of entries, so the highest
valid index is one less than that; starting at the count itself would read one
element past the end of utt. The cast keeps the arithmetic in int so that the
loop still terminates cleanly (i == -1) if the table were empty. */
for (i = (int)(sizeof(utt)/sizeof(ucp_type_table)) - 1; i >= 0; i--)
120
if (property == utt[i].value) break;
122
return (i >= 0)? utt[i].name : "??";
127
/*************************************************
128
* Print compiled regex *
129
*************************************************/
131
/* Make this function work for a regex with integers in either byte order.
132
However, we assume that what we are passed is a compiled regex. */
135
print_internals(pcre *external_re, FILE *f)
137
real_pcre *re = (real_pcre *)external_re;
138
uschar *codestart, *code;
141
unsigned int options = re->options;
142
int offset = re->name_table_offset;
143
int count = re->name_count;
144
int size = re->name_entry_size;
146
if (re->magic_number != MAGIC_NUMBER)
148
offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
149
count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
150
size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
151
options = ((options << 24) & 0xff000000) |
152
((options << 8) & 0x00ff0000) |
153
((options >> 8) & 0x0000ff00) |
154
((options >> 24) & 0x000000ff);
157
code = codestart = (uschar *)re + offset + count * size;
158
utf8 = (options & PCRE_UTF8) != 0;
166
/* Print the offset of this item from the start of the compiled code. Pointer
subtraction yields a ptrdiff_t, so convert it explicitly to match %d. */
fprintf(f, "%3d ", (int)(code - codestart));
170
if (*code - OP_BRA > EXTRACT_BASIC_MAX)
171
fprintf(f, "%3d Bra extra\n", GET(code, 1));
173
fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA);
174
code += OP_lengths[OP_BRA];
181
fprintf(f, " %s\n", OP_names[*code]);
182
fprintf(f, "------------------------------------------------------------------\n");
186
fprintf(f, " %.2x %s", code[1], OP_names[*code]);
195
code += 1 + print_char(f, code, utf8);
197
while (*code == OP_CHAR);
209
code += 1 + print_char(f, code, utf8);
211
while (*code == OP_CHARNC);
224
case OP_ASSERTBACK_NOT:
228
fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
232
printf("%3d %s", GET2(code, 1), OP_names[*code]);
236
if (GET2(code, 1) == CREF_RECURSE)
237
fprintf(f, " Cond recurse");
239
fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
253
case OP_TYPEMINQUERY:
255
if (*code >= OP_TYPESTAR)
257
fprintf(f, "%s", OP_names[code[1]]);
258
if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
260
fprintf(f, " %s ", get_ucpname(code[2]));
264
else extra = print_char(f, code+1, utf8);
265
fprintf(f, "%s", OP_names[*code]);
272
extra = print_char(f, code+3, utf8);
274
if (*code != OP_EXACT) fprintf(f, ",");
275
fprintf(f, "%d}", GET2(code,1));
276
if (*code == OP_MINUPTO) fprintf(f, "?");
282
fprintf(f, " %s", OP_names[code[3]]);
283
if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
285
fprintf(f, " %s ", get_ucpname(code[4]));
289
if (*code != OP_TYPEEXACT) fprintf(f, "0,");
290
fprintf(f, "%d}", GET2(code,1));
291
if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
295
if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
296
else fprintf(f, " [^\\x%02x]", c);
305
if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
306
else fprintf(f, " [^\\x%02x]", c);
307
fprintf(f, "%s", OP_names[*code]);
313
if (isprint(c = code[3])) fprintf(f, " [^%c]{", c);
314
else fprintf(f, " [^\\x%02x]{", c);
315
if (*code != OP_NOTEXACT) fprintf(f, ",");
316
fprintf(f, "%d}", GET2(code,1));
317
if (*code == OP_NOTMINUPTO) fprintf(f, "?");
321
fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
325
fprintf(f, " \\%d", GET2(code,1));
326
ccode = code + OP_lengths[*code];
327
goto CLASS_REF_REPEAT;
330
fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
331
GET(code, 2 + LINK_SIZE));
336
fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1]));
339
/* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
340
having this code always here, and it makes it less messy without all those
352
if (*code == OP_XCLASS)
354
extra = GET(code, 1);
355
ccode = code + LINK_SIZE + 1;
356
printmap = (*ccode & XCL_MAP) != 0;
357
if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
365
/* Print a bit map */
369
for (i = 0; i < 256; i++)
371
if ((ccode[i/8] & (1 << (i&7))) != 0)
374
for (j = i+1; j < 256; j++)
375
if ((ccode[j/8] & (1 << (j&7))) == 0) break;
376
if (i == '-' || i == ']') fprintf(f, "\\");
377
if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i);
380
if (j != i + 1) fprintf(f, "-");
381
if (j == '-' || j == ']') fprintf(f, "\\");
382
if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j);
390
/* For an XCLASS there is always some additional data */
392
if (*code == OP_XCLASS)
395
while ((ch = *ccode++) != XCL_END)
399
fprintf(f, "\\p{%s}", get_ucpname(*ccode++));
401
else if (ch == XCL_NOTPROP)
403
fprintf(f, "\\P{%s}", get_ucpname(*ccode++));
407
ccode += 1 + print_char(f, ccode, TRUE);
411
ccode += 1 + print_char(f, ccode, TRUE);
417
/* Indicate a non-UTF8 class which was created by negation */
419
fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
421
/* Handle repeats after a class or a back reference */
432
fprintf(f, "%s", OP_names[*ccode]);
433
extra += OP_lengths[*ccode];
440
if (max == 0) fprintf(f, "{%d,}", min);
441
else fprintf(f, "{%d,%d}", min, max);
442
if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
443
extra += OP_lengths[*ccode];
449
/* Anything else is just an item with no data */
452
fprintf(f, " %s", OP_names[*code]);
456
code += OP_lengths[*code] + extra;
461
/* End of printint.c */