3
# Program for testing regular expressions with perl to check that PCRE handles
4
# them the same. This is the version that supports /8 for UTF-8 testing. As it
5
# stands, it requires at least Perl 5.8 for UTF-8 support. For Perl 5.6, it
6
# can be used as is for non-UTF-8 testing, but you have to uncomment the
7
# "use utf8" lines in order to to UTF-8 stuff (and you mustn't uncomment them
11
# Function for turning a string into a string of printing chars. There are
12
# currently problems with UTF-8 strings; this fudges round them.
19
# use utf8; <=============== For UTF-8 in Perl 5.6
20
@p = unpack('U*', $_[0]);
23
if ($c >= 32 && $c < 127) { $t .= chr $c; }
24
else { $t .= sprintf("\\x{%02x}", $c); }
30
foreach $c (split(//, $_[0]))
32
if (ord $c >= 32 && ord $c < 127) { $t .= $c; }
33
else { $t .= sprintf("\\x%02x", ord $c); }
42
# Read lines from named file or stdin and write to named file or stdout; lines
43
# consist of a regular expression, in delimiters and optionally followed by
44
# options, followed by a set of test data, terminated by an empty line.
46
# Sort out the input and output files
50
open(INFILE, "<$ARGV[0]") || die "Failed to open $ARGV[0]\n";
53
else { $infile = "STDIN"; }
57
open(OUTFILE, ">$ARGV[1]") || die "Failed to open $ARGV[1]\n";
60
else { $outfile = "STDOUT"; }
62
printf($outfile "Perl $] Regular Expressions\n\n");
69
printf " re> " if $infile eq "STDIN";
70
last if ! ($_ = <$infile>);
71
printf $outfile "$_" if $infile ne "STDIN";
76
while ($pattern !~ /^\s*(.).*\1/s)
78
printf " > " if $infile eq "STDIN";
79
last if ! ($_ = <$infile>);
80
printf $outfile "$_" if $infile ne "STDIN";
87
# The private /+ modifier means "print $' afterwards".
89
$showrest = ($pattern =~ s/\+(?=[a-z]*$)//);
91
# The private /8 modifier means "operate in UTF-8". Currently, Perl
92
# has bugs that we try to work around using this flag.
94
$utf8 = ($pattern =~ s/8(?=[a-z]*$)//);
96
# Check that the pattern is valid
100
# use utf8; <=============== For UTF-8 in Perl 5.6
101
eval "\$_ =~ ${pattern}";
105
eval "\$_ =~ ${pattern}";
110
printf $outfile "Error: $@";
114
# If the /g modifier is present, we want to put a loop round the matching;
115
# otherwise just a single "if".
117
$cmd = ($pattern =~ /g[a-z]*$/)? "while" : "if";
119
# If the pattern is actually the null string, Perl uses the most recently
120
# executed (and successfully compiled) regex is used instead. This is a
121
# nasty trap for the unwary! The PCRE test suite does contain null strings
122
# in places - if they are allowed through here all sorts of weird and
123
# unexpected effects happen. To avoid this, we replace such patterns with
124
# a non-null pattern that has the same effect.
126
$pattern = "/(?#)/$2" if ($pattern =~ /^(.)\1(.*)$/);
128
# Read data lines and test them
132
printf "data> " if $infile eq "STDIN";
133
last NEXT_RE if ! ($_ = <$infile>);
135
printf $outfile "$_\n" if $infile ne "STDIN";
142
$x = eval "\"$_\""; # To get escapes processed
144
# Empty array for holding results, then do the matching.
148
$pushes = "push \@subs,\$&;" .
158
"push \@subs,\$10;" .
159
"push \@subs,\$11;" .
160
"push \@subs,\$12;" .
161
"push \@subs,\$13;" .
162
"push \@subs,\$14;" .
163
"push \@subs,\$15;" .
164
"push \@subs,\$16;" .
165
"push \@subs,\$'; }";
169
# use utf8; <=============== For UTF-8 in Perl 5.6
170
eval "${cmd} (\$x =~ ${pattern}) {" . $pushes;
174
eval "${cmd} (\$x =~ ${pattern}) {" . $pushes;
179
printf $outfile "Error: $@\n";
182
elsif (scalar(@subs) == 0)
184
printf $outfile "No match\n";
188
while (scalar(@subs) != 0)
190
printf $outfile (" 0: %s\n", &pchars($subs[0]));
191
printf $outfile (" 0+ %s\n", &pchars($subs[17])) if $showrest;
193
for ($i = 1; $i <= 16; $i++)
195
if (defined $subs[$i])
197
while ($last_printed++ < $i-1)
198
{ printf $outfile ("%2d: <unset>\n", $last_printed); }
199
printf $outfile ("%2d: %s\n", $i, &pchars($subs[$i]));
203
splice(@subs, 0, 18);
209
# printf $outfile "\n";