1
# Copyright (C) 2008-2009 Sun Microsystems, Inc. All rights reserved.
2
# Use is subject to license terms.
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; version 2 of the License.
8
# This program is distributed in the hope that it will be useful, but
9
# WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
# General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
18
package GenTest::Grammar;
23
GRAMMAR_FLAG_COMPACT_RULES
24
GRAMMAR_FLAG_SKIP_RECURSIVE_RULES
30
use GenTest::Constants;
31
use GenTest::Grammar::Rule;
36
use constant GRAMMAR_RULES => 0;
37
use constant GRAMMAR_FILE => 1;
38
use constant GRAMMAR_STRING => 2;
39
use constant GRAMMAR_FLAGS => 3;
41
use constant GRAMMAR_FLAG_COMPACT_RULES => 1;
42
use constant GRAMMAR_FLAG_SKIP_RECURSIVE_RULES => 2;
50
my $grammar = $class->SUPER::new({
51
'grammar_file' => GRAMMAR_FILE,
52
'grammar_string' => GRAMMAR_STRING,
53
'grammar_flags' => GRAMMAR_FLAGS,
54
'grammar_rules' => GRAMMAR_RULES
58
if (defined $grammar->rules()) {
59
$grammar->[GRAMMAR_STRING] = $grammar->toString();
61
$grammar->[GRAMMAR_RULES] = {};
63
if (defined $grammar->file()) {
64
my $parse_result = $grammar->parseFromFile($grammar->file());
65
return undef if $parse_result > STATUS_OK;
68
if (defined $grammar->string()) {
69
my $parse_result = $grammar->parseFromString($grammar->string());
70
return undef if $parse_result > STATUS_OK;
78
return $_[0]->[GRAMMAR_FILE];
82
return $_[0]->[GRAMMAR_STRING];
88
my $rules = $grammar->rules();
89
return join("\n\n", map { $grammar->rule($_)->toString() } sort keys %$rules);
94
my ($grammar, $grammar_file) = @_;
96
open (GF, $grammar_file) or die "Unable to open() grammar $grammar_file: $!";
97
read (GF, my $grammar_string, -s $grammar_file) or die "Unable to read() $grammar_file: $!";
99
$grammar->[GRAMMAR_STRING] = $grammar_string;
101
return $grammar->parseFromString($grammar_string);
104
sub parseFromString {
105
my ($grammar, $grammar_string) = @_;
108
# provide an #include directive
111
while ($grammar_string =~ s{#include [<"](.*?)[>"]$}{
114
my $include_file = $1;
115
open (IF, $1) or die "Unable to open include file $include_file: $!";
116
read (IF, my $include_string, -s $include_file) or die "Unable to open $include_file: $!";
120
# Strip comments. Note that this is not Perl-code safe, since perl fragments
121
# can contain both comments with # and the $# expression. A proper lexer will fix this
123
$grammar_string =~ s{#.*$}{}iomg;
125
# Join lines ending in \
127
$grammar_string =~ s{\\$}{ }iomg;
129
# Strip end-line whitespace
131
$grammar_string =~ s{\s+$}{}iomg;
133
# Add terminating \n to ease parsing
135
$grammar_string = $grammar_string."\n";
137
my @rule_strings = split (";\s*[\r\n]+", $grammar_string);
141
foreach my $rule_string (@rule_strings) {
142
my ($rule_name, $components_string) = $rule_string =~ m{^(.*?)\s*:(.*)$}sio;
144
$rule_name =~ s{[\r\n]}{}gsio;
145
$rule_name =~ s{^\s*}{}gsio;
147
next if $rule_name eq '';
149
say("Warning: Rule $rule_name is defined twice.") if exists $rules{$rule_name};
151
my @component_strings = split (m{\|}, $components_string);
155
foreach my $component_string (@component_strings) {
156
# Remove leading whitespace
157
$component_string =~ s{^\s+}{}sgio;
158
$component_string =~ s{\s+$}{}sgio;
160
# Rempove repeating whitespaces
161
$component_string =~ s{\s+}{ }sgio;
163
# Split this so that each identifier is separated from all syntax elements
164
# The identifier can start with a lowercase letter or an underscore , plus quotes
166
$component_string =~ s{([_a-z0-9'"`\{\}\$\[\]]+)}{|$1|}sgio;
168
# Revert overzealous splitting that splits things like _varchar(32) into several tokens
170
$component_string =~ s{([a-z0-9_]+)\|\(\|(\d+)\|\)}{$1($2)|}sgo;
172
# Remove leading and trailing pipes
173
$component_string =~ s{^\|}{}sgio;
174
$component_string =~ s{\|$}{}sgio;
177
(exists $components{$component_string}) &&
178
($grammar->[GRAMMAR_FLAGS] & GRAMMAR_FLAG_COMPACT_RULES)
182
$components{$component_string}++;
185
my @component_parts = split (m{\|}, $component_string);
188
(grep { $_ eq $rule_name } @component_parts) &&
189
($grammar->[GRAMMAR_FLAGS] & GRAMMAR_FLAG_SKIP_RECURSIVE_RULES)
191
say("Skipping recursive production in rule '$rule_name'.") if rqg_debug();
196
# If this grammar rule contains Perl code, assemble it between the various
197
# component parts it was split into. This "reconstructive" step is definitely bad design
198
# The way to do it properly would be to tokenize the grammar using a full-blown lexer
199
# which should hopefully come up in a future version.
202
my $nesting_level = 0;
207
if ($component_parts[$pos] =~ m{\{}so) {
208
$code_start = $pos if $nesting_level == 0; # Code segment starts here
209
my $bracket_count = ($component_parts[$pos] =~ tr/{//);
210
$nesting_level = $nesting_level + $bracket_count;
213
if ($component_parts[$pos] =~ m{\}}so) {
214
my $bracket_count = ($component_parts[$pos] =~ tr/}//);
215
$nesting_level = $nesting_level - $bracket_count;
216
if ($nesting_level == 0) {
217
# Resemble the entire Perl code segment into a single string
218
splice(@component_parts, $code_start, ($pos - $code_start + 1) , join ('', @component_parts[$code_start..$pos]));
219
$pos = $code_start + 1;
223
last if $pos > $#component_parts;
227
push @components, \@component_parts;
230
my $rule = GenTest::Grammar::Rule->new(
232
components => \@components
234
$rules{$rule_name} = $rule;
237
$grammar->[GRAMMAR_RULES] = \%rules;
242
return $_[0]->[GRAMMAR_RULES]->{$_[1]};
246
return $_[0]->[GRAMMAR_RULES];
250
delete $_[0]->[GRAMMAR_RULES]->{$_[1]};
254
# Check if the grammar is tagged with query properties such as RESULTSET_ or ERROR_1234
258
if ($_[0]->[GRAMMAR_STRING] =~ m{RESULTSET_|ERROR_|QUERY_}so) {
266
## Make a new grammar using the patch_grammar to replace old rules and
270
my ($self, $patch_grammar) = @_;
272
my $patch_rules = $patch_grammar->rules();
274
my $rules = $self->rules();
276
foreach my $ruleName (keys %$patch_rules) {
277
$rules->{$ruleName} = $patch_rules->{$ruleName};
280
my $new_grammar = GenTest::Grammar->new(grammar_rules => $rules);
285
sub firstMatchingRule {
286
my ($self, @ids) = @_;
287
foreach my $x (@ids) {
288
return $self->rule($x) if defined $self->rule($x);
294
## The "body" of topGrammar
298
my ($self, $level, $max, @rules) = @_;
301
foreach my $rule (@rules) {
302
foreach my $c (@{$rule->components()}) {
304
foreach my $cp (@$c) {
305
push @subrules,$self->rule($cp) if defined $self->rule($cp);
308
$self->topGrammarX($level + 1, $max -1,@subrules);
309
if (defined $componentrules) {
310
foreach my $sr (keys %$componentrules) {
311
$result->{$sr} = $componentrules->{$sr};
315
$result->{$rule->name()} = $rule;
325
## Produce a new grammar which is the toplevel $level rules of this
330
my ($self, $levels, @startrules) = @_;
332
my $start = $self->firstMatchingRule(@startrules);
334
my $rules = $self->topGrammarX(0,$levels, $start);
336
return GenTest::Grammar->new(grammar_rules => $rules);
340
## Produce a new grammar keeping a masked set of rules. The mask is 16
341
## bits. If the mask is too short, we use the original mask as a seed
342
## for a random number generator and generate more 16-bit values as
343
## needed. The mask is applied in alphapetical order on the rules to
344
## ensure a deterministicresult since I don't trust the Perl %hashes
345
## to be always ordered the same twhen they are produced e.g. from
346
## topGrammar or whatever...
351
my ($self, $mask) = @_;
354
my $rules = $self->rules();
359
my $prng = GenTest::Random->new(seed => $mask);
360
## Generate the first 16 bits.
361
my $mask16 = $prng->uint16(0,0x7fff);
362
foreach my $rulename (sort keys %$rules) {
363
my $rule = $self->rule($rulename);
365
foreach my $x (@{$rule->components()}) {
366
push @newComponents, $x if (1 << ($i++)) & $mask16;
370
$mask = $prng->uint16(0,0x7fff);
376
## If no components were chosen, we chose all to have a working
378
if ($#newComponents < 0) {
381
$newRule= GenTest::Grammar::Rule->new(name => $rulename,
382
components => \@newComponents);
384
$newRuleset{$rulename}= $newRule;
388
return GenTest::Grammar->new(grammar_rules => \%newRuleset);