1
# Copyright (C) 2008-2009 Sun Microsystems, Inc. All rights reserved.
2
# Use is subject to license terms.
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; version 2 of the License.
8
# This program is distributed in the hope that it will be useful, but
9
# WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11
# General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, write to the Free Software
15
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
18
package GenTest::Grammar;
23
GRAMMAR_FLAG_COMPACT_RULES
29
use GenTest::Constants;
30
use GenTest::Grammar::Rule;
35
use constant GRAMMAR_RULES => 0;
36
use constant GRAMMAR_FILE => 1;
37
use constant GRAMMAR_STRING => 2;
38
use constant GRAMMAR_FLAGS => 3;
40
use constant GRAMMAR_FLAG_COMPACT_RULES => 1;
48
my $grammar = $class->SUPER::new({
49
'grammar_file' => GRAMMAR_FILE,
50
'grammar_string' => GRAMMAR_STRING,
51
'grammar_flags' => GRAMMAR_FLAGS,
52
'grammar_rules' => GRAMMAR_RULES
56
if (defined $grammar->rules()) {
57
$grammar->[GRAMMAR_STRING] = $grammar->toString();
59
$grammar->[GRAMMAR_RULES] = {};
61
if (defined $grammar->file()) {
62
my $parse_result = $grammar->parseFromFile($grammar->file());
63
return undef if $parse_result > STATUS_OK;
66
if (defined $grammar->string()) {
67
my $parse_result = $grammar->parseFromString($grammar->string());
68
return undef if $parse_result > STATUS_OK;
76
return $_[0]->[GRAMMAR_FILE];
80
return $_[0]->[GRAMMAR_STRING];
86
my $rules = $grammar->rules();
87
return join("\n\n", map { $grammar->rule($_)->toString() } sort keys %$rules);
92
my ($grammar, $grammar_file) = @_;
94
open (GF, $grammar_file) or die "Unable to open() grammar $grammar_file: $!";
95
read (GF, my $grammar_string, -s $grammar_file) or die "Unable to read() $grammar_file: $!";
97
$grammar->[GRAMMAR_STRING] = $grammar_string;
99
return $grammar->parseFromString($grammar_string);
102
sub parseFromString {
103
my ($grammar, $grammar_string) = @_;
106
# provide an #include directive
109
while ($grammar_string =~ s{#include [<"](.*?)[>"]$}{
112
my $include_file = $1;
113
open (IF, $1) or die "Unable to open include file $include_file: $!";
114
read (IF, my $include_string, -s $include_file) or die "Unable to open $include_file: $!";
118
# Strip comments. Note that this is not Perl-code safe, since perl fragments
119
# can contain both comments with # and the $# expression. A proper lexer will fix this
121
$grammar_string =~ s{#.*$}{}iomg;
123
# Join lines ending in \
125
$grammar_string =~ s{\\$}{ }iomg;
127
# Strip end-line whitespace
129
$grammar_string =~ s{\s+$}{}iomg;
131
# Add terminating \n to ease parsing
133
$grammar_string = $grammar_string."\n";
135
my @rule_strings = split (";\s*[\r\n]+", $grammar_string);
139
foreach my $rule_string (@rule_strings) {
140
my ($rule_name, $components_string) = $rule_string =~ m{^(.*?)\s*:(.*)$}sio;
142
$rule_name =~ s{[\r\n]}{}gsio;
143
$rule_name =~ s{^\s*}{}gsio;
145
next if $rule_name eq '';
147
say("Warning: Rule $rule_name is defined twice.") if exists $rules{$rule_name};
149
my @component_strings = split (m{\|}, $components_string);
153
foreach my $component_string (@component_strings) {
154
# Remove leading whitespace
155
$component_string =~ s{^\s+}{}sgio;
156
$component_string =~ s{\s+$}{}sgio;
158
# Rempove repeating whitespaces
159
$component_string =~ s{\s+}{ }sgio;
161
# Split this so that each identifier is separated from all syntax elements
162
# The identifier can start with a lowercase letter or an underscore , plus quotes
164
$component_string =~ s{([_a-z0-9'"`\{\}\$\[\]]+)}{|$1|}sgo;
166
# Revert overzealous splitting that splits things like _varchar(32) into several tokens
168
$component_string =~ s{([a-z0-9_]+)\|\(\|(\d+)\|\)}{$1($2)|}sgo;
170
# Remove leading and trailing pipes
171
$component_string =~ s{^\|}{}sgio;
172
$component_string =~ s{\|$}{}sgio;
175
(exists $components{$component_string}) &&
176
($grammar->[GRAMMAR_FLAGS] & GRAMMAR_FLAG_COMPACT_RULES)
180
$components{$component_string}++;
183
my @component_parts = split (m{\|}, $component_string);
186
# If this grammar rule contains Perl code, assemble it between the various
187
# component parts it was split into. This "reconstructive" step is definitely bad design
188
# The way to do it properly would be to tokenize the grammar using a full-blown lexer
189
# which should hopefully come up in a future version.
192
my $nesting_level = 0;
197
if ($component_parts[$pos] =~ m{\{}so) {
198
$code_start = $pos if $nesting_level == 0; # Code segment starts here
199
my $bracket_count = ($component_parts[$pos] =~ tr/{//);
200
$nesting_level = $nesting_level + $bracket_count;
203
if ($component_parts[$pos] =~ m{\}}so) {
204
my $bracket_count = ($component_parts[$pos] =~ tr/}//);
205
$nesting_level = $nesting_level - $bracket_count;
206
if ($nesting_level == 0) {
207
# Resemble the entire Perl code segment into a single string
208
splice(@component_parts, $code_start, ($pos - $code_start + 1) , join ('', @component_parts[$code_start..$pos]));
209
$pos = $code_start + 1;
213
last if $pos > $#component_parts;
217
push @components, \@component_parts;
220
my $rule = GenTest::Grammar::Rule->new(
222
components => \@components
224
$rules{$rule_name} = $rule;
227
$grammar->[GRAMMAR_RULES] = \%rules;
232
return $_[0]->[GRAMMAR_RULES]->{$_[1]};
236
return $_[0]->[GRAMMAR_RULES];
240
delete $_[0]->[GRAMMAR_RULES]->{$_[1]};
244
# Check if the grammar is tagged with query properties such as RESULTSET_ or ERROR_1234
248
if ($_[0]->[GRAMMAR_STRING] =~ m{RESULTSET_|ERROR_|QUERY_}so) {
256
## Make a new grammar using the patch_grammar to replace old rules and
260
my ($self, $patch_grammar) = @_;
262
my $patch_rules = $patch_grammar->rules();
264
my $rules = $self->rules();
266
foreach my $ruleName (keys %$patch_rules) {
267
$rules->{$ruleName} = $patch_rules->{$ruleName};
270
my $new_grammar = GenTest::Grammar->new(grammar_rules => $rules);
275
sub firstMatchingRule {
276
my ($self, @ids) = @_;
277
foreach my $x (@ids) {
278
return $self->rule($x) if defined $self->rule($x);
284
## The "body" of topGrammar
288
my ($self, $level, $max, @rules) = @_;
291
foreach my $rule (@rules) {
292
foreach my $c (@{$rule->components()}) {
294
foreach my $cp (@$c) {
295
push @subrules,$self->rule($cp) if defined $self->rule($cp);
298
$self->topGrammarX($level + 1, $max -1,@subrules);
299
if (defined $componentrules) {
300
foreach my $sr (keys %$componentrules) {
301
$result->{$sr} = $componentrules->{$sr};
305
$result->{$rule->name()} = $rule;
315
## Produce a new grammar which is the toplevel $level rules of this
320
my ($self, $levels, @startrules) = @_;
322
my $start = $self->firstMatchingRule(@startrules);
324
my $rules = $self->topGrammarX(0,$levels, $start);
326
return GenTest::Grammar->new(grammar_rules => $rules);
330
## Produce a new grammar keeping a masked set of rules. The mask is 16
331
## bits. If the mask is too short, we use the original mask as a seed
332
## for a random number generator and generate more 16-bit values as
333
## needed. The mask is applied in alphapetical order on the rules to
334
## ensure a deterministicresult since I don't trust the Perl %hashes
335
## to be always ordered the same twhen they are produced e.g. from
336
## topGrammar or whatever...
341
my ($self, $mask) = @_;
344
my $rules = $self->rules();
349
my $prng = GenTest::Random->new(seed => $mask);
350
## Generate the first 16 bits.
351
my $mask16 = $prng->uint16(0,0x7fff);
352
foreach my $rulename (sort keys %$rules) {
353
my $rule = $self->rule($rulename);
355
foreach my $x (@{$rule->components()}) {
356
push @newComponents, $x if (1 << ($i++)) & $mask16;
360
$mask = $prng->uint16(0,0x7fff);
366
## If no components were chosen, we chose all to have a working
368
if ($#newComponents < 0) {
371
$newRule= GenTest::Grammar::Rule->new(name => $rulename,
372
components => \@newComponents);
374
$newRuleset{$rulename}= $newRule;
378
return GenTest::Grammar->new(grammar_rules => \%newRuleset);