1
# See bottom of file for copyright and license details
5
---+ package Foswiki::Infix::Parser
7
A simple stack-based parser that parses infix expressions with nonary,
8
unary and binary operators specified using an operator table.
10
Escapes are supported in strings, using backslash.
14
package Foswiki::Infix::Parser;
19
require Foswiki::Infix::Error;
20
require Foswiki::Infix::Node;
23
sub MONITOR_PARSER { 0 }
27
---++ new($client_class, \%options) -> parser object
29
Creates a new infix parser. Operators must be added for it to be useful.
31
The tokeniser matches tokens in the following order: operators,
32
quotes (" and '), numbers, words, brackets. If you have any overlaps (e.g.
33
an operator '<' and a bracket operator '<<') then the first choice
36
=$client_class= needs to be the _name_ of a _package_ that supports the
37
following two functions:
38
* =newLeaf($val, $type)= - create a terminal. $type will be:
39
1 if the terminal matched the =words= specification (see below).
40
2 if it is a number matched the =numbers= specification (see below)
41
3 if it is a quoted string
42
* =newNode($op, @params) - create a new operator node. @params
43
is a variable-length list of parameters, left to right. $op
44
is a reference to the operator hash in the \@opers list.
45
These functions should throw Error::Simple in the event of errors.
46
Foswiki::Infix::Node is such a class, ripe for subclassing.
48
The remaining parameters are named, and specify options that affect the
49
behaviour of the parser:
50
1 =words=>qr//= - should be an RE specifying legal words (unquoted
51
terminals that are not operators i.e. names and numbers). By default
53
It's ok if operator names match this RE; operators always have precedence
55
2 =numbers=>qr//= - should be an RE specifying legal numbers (unquoted
56
terminals that are not operators or words). By default
57
this is =qr/[+-]?(?:\d+\.\d+|\d+\.|\.\d+|\d+)(?:[eE][+-]?\d+)?/=,
58
which matches integers and floating-point numbers. Number
59
matching always takes precedence over word matching (i.e. "1xy" will
60
be parsed as a number followed by a word. A typical usage of this option
61
is when you only want to recognise integers, in which case you would set
62
this to =numbers => qr/\d+/=.
67
my ( $class, $options ) = @_;
71
client_class => $options->{nodeClass},
79
defined( $options->{numbers} )
81
: qr/[+-]?(\d+\.\d+|\d+\.|\.\d+|\d+)([eE][+-]?\d+)?/;
84
defined( $options->{words} )
93
---++ ObjectMethod addOperator(\%oper)
94
Add an operator to the parser.
96
=\%oper= is a hash (or an object), containing the following fields:
97
* =name= - operator string
98
* =prec= - operator precedence, positive non-zero integer.
99
Larger number => higher precedence.
100
* =arity= - set to 1 if this operator is unary, 2 for binary. Arity 0
101
is legal, should you ever need it.
102
* =close= - used with bracket operators. =name= should be the open
103
bracket string, and =close= the close bracket. The existance of =close=
104
marks this as a bracket operator.
105
* =casematters== - indicates that the parser should check case in the
106
operator name (i.e. treat 'AND' and 'and' as different).
107
By default operators are case insensitive. *Note* that operator
108
names must be caselessly unique i.e. you can't define 'AND' and 'and'
109
as different operators in the same parser. Does not affect the
110
interpretation of non-operator terminals (names).
111
Other fields in the hash can be used for other purposes; the parse tree
112
generated by this parser will point to the hashes passed to this function.
114
Field names in the hash starting with =InfixParser_= are reserved for use
120
my ( $this, $op ) = @_;
121
push( @{ $this->{operators} }, $op );
122
$this->{initialised} = 0;
125
# Initialise on demand before a first parse
129
return if $this->{initialised};
131
# Build operator lists
134
foreach my $op ( @{ $this->{operators} } ) {
136
# Build a RE for the operator. Note that operators
137
# that end in \w are terminated with \b
138
my $opre = quotemeta( $op->{name} );
139
$opre .= ( $op->{name} =~ /\w$/ ) ? '\b' : '';
140
if ( $op->{casematters} ) {
141
$op->{InfixParser_RE} = qr/$opre/;
144
$op->{InfixParser_RE} = qr/$opre/i;
146
if ( defined( $op->{close} ) ) {
149
$this->{bracket_ops}->{ lc( $op->{name} ) } = $op;
151
$opre = quotemeta( $op->{close} );
152
$opre .= ( $op->{close} =~ /\w$/ ) ? '\b' : '';
153
if ( $op->{casematters} ) {
154
$op->{InfixParser_closeRE} = qr/$opre/;
157
$op->{InfixParser_closeRE} = qr/$opre/i;
159
push( @bracketOpsRE, $op->{InfixParser_RE} );
162
$this->{standard_ops}->{ lc( $op->{name} ) } = $op;
163
push( @stdOpsRE, $op->{InfixParser_RE} );
167
# Build regular expression of all standard operators.
168
$this->{standard_op_REs} = join( '|', @stdOpsRE );
170
# and repeat for bracket operators
171
$this->{bracket_op_REs} = join( '|', @bracketOpsRE );
173
$this->{initialised} = 1;
178
---++ ObjectMethod parse($string) -> $parseTree
179
Parses =$string=, calling =newLeaf= and =newNode= in the client class
180
as necessary to create a parse tree. Returns the result of calling =newNode=
181
on the root of the parse.
183
Throws Foswiki::Infix::Error in the event of parse errors.
188
my ( $this, $expr ) = @_;
190
$this->_initialise();
191
return _parse( $this, $expr, \$data );
194
# Simple stack parser, after Knuth
196
my ( $this, $expr, $input, $term ) = @_;
198
throw Foswiki::Infix::Error("Empty expression")
199
unless $expr && $expr =~ /\S/;
206
print STDERR "Parse: $$input\n" if MONITOR_PARSER;
208
while ( $$input =~ /\S/ ) {
209
if ( $$input =~ s/^\s*($this->{standard_op_REs})// ) {
211
print STDERR "Tok: op '$opname'\n" if MONITOR_PARSER;
212
my $op = $this->{standard_ops}->{ lc($opname) };
213
ASSERT( $op, $opname ) if DEBUG;
214
_apply( $this, $op->{prec}, \@opers, \@opands );
217
elsif ( $$input =~ s/^\s*(['"])(|.*?[^\\])\1// ) {
218
print STDERR "Tok: qs '$1'\n" if MONITOR_PARSER;
221
$this->{client_class}
222
->newLeaf( $val, $Foswiki::Infix::Node::STRING ) );
224
elsif ( $$input =~ s/^\s*($this->{numbers})// ) {
225
print STDERR "Tok: number '$1'\n" if MONITOR_PARSER;
228
$this->{client_class}
229
->newLeaf( $val, $Foswiki::Infix::Node::NUMBER ) );
231
elsif ( $$input =~ s/^\s*($this->{words})// ) {
232
print STDERR "Tok: word '$1'\n" if MONITOR_PARSER;
235
$this->{client_class}
236
->newLeaf( $val, $Foswiki::Infix::Node::NAME ) );
238
elsif ( $$input =~ s/^\s*($this->{bracket_op_REs})// ) {
240
print STDERR "Tok: open bracket $opname\n" if MONITOR_PARSER;
241
my $op = $this->{bracket_ops}->{ lc($opname) };
242
ASSERT($op) if DEBUG;
243
_apply( $this, $op->{prec}, \@opers, \@opands );
246
$this->_parse( $expr, $input, $op->{InfixParser_closeRE} )
249
elsif ( defined($term) && $$input =~ s/^\s*$term// ) {
250
print STDERR "Tok: close bracket $term\n" if MONITOR_PARSER;
254
throw Foswiki::Infix::Error( 'Syntax error', $expr, $$input );
257
_apply( $this, 0, \@opers, \@opands );
259
catch Error::Simple with {
261
# Catch errors thrown during the tree building process
262
throw Foswiki::Infix::Error( shift, $expr, $$input );
264
throw Foswiki::Infix::Error( 'Missing operator', $expr, $$input )
265
unless scalar(@opands) == 1;
266
throw Foswiki::Infix::Error(
267
'Excess operators (' . join( ' ', map { $_->{name} } @opers ) . ')',
270
my $result = pop(@opands);
271
print STDERR "Return " . $result->stringify() . "\n" if MONITOR_PARSER;
275
# Apply ops on the stack while their precedence is higher than $prec
276
# For each operator on the stack with precedence >= $prec, pop the
277
# required number of operands, construct a new parse node and push
278
# the node back onto the operand stack.
280
my ( $this, $prec, $opers, $opands ) = @_;
282
while (scalar(@$opers)
283
&& $opers->[-1]->{prec} >= $prec
284
&& scalar(@$opands) >= $opers->[-1]->{arity} )
286
my $op = pop(@$opers);
287
my $arity = $op->{arity};
290
unshift( @prams, pop(@$opands) );
292
# Should never get thrown, but just in case...
293
throw Foswiki::Infix::Error("Missing operand to '$op->{name}'")
296
if (MONITOR_PARSER) {
297
print STDERR "Apply $op->{name}("
298
. join( ', ', map { $_->stringify() } @prams ) . ")\n";
300
push( @$opands, $this->{client_class}->newNode( $op, @prams ) );
308
Module of Foswiki - The Free and Open Source Wiki, http://foswiki.org/, http://Foswiki.org/
310
# Copyright (C) 2008-2009 Foswiki Contributors. All Rights Reserved.
311
# Foswiki Contributors are listed in the AUTHORS file in the root
312
# of this distribution. NOTE: Please extend that file, not this notice.
314
# Additional copyrights apply to some or all of the code in this
317
# Copyright (C) 2005-2007 TWiki Contributors. All Rights Reserved.
318
# TWiki Contributors are listed in the AUTHORS file in the root
319
# of this distribution. NOTE: Please extend that file, not this notice.
321
This program is free software; you can redistribute it and/or
322
modify it under the terms of the GNU General Public License
323
as published by the Free Software Foundation; either version 2
324
of the License, or (at your option) any later version. For
325
more details read LICENSE in the root of this distribution.
327
This program is distributed in the hope that it will be useful,
328
but WITHOUT ANY WARRANTY; without even the implied warranty of
329
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
331
As per the GPL, removal of this notice is prohibited.
333
Author: Crawford Currie http://c-dot.co.uk