2
#============================================================================
3
# This library is free software; you can redistribute it and/or
4
# modify it under the terms of version 2.1 of the GNU Lesser General Public
5
# License as published by the Free Software Foundation.
7
# This library is distributed in the hope that it will be useful,
8
# but WITHOUT ANY WARRANTY; without even the implied warranty of
9
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10
# Lesser General Public License for more details.
12
# You should have received a copy of the GNU Lesser General Public
13
# License along with this library; if not, write to the Free Software
14
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
15
#============================================================================
16
# Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
17
#============================================================================
20
Input-driven parsing for s-expression (sxp) format.
21
Create a parser: pin = Parser();
22
Then call pin.input(buf) with your input.
23
Call pin.input_eof() when done.
24
Use pin.ready() to see if a value has been parsed, pin.get_val()
25
to get a parsed value. You can call ready and get_val at any time -
26
you don't have to wait until after calling input_eof.
29
from __future__ import generators
35
from StringIO import StringIO
68
mime_type = "application/sxp"
89
escapes_rev[escapes[k]] = k
91
class ParseError(StandardError):
93
def __init__(self, parser, value):
102
def __init__(self, fn, parent=None):
110
return ParserState(fn, parent=self)
115
self.error = sys.stderr
126
def push_state(self, fn):
    """Enter a new parser state handled by fn.

    fn  state handler to install

    The current state's push() builds the new state, which then
    replaces self.state.
    """
    # NOTE(review): push() presumably returns a child state whose parent
    # is the current state, so it can be restored later - confirm against
    # ParserState.push.
    self.state = self.state.push(fn)
131
self.state = self.state.parent
132
if self.state and self.state.fn == self.state_start:
133
# Return to start state - produce the value.
134
self.val += self.state.val
138
def in_class(self, c, s):
    """Test whether c belongs to the character class s.

    c   character (or substring) to look for
    s   string listing the members of the class

    returns True if c occurs in s, False otherwise
    """
    # Substring membership; same result as s.find(c) >= 0.
    return c in s
141
def in_space_class(self, c):
    """Test whether c is a whitespace character.

    c   character to test

    The class is space, tab, newline, vertical tab, form feed and
    carriage return.

    returns result of in_class() for that class
    """
    return self.in_class(c, ' \t\n\v\f\r')
144
def is_separator(self, c):
    """Test whether c is a separator character.

    c   character to test

    The separators are the bracket characters {}()<>[] and the semicolon.

    returns result of in_class() for that class
    """
    return self.in_class(c, '{}()<>[];')
147
def in_comment_class(self, c):
    """Test whether c is the comment character '#'.

    c   character to test

    returns result of in_class() for the class '#'
    """
    return self.in_class(c, '#')
150
def in_string_quote_class(self, c):
    """Test whether c is a string quote character.

    c   character to test

    Both the double quote and the single quote are in the class.

    returns result of in_class() for that class
    """
    return self.in_class(c, '"\'')
153
def in_printable_class(self, c):
    """Test whether c is a printable character.

    c   character to test

    The class is string.printable from the standard library.

    returns result of in_class() for that class
    """
    return self.in_class(c, string.printable)
156
def set_error_stream(self, error):
169
def input(self, buf):
170
if not buf or len(buf) == 0:
176
def input_char(self, c):
185
if self.state is None:
186
self.begin_start(None)
190
return len(self.val) > 0
194
self.val = self.val[1:]
200
def begin_start(self, c):
201
self.state = ParserState(self.state_start)
204
self.val += self.state.val
207
def state_start(self, c):
210
elif self.in_space_class(c):
212
elif self.in_comment_class(c):
213
self.begin_comment(c)
214
elif c == k_list_open:
216
elif c == k_list_close:
217
raise ParseError(self, "syntax error: "+c)
218
elif self.in_string_quote_class(c):
220
elif self.in_printable_class(c):
223
# ctrl-D, EOT: end-of-text.
226
raise ParseError(self, "invalid character: code %d" % ord(c))
228
def begin_comment(self, c):
229
self.push_state(self.state_comment)
232
def end_comment(self):
235
def state_comment(self, c):
236
if c == '\n' or self.at_eof():
241
def begin_string(self, c):
242
self.push_state(self.state_string)
245
def end_string(self):
247
self.state.parent.val.append(val)
250
def state_string(self, c):
252
raise ParseError(self, "unexpected EOF")
253
elif c == self.state.delim:
256
self.push_state(self.state_escape)
260
def state_escape(self, c):
262
raise ParseError(self, "unexpected EOF")
265
self.state.parent.buf += d
268
self.state.fn = self.state_hex
270
elif c in string.octdigits:
271
self.state.fn = self.state_octal
275
# ignore escape if it doesn't match anything we know
276
self.state.parent.buf += '\\'
279
def state_octal(self, c):
282
self.state.val += ord(c) - ord('0')
284
if self.state.val < 0 or self.state.val > 0xff:
285
raise ParseError(self, "invalid octal escape: out of range " + self.state.buf)
286
if len(self.state.buf) == 3:
290
d = chr(self.state.val)
291
self.state.parent.buf += d
295
raise ParseError(self, "unexpected EOF")
296
elif '0' <= c <= '7':
298
elif len(self.state.buf):
302
def state_hex(self, c):
304
d = chr(self.state.val)
305
self.state.parent.buf += d
310
self.state.val += ord(c) - ord(d)
312
if self.state.val < 0 or self.state.val > 0xff:
313
raise ParseError(self, "invalid hex escape: out of range " + self.state.buf)
314
if len(self.state.buf) == 2:
318
raise ParseError(self, "unexpected EOF")
319
elif '0' <= c <= '9':
321
elif 'A' <= c <= 'F':
323
elif 'a' <= c <= 'f':
329
def begin_atom(self, c):
330
self.push_state(self.state_atom)
335
self.state.parent.val.append(val)
338
def state_atom(self, c):
341
elif (self.is_separator(c) or
342
self.in_space_class(c) or
343
self.in_comment_class(c)):
349
def begin_list(self, c):
350
self.push_state(self.state_list)
354
self.state.parent.val.append(val)
357
def state_list(self, c):
359
raise ParseError(self, "unexpected EOF")
360
elif c == k_list_close:
366
"""Check if an sxpr is an atom.
368
if sxpr.isalnum() or sxpr == '@':
371
if c in string.whitespace: return 0
372
if c in '"\'\\(){}[]<>$#&%^': return 0
373
if c in string.ascii_letters: continue
374
if c in string.digits: continue
375
if c in '.-_:/~': continue
379
def show(sxpr, out=sys.stdout):
380
"""Print an sxpr in bracketed (lisp-style) syntax.
382
if isinstance(sxpr, (types.ListType, types.TupleType)):
383
out.write(k_list_open)
389
out.write(k_list_close)
390
elif isinstance(sxpr, (types.IntType, types.FloatType)):
392
elif isinstance(sxpr, types.StringType) and atomp(sxpr):
395
out.write(repr(str(sxpr)))
397
def show_xml(sxpr, out=sys.stdout):
398
"""Print an sxpr in XML syntax.
400
if isinstance(sxpr, (types.ListType, types.TupleType)):
402
out.write('<%s' % element)
403
for attr in attributes(sxpr):
404
out.write(' %s=%s' % (attr[0], attr[1]))
407
for x in children(sxpr):
411
out.write('</%s>' % element)
412
elif isinstance(sxpr, types.StringType) and atomp(sxpr):
417
def elementp(sxpr, elt=None):
418
"""Check if an sxpr is an element of the given type.
423
return (isinstance(sxpr, (types.ListType, types.TupleType))
425
and (None == elt or sxpr[0] == elt))
428
"""Get the element name of an sxpr.
429
If the sxpr is not an element (i.e. it's an atomic value) its name
434
returns name (None if not an element).
437
if isinstance(sxpr, types.StringType):
439
elif isinstance(sxpr, (types.ListType, types.TupleType)) and len(sxpr):
443
def attributes(sxpr):
444
"""Get the attribute list of an sxpr.
448
returns attribute list
451
if isinstance(sxpr, (types.ListType, types.TupleType)) and len(sxpr) > 1:
453
if elementp(attr, k_attr_open):
457
def attribute(sxpr, key, val=None):
458
"""Get an attribute of an sxpr.
462
val default value (default None)
464
returns attribute value
466
for x in attributes(sxpr):
472
def children(sxpr, elt=None):
473
"""Get children of an sxpr.
476
elt optional element type to filter by
478
returns children (filtered by elt if specified)
481
if isinstance(sxpr, (types.ListType, types.TupleType)) and len(sxpr) > 1:
484
if elementp(x, k_attr_open):
489
return elementp(x, elt)
490
val = filter(iselt, val)
493
def child(sxpr, elt, val=None):
494
"""Get the first child of the given element type.
500
for x in children(sxpr):
506
def child_at(sxpr, index, val=None):
507
"""Get the child at the given index (zero-based).
513
kids = children(sxpr)
514
if len(kids) > index:
518
def child0(sxpr, val=None):
519
"""Get the zeroth child.
521
return child_at(sxpr, 0, val)
523
def child1(sxpr, val=None):
524
"""Get the first child.
526
return child_at(sxpr, 1, val)
528
def child2(sxpr, val=None):
529
"""Get the second child.
531
return child_at(sxpr, 2, val)
533
def child3(sxpr, val=None):
534
"""Get the third child.
536
return child_at(sxpr, 3, val)
538
def child4(sxpr, val=None):
539
"""Get the fourth child.
541
return child_at(sxpr, 4, val)
543
def child_value(sxpr, elt, val=None):
544
"""Get the value of the first child of the given element type.
545
Assumes the child has an atomic value.
551
kid = child(sxpr, elt)
553
val = child_at(kid, 0, val)
556
def has_id(sxpr, id):
557
"""Test if an s-expression has a given id.
559
return attribute(sxpr, 'id') == id
561
def with_id(sxpr, id, val=None):
562
"""Find the first s-expression with a given id, at any depth.
566
val value if not found (default None)
570
if isinstance(sxpr, (types.ListType, types.TupleType)):
576
if v is None: continue
581
def child_with_id(sxpr, id, val=None):
582
"""Find the first child with a given id.
586
val value if not found (default None)
590
if isinstance(sxpr, (types.ListType, types.TupleType)):
597
def elements(sxpr, ctxt=None):
598
"""Generate elements (at any depth).
599
Visit elements in pre-order.
600
Values generated are (node, context)
601
The context is None if there is no parent, otherwise
602
(index, parent, context) where index is the node's index w.r.t its parent,
603
and context is the parent's context.
611
for n in children(sxpr):
612
if isinstance(n, (types.ListType, types.TupleType)):
613
# Calling elements() recursively does not generate recursively,
614
# it just returns a generator object. So we must iterate over it.
615
for v in elements(n, (i, sxpr, ctxt)):
620
"""Merge sxprs s1 and s2.
621
Returns an sxpr containing all the fields from s1 and s2, with
622
entries in s1 overriding s2. Recursively merges fields.
634
(m1, v1) = child_map(s1)
635
(m2, v2) = child_map(s2)
637
for (k1, f1) in m1.items():
638
merge_list(val, f1, m2.get(k1, []))
639
for (k2, f2) in m2.items():
640
if k2 in m1: continue
647
def merge_list(sxpr, l1, l2):
648
"""Merge element lists l1 and l2 into sxpr.
649
The lists l1 and l2 contain elements that all have the same name.
650
Values from l1 are merged with values in l2 and stored in sxpr.
651
If one list is longer than the other the excess values are used
654
@param sxpr to merge into
657
@return modified sxpr
662
for i in range(0, nmin):
663
sxpr.append(merge(l1[i], l2[i]))
664
for i in range(nmin, n1):
666
for i in range(nmin, n2):
671
"""Get a dict of the elements in sxpr and a list of its values.
672
The dict maps element name to the list of elements with that name,
673
and the list is the non-element children.
680
for x in children(sxpr):
691
"""Convert an sxpr to a string.
704
"""Create an sxpr by parsing a string.
720
def all_from_string(s):
721
"""Create an sxpr list by parsing a string.
731
"""Completely parse all input from 'io'.
734
returns list of values, None if incomplete
735
raises ParseError on parse error
750
if __name__ == '__main__':
754
buf = sys.stdin.read(1024)
755
#buf = sys.stdin.readline()
760
print '****** val=', val