2
A library of useful helper classes to the saxlib classes, for the
3
convenience of application and driver writers.
8
import types, sys, urllib, urlparse, os, string
9
import handler, _exceptions, xmlreader
12
# List of the type objects that prepare_input_source() treats as
# "a string naming the document" rather than an InputSource or a file.
try:
    _StringTypes = [types.StringType, types.UnicodeType]
except AttributeError: # 1.5 compatibility:UnicodeType not defined
    # getattr() keeps the module importable on interpreters whose
    # `types` module has no StringType either; where StringType exists
    # the result is exactly [types.StringType].
    _StringTypes = [getattr(types, "StringType", str)]
16
def escape(data, entities={}):
    """Escape &, <, and > in a string of data.

    You can escape other strings of data by passing a dictionary as
    the optional entities parameter.  The keys and values must all be
    strings; each key will be replaced with its corresponding value.

    Returns the escaped string; the input is not modified.
    """
    # The ampersand has to be replaced first: the other replacements
    # introduce ampersands of their own, which must not be escaped again.
    data = data.replace("&", "&amp;")
    data = data.replace("<", "&lt;")
    data = data.replace(">", "&gt;")
    # `entities` is only read, never mutated, so the shared default
    # dictionary is harmless here.
    for chars, entity in entities.items():
        data = data.replace(chars, entity)
    return data
32
class DefaultHandler(handler.EntityResolver, handler.DTDHandler,
                     handler.ContentHandler, handler.ErrorHandler):
    """Default base class for SAX2 event handlers. Implements empty
    methods for all callback methods, which can be overridden by
    application implementors. Replaces the deprecated SAX1 HandlerBase
    class."""
    # The class adds nothing of its own: every callback comes from the
    # four handler base classes listed above.
42
"""Represents a location in an XML entity. Initialized by being passed
43
a locator, from which it reads off the current location, which is then
46
def __init__(self, locator):
47
self.__col = locator.getColumnNumber()
48
self.__line = locator.getLineNumber()
49
self.__pubid = locator.getPublicId()
50
self.__sysid = locator.getSystemId()
52
def getColumnNumber(self):
55
def getLineNumber(self):
58
def getPublicId(self):
61
def getSystemId(self):
67
"A simple class that just prints error messages to standard out."
69
def __init__(self, level=0, outfile=sys.stderr):
71
self._outfile = outfile
73
def warning(self, exception):
75
self._outfile.write("WARNING in %s: %s\n" %
76
(self.__getpos(exception),
77
exception.getMessage()))
79
def error(self, exception):
81
self._outfile.write("ERROR in %s: %s\n" %
82
(self.__getpos(exception),
83
exception.getMessage()))
85
def fatalError(self, exception):
87
self._outfile.write("FATAL ERROR in %s: %s\n" %
88
(self.__getpos(exception),
89
exception.getMessage()))
91
def __getpos(self, exception):
92
if isinstance(exception, _exceptions.SAXParseException):
93
return "%s:%s:%s" % (exception.getSystemId(),
94
exception.getLineNumber(),
95
exception.getColumnNumber())
102
"A simple class that just raises the exceptions it is passed."
104
def __init__(self, level = 0):
107
def error(self, exception):
111
def fatalError(self, exception):
115
def warning(self, exception):
119
# --- AttributesImpl now lives in xmlreader
120
from xmlreader import AttributesImpl
122
# --- XMLGenerator is the SAX2 ContentHandler for writing back XML
125
try:
    import codecs

    def _outputwrapper(stream,encoding):
        """Wrap `stream` in the stream writer registered for `encoding`.

        stream   -- object with a write() method that receives the
                    encoded output.
        encoding -- name of a codec known to the codecs module.

        Returns the writer object; a LookupError from the codecs module
        is propagated when the encoding is unknown.
        """
        writerclass = codecs.getwriter(encoding)
        return writerclass(stream)
except ImportError: # 1.5 compatibility: fall back to do-nothing
    def _outputwrapper(stream,encoding):
        """Return `stream` unchanged; `encoding` is ignored because no
        codecs module is available."""
        return stream
132
class XMLGenerator(handler.ContentHandler):
    """SAX2 ContentHandler that writes the events it receives back out
    as XML text.

    out      -- object with a write() method; sys.stdout is used when
                it is None.
    encoding -- name of the output encoding.  It is handed to
                _outputwrapper() together with `out` and is echoed in
                the XML declaration written by startDocument().
    """

    # Extra replacement applied to attribute values: they are written
    # between double quotes, so a literal quote must not appear in them.
    _attr_entities = {'"': "&quot;"}

    def __init__(self, out=None, encoding="iso-8859-1"):
        if out is None:
            out = sys.stdout
        handler.ContentHandler.__init__(self)
        self._out = _outputwrapper(out,encoding)
        self._ns_contexts = [{}] # contains uri -> prefix dicts
        self._current_context = self._ns_contexts[-1]
        self._undeclared_ns_maps = []
        self._encoding = encoding

    # Internal helpers

    def _qname(self, name):
        """Turn a (uri, localname) pair into the name to write, using
        the prefix currently recorded for the uri.  A name without a
        uri, or one whose uri is mapped to the prefix None (default
        namespace), is written as the bare local name."""
        uri, localname = name[0], name[1]
        if uri is None:
            return localname
        prefix = self._current_context[uri]
        if prefix is None:
            return localname
        return prefix + ":" + localname

    def _write_attr(self, name, value):
        "Write one attribute as ' name=\"value\"' with the value escaped."
        self._out.write(' %s="%s"' %
                        (name, escape(value, self._attr_entities)))

    # ContentHandler methods

    def startDocument(self):
        self._out.write('<?xml version="1.0" encoding="%s"?>\n' %
                        self._encoding)

    def startPrefixMapping(self, prefix, uri):
        # The copy pushed here is the state *before* this mapping; it is
        # what endPrefixMapping() switches back to.
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix
        # Remembered so that the next start tag carries the declaration.
        self._undeclared_ns_maps.append((prefix, uri))

    def endPrefixMapping(self, prefix):
        self._current_context = self._ns_contexts[-1]
        del self._ns_contexts[-1]

    def startElement(self, name, attrs):
        self._out.write('<' + name)
        for (attr_name, value) in attrs.items():
            self._write_attr(attr_name, value)
        self._out.write('>')

    def endElement(self, name):
        self._out.write('</%s>' % name)

    def startElementNS(self, name, qname, attrs):
        self._out.write('<' + self._qname(name))

        # Emit the namespace declarations queued by startPrefixMapping().
        for k,v in self._undeclared_ns_maps:
            if k is None:
                self._out.write(' xmlns="%s"' % v)
            else:
                self._out.write(' xmlns:%s="%s"' % (k,v))
        self._undeclared_ns_maps = []

        for (attr_name, value) in attrs.items():
            self._write_attr(self._qname(attr_name), value)
        self._out.write('>')

    def endElementNS(self, name, qname):
        # XXX: if qname is not None, we better use it.
        # Python 2.0b2 requires us to use the recorded prefix for
        # name[0], though, so the qname argument is ignored and the
        # name is rebuilt from the current prefix mapping.
        self._out.write('</%s>' % self._qname(name))

    def characters(self, content):
        self._out.write(escape(content))

    def ignorableWhitespace(self, content):
        # Written through as received, without escaping.
        self._out.write(content)

    def processingInstruction(self, target, data):
        self._out.write('<?%s %s?>' % (target, data))
212
# --- ContentGenerator is the SAX1 DocumentHandler for writing back XML
213
class ContentGenerator(XMLGenerator):
    """SAX1 flavour of XMLGenerator: adapts the three-argument
    characters() call to the one-argument form used by XMLGenerator."""

    def characters(self, str, start, end):
        # SAX1 passes a buffer plus two integers where SAX2 passes a
        # ready-made string.  The third argument is added to `start` to
        # form the upper slice bound.  For plain strings a buffer object
        # might be preferable to the slice copy.
        stop = start + end
        selected = str[start:stop]
        return XMLGenerator.characters(self, selected)
221
class XMLFilterBase(xmlreader.XMLReader):
    """This class is designed to sit between an XMLReader and the
    client application's event handlers.  By default, it does nothing
    but pass requests up to the reader and events on to the handlers
    unmodified, but subclasses can override specific methods to modify
    the event stream or the configuration requests as they pass
    through.

    parent -- the reader the filter forwards its requests to; it can
              also be supplied later with setParent().
    """

    def __init__(self, parent = None):
        xmlreader.XMLReader.__init__(self)
        # Every XMLReader method below reads self._parent, so it must
        # always exist, even before a parent has been supplied.
        self._parent = parent

    # ErrorHandler methods

    def error(self, exception):
        self._err_handler.error(exception)

    def fatalError(self, exception):
        self._err_handler.fatalError(exception)

    def warning(self, exception):
        self._err_handler.warning(exception)

    # ContentHandler methods

    def setDocumentLocator(self, locator):
        self._cont_handler.setDocumentLocator(locator)

    def startDocument(self):
        self._cont_handler.startDocument()

    def endDocument(self):
        self._cont_handler.endDocument()

    def startPrefixMapping(self, prefix, uri):
        self._cont_handler.startPrefixMapping(prefix, uri)

    def endPrefixMapping(self, prefix):
        self._cont_handler.endPrefixMapping(prefix)

    def startElement(self, name, attrs):
        self._cont_handler.startElement(name, attrs)

    def endElement(self, name):
        self._cont_handler.endElement(name)

    def startElementNS(self, name, qname, attrs):
        self._cont_handler.startElementNS(name, qname, attrs)

    def endElementNS(self, name, qname):
        self._cont_handler.endElementNS(name, qname)

    def characters(self, content):
        self._cont_handler.characters(content)

    def ignorableWhitespace(self, chars):
        self._cont_handler.ignorableWhitespace(chars)

    def processingInstruction(self, target, data):
        self._cont_handler.processingInstruction(target, data)

    def skippedEntity(self, name):
        self._cont_handler.skippedEntity(name)

    # DTDHandler methods

    def notationDecl(self, name, publicId, systemId):
        self._dtd_handler.notationDecl(name, publicId, systemId)

    def unparsedEntityDecl(self, name, publicId, systemId, ndata):
        self._dtd_handler.unparsedEntityDecl(name, publicId, systemId, ndata)

    # EntityResolver methods

    def resolveEntity(self, publicId, systemId):
        # The resolver's answer has to travel back to the parent reader;
        # without the return the reader would always receive None.
        return self._ent_handler.resolveEntity(publicId, systemId)

    # XMLReader methods

    def parse(self, source):
        # Register the filter as every kind of handler on the parent so
        # that all events pass through the methods above.
        self._parent.setContentHandler(self)
        self._parent.setErrorHandler(self)
        self._parent.setEntityResolver(self)
        self._parent.setDTDHandler(self)
        self._parent.parse(source)

    def setLocale(self, locale):
        self._parent.setLocale(locale)

    def getFeature(self, name):
        return self._parent.getFeature(name)

    def setFeature(self, name, state):
        self._parent.setFeature(name, state)

    def getProperty(self, name):
        return self._parent.getProperty(name)

    def setProperty(self, name, value):
        self._parent.setProperty(name, value)

    # XMLFilter methods

    def getParent(self):
        "Returns the reader this filter forwards its requests to."
        return self._parent

    def setParent(self, parent):
        "Sets the reader this filter forwards its requests to."
        self._parent = parent
318
# FIXME: remove this backward compatibility hack when not needed anymore
319
XMLFilterImpl = XMLFilterBase
321
# --- BaseIncrementalParser
323
class BaseIncrementalParser(xmlreader.IncrementalParser):
    """This class implements the parse method of the XMLReader
    interface using the feed, close and reset methods of the
    IncrementalParser interface as a convenience to SAX 2.0 driver
    writers."""

    def parse(self, source):
        """Parse `source` by feeding the parser the contents of its
        byte stream in chunks of 16384 bytes.

        source -- anything prepare_input_source() accepts.

        startDocument() and endDocument() are reported to the content
        handler around the feeding; close() and reset() are called once
        the stream is exhausted.
        """
        source = prepare_input_source(source)
        self.prepareParser(source)

        self._cont_handler.startDocument()

        # FIXME: what about char-stream?
        inf = source.getByteStream()
        chunk = inf.read(16384)
        # An empty read marks the end of the stream; testing truth
        # rather than comparing with "" works for every string type.
        while chunk:
            self.feed(chunk)
            chunk = inf.read(16384)

        self.close()
        self.reset()

        self._cont_handler.endDocument()

    def prepareParser(self, source):
        """This method is called by the parse implementation to allow
        the SAX 2.0 driver to prepare itself for parsing."""
        raise NotImplementedError("prepareParser must be overridden!")
352
# --- Utility functions
354
def prepare_input_source(source, base = ""):
    """This function takes an InputSource and an optional base URL and
    returns a fully resolved InputSource object ready for reading.

    source -- an InputSource, a string (taken as the system identifier)
              or an object with a read() method (taken as the byte
              stream; its `name` attribute, if any, becomes the system
              identifier).
    base   -- file name or URL that a relative system identifier is
              resolved against.

    When the source has no byte stream yet, one is opened from the
    system identifier: as a local file if such a file exists, otherwise
    through urllib.
    """

    if type(source) in _StringTypes:
        source = xmlreader.InputSource(source)
    elif hasattr(source, "read"):
        f = source
        source = xmlreader.InputSource()
        source.setByteStream(f)
        if hasattr(f, "name"):
            source.setSystemId(f.name)

    if source.getByteStream() is None:
        sysid = source.getSystemId()
        basehead = os.path.split(os.path.normpath(base))[0]
        sysidfilename = os.path.join(basehead, sysid)
        # Test and open the very path that is recorded as the system
        # identifier, so that the identifier always names the file that
        # is actually read.  With the default base the path equals sysid.
        if os.path.isfile(sysidfilename):
            source.setSystemId(sysidfilename)
            f = open(sysidfilename, "rb")
        else:
            source.setSystemId(urlparse.urljoin(base, sysid))
            f = urllib.urlopen(source.getSystemId())

        source.setByteStream(f)

    return source
381
# ===========================================================================
383
# DEPRECATED SAX 1.0 CLASSES
385
# ===========================================================================
390
"""An implementation of AttributeList that takes an (attr,val) hash
391
and uses it to implement the AttributeList interface."""
393
def __init__(self, map):
397
return len(self.map.keys())
399
def getName(self, i):
401
return self.map.keys()[i]
405
def getType(self, i):
408
def getValue(self, i):
410
if type(i)==types.IntType:
411
return self.map[self.getName(i)]
420
def __getitem__(self, key):
421
if type(key)==types.IntType:
422
return self.map.keys()[key]
427
return self.map.items()
430
return self.map.keys()
432
def has_key(self,key):
433
return self.map.has_key(key)
435
def get(self, key, alternative=None):
436
return self.map.get(key, alternative)
439
return AttributeMap(self.map.copy())
442
return self.map.values()
444
# --- Event broadcasting object
446
class EventBroadcaster:
    """Takes a list of objects and forwards any method calls received
    to all objects in the list. The attribute list holds the list and
    can freely be modified by clients."""

    class Event:
        "Helper objects that represent event methods."

        def __init__(self,list,name):
            # The list object itself is kept, not a copy, so changes a
            # client makes to it are seen when the event is called.
            self.list=list
            self.name=name

        def __call__(self,*rest):
            # Call the method called self.name on every object, in list
            # order, with the arguments received; results are discarded.
            for obj in self.list:
                getattr(obj,self.name)(*rest)

    def __init__(self,list):
        self.list=list

    def __getattr__(self,name):
        # Only reached for attributes that are not found normally, i.e.
        # for everything except `list` and the methods defined here.
        return self.Event(self.list,name)

    def __repr__(self):
        return "<EventBroadcaster instance at %d>" % id(self)
471
# --- ESIS document handler
473
class ESISDocHandler(saxlib.HandlerBase):
    "A SAX document handler that produces naive ESIS output."

    def __init__(self,writer=sys.stdout):
        # writer: object with a write() method that receives one output
        # line per event.
        self.writer=writer

    def processingInstruction (self,target, remainder):
        """Receive an event signalling that a processing instruction
        has been found."""
        self.writer.write("?"+target+" "+remainder+"\n")

    def startElement(self,name,amap):
        "Receive an event signalling the start of an element."
        self.writer.write("("+name+"\n")
        # One 'A' line per attribute, in the order amap.keys() gives.
        for a_name in amap.keys():
            self.writer.write("A"+a_name+" "+amap[a_name]+"\n")

    def endElement(self,name):
        "Receive an event signalling the end of an element."
        self.writer.write(")"+name+"\n")

    def characters(self,data,start_ix,length):
        "Receive an event signalling that character data has been found."
        self.writer.write("-"+data[start_ix:start_ix+length]+"\n")
500
class Canonizer(saxlib.HandlerBase):
    "A SAX document handler that produces canonized XML output."

    def __init__(self,writer=sys.stdout):
        # Nesting depth of the element currently open; character data
        # is only written while it is greater than zero.
        self.elem_level=0
        # writer: object with a write() method receiving the output.
        self.writer=writer

    def processingInstruction (self,target, remainder):
        # Processing instructions whose target is "xml" are dropped.
        if not target=="xml":
            self.writer.write("<?"+target+" "+remainder+"?>")

    def startElement(self,name,amap):
        self.writer.write("<"+name)

        # Attributes are written in sorted name order so that the output
        # does not depend on the order in which the map yields them.
        a_names=list(amap.keys())
        a_names.sort()

        for a_name in a_names:
            self.writer.write(" "+a_name+"=\"")
            self.write_data(amap[a_name])
            self.writer.write("\"")
        self.writer.write(">")
        self.elem_level=self.elem_level+1

    def endElement(self,name):
        self.writer.write("</"+name+">")
        self.elem_level=self.elem_level-1

    def ignorableWhitespace(self,data,start_ix,length):
        # Treated exactly like ordinary character data.
        self.characters(data,start_ix,length)

    def characters(self,data,start_ix,length):
        if self.elem_level>0:
            self.write_data(data[start_ix:start_ix+length])

    def write_data(self,data):
        """Writes datachars to writer, with &, <, " and > replaced by
        entity references and tab, line feed and carriage return
        replaced by numeric character references."""
        # The ampersand goes first so that the ampersands introduced by
        # the later replacements are not escaped a second time.
        data=data.replace("&","&amp;")
        data=data.replace("<","&lt;")
        data=data.replace("\"","&quot;")
        data=data.replace(">","&gt;")
        data=data.replace(chr(9),"&#9;")
        data=data.replace(chr(10),"&#10;")
        data=data.replace(chr(13),"&#13;")
        self.writer.write(data)
549
"""A re-implementation of the htmllib, sgmllib and xmllib interfaces as a
550
SAX DocumentHandler."""
555
# - translate_references
568
import saxexts # only used here
569
self.parser=saxexts.XMLParserFactory.make_parser()
570
self.handler=mllib.Handler(self.parser,self)
574
self.parser.feed(data)
580
return self.handler.get_stack()
582
# --- Handler methods (to be overridden)
584
def handle_starttag(self,name,method,atts):
587
def handle_endtag(self,name,method):
590
def handle_data(self,data):
593
def handle_proc(self,target,data):
596
def unknown_starttag(self,name,atts):
599
def unknown_endtag(self,name):
602
def syntax_error(self,message):
605
# --- The internal handler class
607
class Handler(saxlib.DocumentHandler,saxlib.ErrorHandler):
608
"""An internal class to handle SAX events and translate them to mllib
611
def __init__(self,driver,handler):
613
self.driver.setDocumentHandler(self)
614
self.driver.setErrorHandler(self)
624
# --- DocumentHandler methods
626
def characters(self, ch, start, length):
627
self.handler.handle_data(ch[start:start+length])
629
def endElement(self, name):
630
if hasattr(self.handler,"end_"+name):
631
self.handler.handle_endtag(name,
632
getattr(self.handler,"end_"+name))
634
self.handler.unknown_endtag(name)
638
def ignorableWhitespace(self, ch, start, length):
639
self.handler.handle_data(ch[start:start+length])
641
def processingInstruction(self, target, data):
642
self.handler.handle_proc(target,data)
644
def startElement(self, name, atts):
645
self.stack.append(name)
647
if hasattr(self.handler,"start_"+name):
648
self.handler.handle_starttag(name,
649
getattr(self.handler,
653
self.handler.unknown_starttag(name,atts)
655
# --- ErrorHandler methods
657
def error(self, exception):
658
self.handler.syntax_error(str(exception))
660
def fatalError(self, exception):
661
raise RuntimeError(str(exception))