2
"""
SAX driver for the pyexpat C module. This driver works with
pyexpat.__version__ == '2.22'.
"""

from xml.sax._exceptions import *
from xml.sax.handler import feature_validation, feature_namespaces
from xml.sax.handler import feature_namespace_prefixes
from xml.sax.handler import feature_external_ges, feature_external_pes
from xml.sax.handler import feature_string_interning
from xml.sax.handler import property_xml_string, property_interning_dict

# xml.parsers.expat does not raise ImportError in Jython, so the platform
# has to be checked explicitly before attempting the import below.
import sys
if sys.platform[:4] == "java":
    raise SAXReaderNotAvailable("expat not available in Java", None)

# Only report "not supported" when expat is genuinely unusable: either the
# import fails, or the module lacks the ParserCreate factory.
try:
    from xml.parsers import expat
except ImportError:
    raise SAXReaderNotAvailable("expat not supported", None)
else:
    if not hasattr(expat, "ParserCreate"):
        raise SAXReaderNotAvailable("expat not supported", None)
from xml.sax import xmlreader, saxutils, handler

AttributesImpl = xmlreader.AttributesImpl
AttributesNSImpl = xmlreader.AttributesNSImpl

# If we're using a sufficiently recent version of Python, we can use
# weak references to avoid cycles between the parser and content
# handler, otherwise we'll just have to pretend.
try:
    import weakref
except ImportError:
    def _mkproxy(o):
        # No weakref support: hand back the object itself (strong reference).
        return o
else:
    _mkproxy = weakref.proxy
48
class ExpatLocator(xmlreader.Locator):
    """Locator for use with the ExpatParser class.

    This uses a weak reference to the parser object to avoid creating
    a circular reference between the parser and the content handler.
    """

    def __init__(self, parser):
        """Remember *parser* through ``_mkproxy`` instead of directly."""
        self._ref = _mkproxy(parser)

    def getColumnNumber(self):
        """Return expat's current column, or None if no expat parser exists."""
        parser = self._ref
        if parser._parser is None:
            return None
        return parser._parser.ErrorColumnNumber

    def getLineNumber(self):
        """Return expat's current line, or 1 if no expat parser exists."""
        parser = self._ref
        if parser._parser is None:
            return 1
        return parser._parser.ErrorLineNumber

    def getPublicId(self):
        """Return the public identifier of the parser's current source."""
        parser = self._ref
        if parser is None:
            return None
        return parser._source.getPublicId()

    def getSystemId(self):
        """Return the system identifier of the parser's current source."""
        parser = self._ref
        if parser is None:
            return None
        return parser._source.getSystemId()
84
class ExpatParser(xmlreader.IncrementalParser, xmlreader.Locator):
85
"""SAX driver for the pyexpat C module."""
87
def __init__(self, namespaceHandling=0, bufsize=2**16-20):
88
xmlreader.IncrementalParser.__init__(self, bufsize)
89
self._source = xmlreader.InputSource()
91
self._namespaces = namespaceHandling
92
self._lex_handler_prop = None
94
self._entity_stack = []
95
self._external_ges = 1
96
self._interning = None
100
def parse(self, source):
101
"Parse an XML document from a URL or an InputSource."
102
source = saxutils.prepare_input_source(source)
104
self._source = source
106
self._cont_handler.setDocumentLocator(ExpatLocator(self))
107
xmlreader.IncrementalParser.parse(self, source)
109
def prepareParser(self, source):
110
if source.getSystemId() is not None:
111
self._parser.SetBase(source.getSystemId())
113
# Redefined setContentHandler to allow changing handlers during parsing
115
def setContentHandler(self, handler):
116
xmlreader.IncrementalParser.setContentHandler(self, handler)
118
self._reset_cont_handler()
120
def getFeature(self, name):
121
if name == feature_namespaces:
122
return self._namespaces
123
elif name == feature_string_interning:
124
return self._interning is not None
125
elif name in (feature_validation, feature_external_pes,
126
feature_namespace_prefixes):
128
elif name == feature_external_ges:
129
return self._external_ges
130
raise SAXNotRecognizedException("Feature '%s' not recognized" % name)
132
def setFeature(self, name, state):
134
raise SAXNotSupportedException("Cannot set features while parsing")
136
if name == feature_namespaces:
137
self._namespaces = state
138
elif name == feature_external_ges:
139
self._external_ges = state
140
elif name == feature_string_interning:
142
if self._interning is None:
145
self._interning = None
146
elif name == feature_validation:
148
raise SAXNotSupportedException(
149
"expat does not support validation")
150
elif name == feature_external_pes:
152
raise SAXNotSupportedException(
153
"expat does not read external parameter entities")
154
elif name == feature_namespace_prefixes:
156
raise SAXNotSupportedException(
157
"expat does not report namespace prefixes")
159
raise SAXNotRecognizedException(
160
"Feature '%s' not recognized" % name)
162
def getProperty(self, name):
163
if name == handler.property_lexical_handler:
164
return self._lex_handler_prop
165
elif name == property_interning_dict:
166
return self._interning
167
elif name == property_xml_string:
169
if hasattr(self._parser, "GetInputContext"):
170
return self._parser.GetInputContext()
172
raise SAXNotRecognizedException(
173
"This version of expat does not support getting"
176
raise SAXNotSupportedException(
177
"XML string cannot be returned when not parsing")
178
raise SAXNotRecognizedException("Property '%s' not recognized" % name)
180
def setProperty(self, name, value):
181
if name == handler.property_lexical_handler:
182
self._lex_handler_prop = value
184
self._reset_lex_handler_prop()
185
elif name == property_interning_dict:
186
self._interning = value
187
elif name == property_xml_string:
188
raise SAXNotSupportedException("Property '%s' cannot be set" %
191
raise SAXNotRecognizedException("Property '%s' not recognized" %
194
# IncrementalParser methods
196
def feed(self, data, isFinal = 0):
197
if not self._parsing:
200
self._cont_handler.startDocument()
203
# The isFinal parameter is internal to the expat reader.
204
# If it is set to true, expat will check validity of the entire
205
# document. When feeding chunks, they are not normally final -
206
# except when invoked from close.
207
self._parser.Parse(data, isFinal)
208
except expat.error, e:
209
exc = SAXParseException(expat.ErrorString(e.code), e, self)
210
# FIXME: when to invoke error()?
211
self._err_handler.fatalError(exc)
214
if self._entity_stack:
215
# If we are completing an external entity, do nothing here
217
self.feed("", isFinal = 1)
218
self._cont_handler.endDocument()
220
# break cycle created by expat handlers pointing to our methods
223
def _reset_cont_handler(self):
224
self._parser.ProcessingInstructionHandler = \
225
self._cont_handler.processingInstruction
226
self._parser.CharacterDataHandler = self._cont_handler.characters
228
def _reset_lex_handler_prop(self):
229
lex = self._lex_handler_prop
230
parser = self._parser
232
parser.CommentHandler = None
233
parser.StartCdataSectionHandler = None
234
parser.EndCdataSectionHandler = None
235
parser.StartDoctypeDeclHandler = None
236
parser.EndDoctypeDeclHandler = None
238
parser.CommentHandler = lex.comment
239
parser.StartCdataSectionHandler = lex.startCDATA
240
parser.EndCdataSectionHandler = lex.endCDATA
241
parser.StartDoctypeDeclHandler = self.start_doctype_decl
242
parser.EndDoctypeDeclHandler = lex.endDTD
246
self._parser = expat.ParserCreate(self._source.getEncoding(), " ",
247
intern=self._interning)
248
self._parser.namespace_prefixes = 1
249
self._parser.StartElementHandler = self.start_element_ns
250
self._parser.EndElementHandler = self.end_element_ns
252
self._parser = expat.ParserCreate(self._source.getEncoding(),
253
intern = self._interning)
254
self._parser.StartElementHandler = self.start_element
255
self._parser.EndElementHandler = self.end_element
257
self._reset_cont_handler()
258
self._parser.UnparsedEntityDeclHandler = self.unparsed_entity_decl
259
self._parser.NotationDeclHandler = self.notation_decl
260
self._parser.StartNamespaceDeclHandler = self.start_namespace_decl
261
self._parser.EndNamespaceDeclHandler = self.end_namespace_decl
263
self._decl_handler_prop = None
264
if self._lex_handler_prop:
265
self._reset_lex_handler_prop()
266
# self._parser.DefaultHandler =
267
# self._parser.DefaultHandlerExpand =
268
# self._parser.NotStandaloneHandler =
269
self._parser.ExternalEntityRefHandler = self.external_entity_ref
271
self._parser.SkippedEntityHandler = self.skipped_entity_handler
272
except AttributeError:
273
# This pyexpat does not support SkippedEntity
275
self._parser.SetParamEntityParsing(
276
expat.XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE)
279
self._entity_stack = []
283
def getColumnNumber(self):
284
if self._parser is None:
286
return self._parser.ErrorColumnNumber
288
def getLineNumber(self):
289
if self._parser is None:
291
return self._parser.ErrorLineNumber
293
def getPublicId(self):
294
return self._source.getPublicId()
296
def getSystemId(self):
297
return self._source.getSystemId()
300
def start_element(self, name, attrs):
301
self._cont_handler.startElement(name, AttributesImpl(attrs))
303
def end_element(self, name):
304
self._cont_handler.endElement(name)
306
def start_element_ns(self, name, attrs):
312
pair = pair[0], pair[1]
319
for (aname, value) in attrs.items():
320
parts = aname.split()
325
apair = (None, aname)
327
qname = "%s:%s" % (parts[2], parts[1])
328
apair = parts[0], parts[1]
334
newattrs[apair] = value
335
qnames[apair] = qname
337
self._cont_handler.startElementNS(pair, None,
338
AttributesNSImpl(newattrs, qnames))
340
def end_element_ns(self, name):
345
pair = pair[0], pair[1]
349
self._cont_handler.endElementNS(pair, None)
351
# this is not used (call directly to ContentHandler)
352
def processing_instruction(self, target, data):
353
self._cont_handler.processingInstruction(target, data)
355
# this is not used (call directly to ContentHandler)
356
def character_data(self, data):
357
self._cont_handler.characters(data)
359
def start_namespace_decl(self, prefix, uri):
360
self._cont_handler.startPrefixMapping(prefix, uri)
362
def end_namespace_decl(self, prefix):
363
self._cont_handler.endPrefixMapping(prefix)
365
def start_doctype_decl(self, name, sysid, pubid, has_internal_subset):
366
self._lex_handler_prop.startDTD(name, pubid, sysid)
368
def unparsed_entity_decl(self, name, base, sysid, pubid, notation_name):
369
self._dtd_handler.unparsedEntityDecl(name, pubid, sysid, notation_name)
371
def notation_decl(self, name, base, sysid, pubid):
372
self._dtd_handler.notationDecl(name, pubid, sysid)
374
def external_entity_ref(self, context, base, sysid, pubid):
375
if not self._external_ges:
378
source = self._ent_handler.resolveEntity(pubid, sysid)
379
source = saxutils.prepare_input_source(source,
380
self._source.getSystemId() or
383
self._entity_stack.append((self._parser, self._source))
384
self._parser = self._parser.ExternalEntityParserCreate(context)
385
self._source = source
388
xmlreader.IncrementalParser.parse(self, source)
390
return 0 # FIXME: save error info here?
392
(self._parser, self._source) = self._entity_stack[-1]
393
del self._entity_stack[-1]
396
def skipped_entity_handler(self, name, is_pe):
398
# The SAX spec requires to report skipped PEs with a '%'
400
self._cont_handler.skippedEntity(name)
404
def create_parser(*args, **kwargs):
    """Build an ExpatParser, passing every argument straight through."""
    parser = ExpatParser(*args, **kwargs)
    return parser
409
if __name__ == "__main__":
    # Manual smoke test: parse a local document and echo it back as XML.
    import xml.sax
    import xml.sax.saxutils
    p = create_parser()
    # XMLGenerator is defined in xml.sax.saxutils, not in xml.sax itself.
    p.setContentHandler(xml.sax.saxutils.XMLGenerator())
    p.setErrorHandler(xml.sax.ErrorHandler())
    p.parse("../../../hamlet.xml")