"""
This module contains the core classes of version 2.0 of SAX for Python.
This file provides only default classes with absolutely minimum
functionality, from which drivers and applications can be subclassed.

Many of these classes are empty and are included only as documentation
of the interfaces.

$Id: handler.py 53686 2007-02-09 05:37:30Z guido.van.rossum $
"""
14
#============================================================================
18
#============================================================================
20
# ===== ERRORHANDLER =====
23
"""Basic interface for SAX error handlers.
25
If you create an object that implements this interface and register
it with your XMLReader, the parser will call the methods in your
object to report all warnings and errors. There are three levels of
errors available: warnings, (possibly) recoverable errors, and
unrecoverable errors. All methods take a SAXParseException as the
only parameter."""
32
def error(self, exception):
    """Handle a recoverable error.

    exception is the object describing the error being reported.
    """
36
def fatalError(self, exception):
    """Handle a non-recoverable error.

    exception is the object describing the error being reported.
    """
40
def warning(self, exception):
    """Handle a warning.

    exception is the object describing the warning being reported.
    """
45
# ===== CONTENTHANDLER =====
48
"""Interface for receiving logical document content events.
50
This is the main callback interface in SAX, and the one most
51
important to applications. The order of events in this interface
52
mirrors the order of the information in the document."""
57
def setDocumentLocator(self, locator):
    """Receive a locator for locating the origin of document events.

    Called by the parser to give the application a locator.

    SAX parsers are strongly encouraged (though not absolutely
    required) to supply a locator: if a parser does so, it must supply
    the locator to the application by invoking this method before
    invoking any of the other methods in this interface.

    The locator allows the application to determine the end
    position of any document-related event, even if the parser is
    not reporting an error. Typically, the application will use
    this information for reporting its own errors (such as
    character content that does not match an application's
    business rules). The information returned by the locator is
    probably not sufficient for use with a search engine.

    Note that the locator will return correct information only
    during the invocation of the events in this interface. The
    application should not attempt to use it at any other time.
    """
    # Remember the locator on the handler instance.
    self._locator = locator
80
def startDocument(self):
    """Receive notification of the beginning of a document.

    The SAX parser will invoke this method only once, before any
    other methods in this interface or in DTDHandler (except for
    setDocumentLocator).
    """
87
def endDocument(self):
    """Receive notification of the end of a document.

    The SAX parser will invoke this method only once, and it will
    be the last method invoked during the parse. The parser shall
    not invoke this method until it has either abandoned parsing
    (because of an unrecoverable error) or reached the end of
    input.
    """
96
def startPrefixMapping(self, prefix, uri):
    """Begin the scope of a prefix-URI Namespace mapping.

    The information from this event is not necessary for normal
    Namespace processing: the SAX XML reader will automatically
    replace prefixes for element and attribute names when the
    http://xml.org/sax/features/namespaces feature is true (the
    default).

    There are cases, however, when applications need to use
    prefixes in character data or in attribute values, where they
    cannot safely be expanded automatically; the
    start/endPrefixMapping event supplies the information to the
    application to expand prefixes in those contexts itself, if
    necessary.

    Note that start/endPrefixMapping events are not guaranteed to
    be properly nested relative to each other: all
    startPrefixMapping events will occur before the corresponding
    startElement event, and all endPrefixMapping events will occur
    after the corresponding endElement event, but their order is
    not otherwise guaranteed.
    """
119
def endPrefixMapping(self, prefix):
    """End the scope of a prefix-URI mapping.

    See startPrefixMapping for details. This event will always
    occur after the corresponding endElement event, but the order
    of endPrefixMapping events is not otherwise guaranteed.
    """
126
def startElement(self, name, attrs):
    """Signal the start of an element in non-namespace mode.

    The name parameter contains the raw XML 1.0 name of the
    element type as a string and the attrs parameter holds an
    instance of the Attributes class containing the attributes of
    the element.
    """
134
def endElement(self, name):
    """Signal the end of an element in non-namespace mode.

    The name parameter contains the name of the element type, just
    as with the startElement event.
    """
140
def startElementNS(self, name, qname, attrs):
    """Signal the start of an element in namespace mode.

    The name parameter contains the name of the element type as a
    (uri, localname) tuple, the qname parameter the raw XML 1.0
    name used in the source document, and the attrs parameter
    holds an instance of the Attributes class containing the
    attributes of the element.

    The uri part of the name tuple is None for elements which have
    no namespace.
    """
152
def endElementNS(self, name, qname):
    """Signal the end of an element in namespace mode.

    The name parameter contains the name of the element type, just
    as with the startElementNS event.
    """
158
def characters(self, content):
    """Receive notification of character data.

    The Parser will call this method to report each chunk of
    character data. SAX parsers may return all contiguous
    character data in a single chunk, or they may split it into
    several chunks; however, all of the characters in any single
    event must come from the same external entity so that the
    Locator provides useful information.
    """
168
def ignorableWhitespace(self, whitespace):
    """Receive notification of ignorable whitespace in element content.

    Validating Parsers must use this method to report each chunk
    of ignorable whitespace (see the W3C XML 1.0 recommendation,
    section 2.10): non-validating parsers may also use this method
    if they are capable of parsing and using content models.

    SAX parsers may return all contiguous whitespace in a single
    chunk, or they may split it into several chunks; however, all
    of the characters in any single event must come from the same
    external entity, so that the Locator provides useful
    information.
    """
182
def processingInstruction(self, target, data):
    """Receive notification of a processing instruction.

    The Parser will invoke this method once for each processing
    instruction found: note that processing instructions may occur
    before or after the main document element.

    A SAX parser should never report an XML declaration (XML 1.0,
    section 2.8) or a text declaration (XML 1.0, section 4.3.1)
    using this method.
    """
193
def skippedEntity(self, name):
    """Receive notification of a skipped entity.

    The Parser will invoke this method once for each entity
    skipped. Non-validating processors may skip entities if they
    have not seen the declarations (because, for example, the
    entity was declared in an external DTD subset). All processors
    may skip external entities, depending on the values of the
    http://xml.org/sax/features/external-general-entities and the
    http://xml.org/sax/features/external-parameter-entities
    features.
    """
206
# ===== DTDHandler =====
209
"""Handle DTD events.
211
This interface specifies only those DTD events required for basic
212
parsing (unparsed entities and attributes)."""
214
def notationDecl(self, name, publicId, systemId):
    """Handle a notation declaration event.

    Receives the notation's name together with its public and system
    identifiers.
    """
217
def unparsedEntityDecl(self, name, publicId, systemId, ndata):
    """Handle an unparsed entity declaration event.

    Receives the entity's name, its public and system identifiers, and
    the ndata value from the declaration.
    """
221
# ===== ENTITYRESOLVER =====
223
class EntityResolver:
    """Basic interface for resolving entities.

    If you create an object implementing this interface and register
    the object with your Parser, the parser will call the method in
    your object to resolve all external entities. Note that
    DefaultHandler implements this interface with the default
    behaviour.
    """

    def resolveEntity(self, publicId, systemId):
        """Resolve the system identifier of an entity.

        Return either the system identifier to read from as a string,
        or an InputSource to read from. This default implementation
        returns systemId unchanged; publicId is not consulted.
        """
        return systemId
237
#============================================================================
241
#============================================================================
243
feature_namespaces = "http://xml.org/sax/features/namespaces"
# true: Perform Namespace processing (default).
# false: Optionally do not perform Namespace processing
#        (implies namespace-prefixes).
# access: (parsing) read-only; (not parsing) read/write

feature_namespace_prefixes = "http://xml.org/sax/features/namespace-prefixes"
# true: Report the original prefixed names and attributes used for Namespace
#       declarations.
# false: Do not report attributes used for Namespace declarations, and
#        optionally do not report original prefixed names (default).
# access: (parsing) read-only; (not parsing) read/write

feature_string_interning = "http://xml.org/sax/features/string-interning"
# true: All element names, prefixes, attribute names, Namespace URIs, and
#       local names are interned using the built-in intern function.
# false: Names are not necessarily interned, although they may be (default).
# access: (parsing) read-only; (not parsing) read/write

feature_validation = "http://xml.org/sax/features/validation"
# true: Report all validation errors (implies external-general-entities and
#       external-parameter-entities).
# false: Do not report validation errors.
# access: (parsing) read-only; (not parsing) read/write

feature_external_ges = "http://xml.org/sax/features/external-general-entities"
# true: Include all external general (text) entities.
# false: Do not include external general entities.
# access: (parsing) read-only; (not parsing) read/write

feature_external_pes = "http://xml.org/sax/features/external-parameter-entities"
# true: Include all external parameter entities, including the external
#       DTD subset.
# false: Do not include any external parameter entities, even the external
#        DTD subset.
# access: (parsing) read-only; (not parsing) read/write

# Every feature name defined above, in definition order.
all_features = [
    feature_namespaces,
    feature_namespace_prefixes,
    feature_string_interning,
    feature_validation,
    feature_external_ges,
    feature_external_pes,
]
288
#============================================================================
292
#============================================================================
294
property_lexical_handler = "http://xml.org/sax/properties/lexical-handler"
# data type: xml.sax.sax2lib.LexicalHandler
# description: An optional extension handler for lexical events like comments.

property_declaration_handler = "http://xml.org/sax/properties/declaration-handler"
# data type: xml.sax.sax2lib.DeclHandler
# description: An optional extension handler for DTD-related events other
#              than notations and unparsed entities.

property_dom_node = "http://xml.org/sax/properties/dom-node"
# data type: org.w3c.dom.Node
# description: When parsing, the current DOM node being visited if this is
#              a DOM iterator; when not parsing, the root DOM node for
#              iteration.
# access: (parsing) read-only; (not parsing) read/write

property_xml_string = "http://xml.org/sax/properties/xml-string"
# description: The literal string of characters that was the source for
#              the current event.

property_encoding = "http://www.python.org/sax/properties/encoding"
# description: The name of the encoding to assume for input data.
# access: write: set the encoding, e.g. established by a higher-level
#                protocol. May change during parsing (e.g. after
#                processing a META tag)
#         read:  return the current encoding (possibly established through
#                auto-detection).
# initial value: UTF-8

property_interning_dict = "http://www.python.org/sax/properties/interning-dict"
# data type: Dictionary
# description: The dictionary used to intern common strings in the document
# access: write: Request that the parser uses a specific dictionary, to
#                allow interning across different documents
#         read:  return the current interning dictionary, or None

# Every property name defined above, in definition order.
all_properties = [
    property_lexical_handler,
    property_declaration_handler,
    property_dom_node,
    property_xml_string,
    property_encoding,
    property_interning_dict,
]