1
"""Implementation of the DOM Level 3 'LS-Load' feature."""
6
from xml.dom.NodeFilter import NodeFilter
9
__all__ = ["DOMBuilder", "DOMEntityResolver", "DOMInputSource"]
13
"""Features object that has variables set for each DOMBuilder feature.
15
The DOMBuilder class uses an instance of this class to pass settings to
16
the ExpatBuilder class.
19
# Note that the DOMBuilder class in LoadSave constrains which of these
20
# values can be set using the DOM Level 3 LoadSave feature.
23
namespace_declarations = True
25
external_parameter_entities = True
26
external_general_entities = True
27
external_dtd_subset = True
28
validate_if_schema = False
30
datatype_normalization = False
31
create_entity_ref_nodes = True
33
whitespace_in_element_content = True
36
charset_overrides_xml_encoding = True
38
supported_mediatypes_only = False
50
ACTION_APPEND_AS_CHILDREN = 2
51
ACTION_INSERT_AFTER = 3
52
ACTION_INSERT_BEFORE = 4
54
_legal_actions = (ACTION_REPLACE, ACTION_APPEND_AS_CHILDREN,
55
ACTION_INSERT_AFTER, ACTION_INSERT_BEFORE)
58
self._options = Options()
60
def _get_entityResolver(self):
61
return self.entityResolver
62
def _set_entityResolver(self, entityResolver):
63
self.entityResolver = entityResolver
65
def _get_errorHandler(self):
66
return self.errorHandler
67
def _set_errorHandler(self, errorHandler):
68
self.errorHandler = errorHandler
70
def _get_filter(self):
72
def _set_filter(self, filter):
75
def setFeature(self, name, state):
76
if self.supportsFeature(name):
77
state = state and 1 or 0
79
settings = self._settings[(_name_xform(name), state)]
81
raise xml.dom.NotSupportedErr(
82
"unsupported feature: %r" % (name,))
84
for name, value in settings:
85
setattr(self._options, name, value)
87
raise xml.dom.NotFoundErr("unknown feature: " + repr(name))
89
def supportsFeature(self, name):
    """Return True if the options object has an attribute for *name*.

    The feature name is first normalised with ``_name_xform`` and the
    result is looked up as an attribute of ``self._options``.
    """
    option_name = _name_xform(name)
    return hasattr(self._options, option_name)
92
def canSetFeature(self, name, state):
    """Return True if ``self._settings`` has an entry for (*name*, *state*).

    The lookup key is the feature name normalised with ``_name_xform``
    paired with *state* collapsed to the integer 1 (truthy) or 0 (falsy).
    """
    flag = 1 if state else 0
    return (_name_xform(name), flag) in self._settings
96
# This dictionary maps from (feature,value) to a list of
97
# (option,value) pairs that should be set on the Options object.
98
# If a (feature,value) setting is not in this dictionary, it is
99
# not supported by the DOMBuilder.
102
("namespace_declarations", 0): [
103
("namespace_declarations", 0)],
104
("namespace_declarations", 1): [
105
("namespace_declarations", 1)],
108
("external_general_entities", 0): [
109
("external_general_entities", 0)],
110
("external_general_entities", 1): [
111
("external_general_entities", 1)],
112
("external_parameter_entities", 0): [
113
("external_parameter_entities", 0)],
114
("external_parameter_entities", 1): [
115
("external_parameter_entities", 1)],
116
("validate_if_schema", 0): [
117
("validate_if_schema", 0)],
118
("create_entity_ref_nodes", 0): [
119
("create_entity_ref_nodes", 0)],
120
("create_entity_ref_nodes", 1): [
121
("create_entity_ref_nodes", 1)],
123
("create_entity_ref_nodes", 0),
127
("whitespace_in_element_content", 0): [
128
("whitespace_in_element_content", 0)],
129
("whitespace_in_element_content", 1): [
130
("whitespace_in_element_content", 1)],
131
("cdata_sections", 0): [
132
("cdata_sections", 0)],
133
("cdata_sections", 1): [
134
("cdata_sections", 1)],
139
("charset_overrides_xml_encoding", 0): [
140
("charset_overrides_xml_encoding", 0)],
141
("charset_overrides_xml_encoding", 1): [
142
("charset_overrides_xml_encoding", 1)],
145
("namespace_declarations", 0),
146
("validate_if_schema", 0),
147
("create_entity_ref_nodes", 0),
149
("cdata_sections", 0),
150
("datatype_normalization", 1),
151
("whitespace_in_element_content", 1),
153
("charset_overrides_xml_encoding", 1)],
154
("supported_mediatypes_only", 0): [
155
("supported_mediatypes_only", 0)],
162
def getFeature(self, name):
163
xname = _name_xform(name)
165
return getattr(self._options, xname)
166
except AttributeError:
167
if name == "infoset":
168
options = self._options
169
return (options.datatype_normalization
170
and options.whitespace_in_element_content
172
and options.charset_overrides_xml_encoding
173
and not (options.namespace_declarations
174
or options.validate_if_schema
175
or options.create_entity_ref_nodes
177
or options.cdata_sections))
178
raise xml.dom.NotFoundErr("feature %s not known" % repr(name))
180
def parseURI(self, uri):
181
if self.entityResolver:
182
input = self.entityResolver.resolveEntity(None, uri)
184
input = DOMEntityResolver().resolveEntity(None, uri)
185
return self.parse(input)
187
def parse(self, input):
    """Parse the document described by *input* and return the result.

    A shallow copy of ``self._options`` is made and the builder's current
    ``filter`` and ``errorHandler`` are attached to that copy, so the
    shared options object itself is left unmodified.

    *input* must provide ``byteStream`` and ``systemId`` attributes.  When
    ``byteStream`` is None and ``systemId`` is set, the system identifier
    is opened with ``urllib.request.urlopen`` to obtain the stream.

    Returns whatever ``self._parse_bytestream(stream, options)`` returns.
    """
    options = copy.copy(self._options)
    options.filter = self.filter
    options.errorHandler = self.errorHandler
    fp = input.byteStream
    # The system identifier is an attribute of the input source (it is
    # listed in DOMInputSource.__slots__), so it must be read from
    # *input*; it is not one of the parser options.
    if fp is None and input.systemId:
        import urllib.request
        fp = urllib.request.urlopen(input.systemId)
    return self._parse_bytestream(fp, options)
197
def parseWithContext(self, input, cnode, action):
    """Validate *action*; the operation itself is not implemented.

    Raises ValueError when *action* is not one of ``self._legal_actions``
    and NotImplementedError for every legal action.
    """
    if action in self._legal_actions:
        raise NotImplementedError("Haven't written this yet...")
    raise ValueError("not a legal action")
202
def _parse_bytestream(self, stream, options):
203
import xml.dom.expatbuilder
204
builder = xml.dom.expatbuilder.makeBuilder(options)
205
return builder.parseFile(stream)
208
def _name_xform(name):
209
return name.lower().replace('-', '_')
212
class DOMEntityResolver(object):
213
__slots__ = '_opener',
215
def resolveEntity(self, publicId, systemId):
216
assert systemId is not None
217
source = DOMInputSource()
218
source.publicId = publicId
219
source.systemId = systemId
220
source.byteStream = self._get_opener().open(systemId)
222
# determine the encoding if the transport provided it
223
source.encoding = self._guess_media_encoding(source)
225
# determine the base URI if we can
226
import posixpath, urllib.parse
227
parts = urllib.parse.urlparse(systemId)
228
scheme, netloc, path, params, query, fragment = parts
229
# XXX should we check the scheme here as well?
230
if path and not path.endswith("/"):
231
path = posixpath.dirname(path) + "/"
232
parts = scheme, netloc, path, params, query, fragment
233
source.baseURI = urllib.parse.urlunparse(parts)
237
def _get_opener(self):
240
except AttributeError:
241
self._opener = self._create_opener()
244
def _create_opener(self):
245
import urllib.request
246
return urllib.request.build_opener()
248
def _guess_media_encoding(self, source):
249
info = source.byteStream.info()
250
if "Content-Type" in info:
251
for param in info.getplist():
252
if param.startswith("charset="):
253
return param.split("=", 1)[1].lower()
256
class DOMInputSource(object):
257
__slots__ = ('byteStream', 'characterStream', 'stringData',
258
'encoding', 'publicId', 'systemId', 'baseURI')
261
self.byteStream = None
262
self.characterStream = None
263
self.stringData = None
269
def _get_byteStream(self):
270
return self.byteStream
271
def _set_byteStream(self, byteStream):
272
self.byteStream = byteStream
274
def _get_characterStream(self):
275
return self.characterStream
276
def _set_characterStream(self, characterStream):
277
self.characterStream = characterStream
279
def _get_stringData(self):
280
return self.stringData
281
def _set_stringData(self, data):
282
self.stringData = data
284
def _get_encoding(self):
286
def _set_encoding(self, encoding):
287
self.encoding = encoding
289
def _get_publicId(self):
291
def _set_publicId(self, publicId):
292
self.publicId = publicId
294
def _get_systemId(self):
296
def _set_systemId(self, systemId):
297
self.systemId = systemId
299
def _get_baseURI(self):
301
def _set_baseURI(self, uri):
305
class DOMBuilderFilter:
306
"""Element filter which can be used to tailor construction of
310
# There's really no need for this class; concrete implementations
311
# should just implement the endElement() and startElement()
312
# methods as appropriate. Using this makes it easy to only
313
# implement one of them.
320
whatToShow = NodeFilter.SHOW_ALL
322
def _get_whatToShow(self):
323
return self.whatToShow
325
def acceptNode(self, element):
    """Return ``self.FILTER_ACCEPT``; *element* is not inspected."""
    return self.FILTER_ACCEPT
328
def startContainer(self, element):
    """Return ``self.FILTER_ACCEPT``; *element* is not inspected."""
    return self.FILTER_ACCEPT
335
"""Mixin to create documents that conform to the load/save spec."""
339
def _get_async(self):
341
def _set_async(self, async):
343
raise xml.dom.NotSupportedErr(
344
"asynchronous document loading is not supported")
347
# What does it mean to "clear" a document? Does the
348
# documentElement disappear?
349
raise NotImplementedError(
350
"haven't figured out what this means yet")
353
raise NotImplementedError("haven't written this yet")
355
def loadXML(self, source):
    """Not implemented; always raises NotImplementedError."""
    raise NotImplementedError("haven't written this yet")
358
def saveXML(self, snode):
361
elif snode.ownerDocument is not self:
362
raise xml.dom.WrongDocumentErr()
366
class DOMImplementationLS:
368
MODE_ASYNCHRONOUS = 2
370
def createDOMBuilder(self, mode, schemaType):
371
if schemaType is not None:
372
raise xml.dom.NotSupportedErr(
373
"schemaType not yet supported")
374
if mode == self.MODE_SYNCHRONOUS:
376
if mode == self.MODE_ASYNCHRONOUS:
377
raise xml.dom.NotSupportedErr(
378
"asynchronous builders are not supported")
379
raise ValueError("unknown value for mode")
381
def createDOMWriter(self):
    """Not implemented; always raises NotImplementedError."""
    message = "the writer interface hasn't been written yet!"
    raise NotImplementedError(message)
385
def createDOMInputSource(self):
    """Return a newly constructed ``DOMInputSource`` instance."""
    source = DOMInputSource()
    return source