~ubuntu-branches/ubuntu/karmic/pypy/karmic

« back to all changes in this revision

Viewing changes to lib-python/2.4.1/xml/dom/pulldom.py

  • Committer: Bazaar Package Importer
  • Author(s): Alexandre Fayolle
  • Date: 2007-04-13 09:33:09 UTC
  • Revision ID: james.westby@ubuntu.com-20070413093309-yoojh4jcoocu2krz
Tags: upstream-1.0.0
Import upstream version 1.0.0

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
import xml.sax
 
2
import xml.sax.handler
 
3
import types
 
4
 
 
5
try:
 
6
    _StringTypes = [types.StringType, types.UnicodeType]
 
7
except AttributeError:
 
8
    _StringTypes = [types.StringType]
 
9
 
 
10
START_ELEMENT = "START_ELEMENT"
 
11
END_ELEMENT = "END_ELEMENT"
 
12
COMMENT = "COMMENT"
 
13
START_DOCUMENT = "START_DOCUMENT"
 
14
END_DOCUMENT = "END_DOCUMENT"
 
15
PROCESSING_INSTRUCTION = "PROCESSING_INSTRUCTION"
 
16
IGNORABLE_WHITESPACE = "IGNORABLE_WHITESPACE"
 
17
CHARACTERS = "CHARACTERS"
 
18
 
 
19
class PullDOM(xml.sax.ContentHandler):
    """SAX content handler that records parser callbacks as DOM events.

    Events are kept in a singly linked queue of two-item lists
    ``[event, next_cell]`` where ``event`` is a ``(tag, node)`` pair.
    ``firstEvent`` is the sentinel head cell and ``lastEvent`` is the cell
    that the next event gets linked after.  Open nodes are tracked on
    ``elementStack``.
    """

    _locator = None
    document = None

    def __init__(self, documentFactory=None):
        """Set up an empty event queue, node stack and namespace context.

        documentFactory -- object whose createDocument() builds the
        document; when None, startDocument() installs minidom's
        implementation.
        """
        from xml.dom import XML_NAMESPACE
        self.documentFactory = documentFactory
        # The queue starts as just the sentinel; head and tail coincide.
        self.firstEvent = [None, None]
        self.lastEvent = self.firstEvent
        self.elementStack = []
        self.push = self.elementStack.append
        try:
            # Shadow the pop() method below with the list's own bound pop.
            self.pop = self.elementStack.pop
        except AttributeError:
            # use class' pop instead
            pass
        # Stack of uri -> prefix dicts; the 'xml' prefix is always known.
        self._ns_contexts = [{XML_NAMESPACE: 'xml'}]
        self._current_context = self._ns_contexts[-1]
        # Comments / PIs seen before the document object exists.
        self.pending_events = []

    def _link_event(self, token, payload):
        """Append ``(token, payload)`` to the queue and advance the tail."""
        cell = [(token, payload), None]
        self.lastEvent[1] = cell
        self.lastEvent = cell

    def pop(self):
        """Remove and return the top of elementStack (fallback for list.pop)."""
        top = self.elementStack[-1]
        del self.elementStack[-1]
        return top

    def setDocumentLocator(self, locator):
        """Remember the locator handed over by the parser."""
        self._locator = locator

    def startPrefixMapping(self, prefix, uri):
        """Record a namespace declaration for the element about to start.

        The declaration is remembered in _xmlns_attrs (under the name
        'xmlns' when prefix is empty) so startElementNS() can turn it into
        an attribute.  A copy of the current context is pushed on
        _ns_contexts before uri is mapped to prefix in the current context.
        """
        try:
            declared = self._xmlns_attrs
        except AttributeError:
            declared = self._xmlns_attrs = []
        declared.append((prefix or 'xmlns', uri))
        self._ns_contexts.append(self._current_context.copy())
        self._current_context[uri] = prefix or None

    def endPrefixMapping(self, prefix):
        """Make the most recently pushed context copy the current context."""
        self._current_context = self._ns_contexts.pop()

    def startElementNS(self, name, tagName , attrs):
        """Create an element (with attributes) and queue START_ELEMENT.

        name is a ``(uri, localname)`` pair.  The first element seen builds
        the document through buildDocument().  The new node is pushed on
        elementStack.
        """
        xmlns_uri = 'http://www.w3.org/2000/xmlns/'
        # Fold namespace declarations collected by startPrefixMapping()
        # into the attribute set, then reset the collection.
        declared = getattr(self, '_xmlns_attrs', None)
        if declared is not None:
            for decl_name, decl_value in declared:
                attrs._attrs[(xmlns_uri, decl_name)] = decl_value
            self._xmlns_attrs = []

        uri, localname = name
        if not uri:
            # When the tagname is not prefixed, it just appears as
            # localname
            if self.document:
                node = self.document.createElement(localname)
            else:
                node = self.buildDocument(None, localname)
        else:
            # When using namespaces, the reader may or may not
            # provide us with the original name. If not, create
            # *a* valid tagName from the current context.
            if tagName is None:
                prefix = self._current_context[uri]
                if prefix:
                    tagName = prefix + ":" + localname
                else:
                    tagName = localname
            if self.document:
                node = self.document.createElementNS(uri, tagName)
            else:
                node = self.buildDocument(uri, tagName)

        for aname, value in attrs.items():
            a_uri, a_localname = aname
            if a_uri:
                if a_uri == xmlns_uri:
                    # Namespace declaration: 'xmlns' or 'xmlns:<prefix>'.
                    if a_localname == 'xmlns':
                        qname = a_localname
                    else:
                        qname = 'xmlns:' + a_localname
                else:
                    # Ordinary namespaced attribute: rebuild its qname
                    # from the prefix bound to the uri, if any.
                    prefix = self._current_context[a_uri]
                    if prefix:
                        qname = prefix + ":" + a_localname
                    else:
                        qname = a_localname
                attr = self.document.createAttributeNS(a_uri, qname)
                node.setAttributeNodeNS(attr)
            else:
                attr = self.document.createAttribute(a_localname)
                node.setAttributeNode(attr)
            attr.value = value

        self._link_event(START_ELEMENT, node)
        self.push(node)

    def endElementNS(self, name, tagName):
        """Pop the open node and queue END_ELEMENT for it."""
        self._link_event(END_ELEMENT, self.pop())

    def startElement(self, name, attrs):
        """Non-namespace counterpart of startElementNS()."""
        if self.document:
            node = self.document.createElement(name)
        else:
            node = self.buildDocument(None, name)

        for aname, value in attrs.items():
            attr = self.document.createAttribute(aname)
            attr.value = value
            node.setAttributeNode(attr)

        self._link_event(START_ELEMENT, node)
        self.push(node)

    def endElement(self, name):
        """Pop the open node and queue END_ELEMENT for it."""
        self._link_event(END_ELEMENT, self.pop())

    def comment(self, s):
        """Queue a COMMENT event, or park the raw text until a document exists."""
        if not self.document:
            # No document to create nodes with yet; buildDocument() will
            # convert this entry later.
            self.pending_events.append([(COMMENT, s), None])
            return
        self._link_event(COMMENT, self.document.createComment(s))

    def processingInstruction(self, target, data):
        """Queue a PROCESSING_INSTRUCTION event, or park it until a document exists."""
        if not self.document:
            self.pending_events.append(
                [(PROCESSING_INSTRUCTION, target, data), None])
            return
        pi_node = self.document.createProcessingInstruction(target, data)
        self._link_event(PROCESSING_INSTRUCTION, pi_node)

    def ignorableWhitespace(self, chars):
        """Queue an IGNORABLE_WHITESPACE event carrying a new text node."""
        # Unlike comment(), there is no guard for a missing document here.
        self._link_event(IGNORABLE_WHITESPACE,
                         self.document.createTextNode(chars))

    def characters(self, chars):
        """Queue a CHARACTERS event carrying a new text node."""
        self._link_event(CHARACTERS, self.document.createTextNode(chars))

    def startDocument(self):
        """Install minidom's DOM implementation when no factory was given."""
        if self.documentFactory is not None:
            return
        import xml.dom.minidom
        self.documentFactory = xml.dom.minidom.Document.implementation

    def buildDocument(self, uri, tagname):
        """Create the document for the root element and return that element.

        Queues START_DOCUMENT, pushes the document on elementStack, then
        converts every parked comment / processing instruction into a real
        node and links it into the queue.
        """
        # Can't do that in startDocument, since we need the tagname
        # XXX: obtain DocumentType
        node = self.documentFactory.createDocument(uri, tagname, None)
        self.document = node
        self._link_event(START_DOCUMENT, node)
        self.push(node)
        # Put everything we have seen so far into the document
        for cell in self.pending_events:
            kind = cell[0][0]
            if kind == PROCESSING_INSTRUCTION:
                _, target, data = cell[0]
                pi_node = self.document.createProcessingInstruction(target, data)
                cell[0] = (PROCESSING_INSTRUCTION, pi_node)
            elif kind == COMMENT:
                cell[0] = (COMMENT, self.document.createComment(cell[0][1]))
            else:
                raise AssertionError("Unknown pending event ", kind)
            # The parked cell itself becomes the new tail of the queue.
            self.lastEvent[1] = cell
            self.lastEvent = cell
        self.pending_events = None
        return node.firstChild

    def endDocument(self):
        """Link the END_DOCUMENT event and pop the document off the stack."""
        # The tail pointer is deliberately left where it is, as before.
        self.lastEvent[1] = [(END_DOCUMENT, self.document), None]
        self.pop()

    def clear(self):
        "clear(): Explicitly release parsing structures"
        self.document = None
 
201
 
 
202
class ErrorHandler:
    """Minimal SAX error handler: print warnings, re-raise everything else."""

    def warning(self, exception):
        """Report a parser warning by printing it."""
        print(exception)

    def error(self, exception):
        """Propagate a recoverable parser error to the caller."""
        raise exception

    def fatalError(self, exception):
        """Propagate a non-recoverable parser error to the caller."""
        raise exception
 
209
 
 
210
class DOMEventStream:
    """Pull-style iterator over the events a PullDOM handler queues.

    stream  -- object with read(); fed to the parser bufsize items at a time
    parser  -- SAX parser; when it has no feed() method the whole stream is
               parsed up front (see _slurp) instead of incrementally
    bufsize -- size passed to stream.read() on each refill
    """

    def __init__(self, stream, parser, bufsize):
        self.stream = stream
        self.parser = parser
        self.bufsize = bufsize
        if not hasattr(self.parser, 'feed'):
            # Not an incremental parser: replace getEvent on this instance.
            self.getEvent = self._slurp
        self.reset()

    def reset(self):
        """Attach a fresh PullDOM handler to the parser."""
        self.pulldom = PullDOM()
        # This content handler relies on namespace support
        self.parser.setFeature(xml.sax.handler.feature_namespaces, 1)
        self.parser.setContentHandler(self.pulldom)

    def __getitem__(self, pos):
        """Return the next event; pos is ignored.  IndexError ends the sequence."""
        rc = self.getEvent()
        if rc:
            return rc
        raise IndexError

    def next(self):
        """Return the next event or raise StopIteration when none is left."""
        rc = self.getEvent()
        if rc:
            return rc
        raise StopIteration

    def __iter__(self):
        return self

    def expandNode(self, node):
        """Consume events up to node's own END event, building its subtree.

        Every node pulled meanwhile is appended to the innermost open
        parent; START_ELEMENT opens a new parent and END_ELEMENT closes it.
        """
        event = self.getEvent()
        parents = [node]
        while event:
            token, cur_node = event
            if cur_node is node:
                return
            if token != END_ELEMENT:
                parents[-1].appendChild(cur_node)
            if token == START_ELEMENT:
                parents.append(cur_node)
            elif token == END_ELEMENT:
                del parents[-1]
            event = self.getEvent()

    def getEvent(self):
        """Return the next queued event, feeding the parser as needed.

        Returns None once the stream is exhausted and no event is queued.
        """
        # use IncrementalParser interface, so we get the desired
        # pull effect
        if not self.pulldom.firstEvent[1]:
            # Queue drained: new events must be linked after the sentinel.
            self.pulldom.lastEvent = self.pulldom.firstEvent
        while not self.pulldom.firstEvent[1]:
            buf = self.stream.read(self.bufsize)
            if not buf:
                self.parser.close()
                return None
            self.parser.feed(buf)
        rc = self.pulldom.firstEvent[1][0]
        self.pulldom.firstEvent[1] = self.pulldom.firstEvent[1][1]
        return rc

    def _slurp(self):
        """ Fallback replacement for getEvent() using the
            standard SAX2 interface, which means we slurp the
            SAX events into memory (no performance gain, but
            we are compatible to all SAX parsers).
        """
        self.parser.parse(self.stream)
        self.getEvent = self._emit
        return self._emit()

    def _emit(self):
        """ Fallback replacement for getEvent() that emits
            the events that _slurp() read previously.

            Returns None once the queue is empty, matching getEvent(),
            so that next() and __getitem__() can signal the end instead
            of failing on the missing cell.
        """
        head = self.pulldom.firstEvent
        if not head[1]:
            return None
        rc = head[1][0]
        head[1] = head[1][1]
        return rc

    def clear(self):
        """clear(): Explicitly release parsing objects"""
        self.pulldom.clear()
        del self.pulldom
        self.parser = None
        self.stream = None
 
294
 
 
295
class SAX2DOM(PullDOM):
    """PullDOM variant that also links each new node into the DOM tree.

    Every overridden callback first lets PullDOM create the node and queue
    its event, then appends that node to its parent on elementStack.
    """

    def _attach_top_of_stack(self):
        """Append the node just pushed on elementStack to the one below it."""
        child = self.elementStack[-1]
        parent = self.elementStack[-2]
        parent.appendChild(child)

    def _attach_last_event_node(self):
        """Append the node of the most recent event to the open node."""
        child = self.lastEvent[0][1]
        parent = self.elementStack[-1]
        parent.appendChild(child)

    def startElementNS(self, name, tagName , attrs):
        """Create the element via PullDOM, then hang it under its parent."""
        PullDOM.startElementNS(self, name, tagName, attrs)
        self._attach_top_of_stack()

    def startElement(self, name, attrs):
        """Create the element via PullDOM, then hang it under its parent."""
        PullDOM.startElement(self, name, attrs)
        self._attach_top_of_stack()

    def processingInstruction(self, target, data):
        """Create the PI via PullDOM, then append it to the open node."""
        PullDOM.processingInstruction(self, target, data)
        # NOTE(review): when no document exists yet PullDOM only parks the
        # PI in pending_events and leaves lastEvent untouched, so this
        # lookup does not see the new PI -- confirm whether a PI before
        # the root element needs handling here.
        self._attach_last_event_node()

    def ignorableWhitespace(self, chars):
        """Create the text node via PullDOM, then append it to the open node."""
        PullDOM.ignorableWhitespace(self, chars)
        self._attach_last_event_node()

    def characters(self, chars):
        """Create the text node via PullDOM, then append it to the open node."""
        PullDOM.characters(self, chars)
        self._attach_last_event_node()
 
326
 
 
327
 
 
328
# Default size handed to stream.read() per refill: 2**14 - 20 = 16364.
default_bufsize = (2 ** 14) - 20


def parse(stream_or_string, parser=None, bufsize=None):
    """Return a DOMEventStream over a file name or an open stream.

    stream_or_string -- a string (or string subclass) is opened as a file
                        name; anything else is used as the stream as-is
    parser           -- SAX parser to use; xml.sax.make_parser() when falsy
    bufsize          -- read size per refill; default_bufsize when None
    """
    if bufsize is None:
        bufsize = default_bufsize
    # isinstance() rather than an exact type match, so that subclasses of
    # the string types are recognised as file names too.
    if isinstance(stream_or_string, tuple(_StringTypes)):
        stream = open(stream_or_string)
    else:
        stream = stream_or_string
    if not parser:
        parser = xml.sax.make_parser()
    return DOMEventStream(stream, parser, bufsize)
 
340
 
 
341
def parseString(string, parser=None):
    """Return a DOMEventStream over XML text held in memory.

    The text is wrapped in a StringIO object and the buffer size is set to
    the full length of the text.  parser defaults to
    xml.sax.make_parser() when falsy.
    """
    # Prefer the C implementation when it can be imported.
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO

    whole_length = len(string)
    source = StringIO(string)
    parser = parser or xml.sax.make_parser()
    return DOMEventStream(source, parser, whole_length)