1
from __future__ import absolute_import, division, unicode_literals
2
from pip.vendor.six import text_type
4
from ..constants import scopingElements, tableInsertModeElements, namespaces
6
# The scope markers are inserted when entering object elements,
7
# marquees, table cells, and table captions, and are used to prevent formatting
8
# from "leaking" into tables, object elements, and marquees.
12
None: (frozenset(scopingElements), False),
13
"button": (frozenset(scopingElements | set([(namespaces["html"], "button")])), False),
14
"list": (frozenset(scopingElements | set([(namespaces["html"], "ol"),
15
(namespaces["html"], "ul")])), False),
16
"table": (frozenset([(namespaces["html"], "html"),
17
(namespaces["html"], "table")]), False),
18
"select": (frozenset([(namespaces["html"], "optgroup"),
19
(namespaces["html"], "option")]), True)
24
def __init__(self, name):
25
"""Node representing an item in the tree.
26
name - The tag name associated with the node
27
parent - The parent of the current node (or None for the document node)
28
value - The value of the current node (applies to text nodes and
30
attributes - a dict holding name, value pairs for attributes of the node
31
childNodes - a list of child nodes of the current node. This must
32
include all elements but not necessarily other node types
33
_flags - A list of miscellaneous flags that can be set on the node
43
attributesStr = " ".join(["%s=\"%s\"" % (name, value)
45
self.attributes.items()])
47
return "<%s %s>" % (self.name, attributesStr)
49
return "<%s>" % (self.name)
52
return "<%s>" % (self.name)
54
def appendChild(self, node):
55
"""Insert node as a child of the current node
57
raise NotImplementedError
59
def insertText(self, data, insertBefore=None):
60
"""Insert data as text in the current node, positioned before the
61
start of node insertBefore or to the end of the node's text.
63
raise NotImplementedError
65
def insertBefore(self, node, refNode):
66
"""Insert node as a child of the current node, before refNode in the
67
list of child nodes. Raises ValueError if refNode is not a child of
69
raise NotImplementedError
71
def removeChild(self, node):
72
"""Remove node from the children of the current node
74
raise NotImplementedError
76
def reparentChildren(self, newParent):
77
"""Move all the children of the current node to newParent.
78
This is needed so that trees that don't store text as nodes move the
79
text in the correct way
81
# XXX - should this method be made more general?
82
for child in self.childNodes:
83
newParent.appendChild(child)
87
"""Return a shallow copy of the current node i.e. a node with the same
88
name and attributes but with no parent or child nodes
90
raise NotImplementedError
93
"""Return true if the node has children or text, false otherwise
95
raise NotImplementedError
98
class ActiveFormattingElements(list):
99
def append(self, node):
102
for element in self[::-1]:
103
if element == Marker:
105
if self.nodesEqual(element, node):
110
list.append(self, node)
112
def nodesEqual(self, node1, node2):
113
if not node1.nameTuple == node2.nameTuple:
116
if not node1.attributes == node2.attributes:
122
class TreeBuilder(object):
123
"""Base treebuilder implementation
124
documentClass - the class to use for the bottommost node of a document
125
elementClass - the class to use for HTML Elements
126
commentClass - the class to use for comments
127
doctypeClass - the class to use for doctypes
133
# The class to use for creating a node
136
# The class to use for creating comments
139
# The class to use for creating doctypes
145
def __init__(self, namespaceHTMLElements):
146
if namespaceHTMLElements:
147
self.defaultNamespace = "http://www.w3.org/1999/xhtml"
149
self.defaultNamespace = None
153
self.openElements = []
154
self.activeFormattingElements = ActiveFormattingElements()
156
# XXX - rename these to headElement, formElement
157
self.headPointer = None
158
self.formPointer = None
160
self.insertFromTable = False
162
self.document = self.documentClass()
164
def elementInScope(self, target, variant=None):
166
# If we pass a node in we match that. if we pass a string
167
# match any node with that name
168
exactNode = hasattr(target, "nameTuple")
170
listElements, invert = listElementsMap[variant]
172
for node in reversed(self.openElements):
173
if (node.name == target and not exactNode or
174
node == target and exactNode):
176
elif (invert ^ (node.nameTuple in listElements)):
179
assert False # We should never reach this point
181
def reconstructActiveFormattingElements(self):
182
# Within this algorithm the order of steps described in the
183
# specification is not quite the same as the order of steps in the
184
# code. It should still do the same though.
186
# Step 1: stop the algorithm when there's nothing to do.
187
if not self.activeFormattingElements:
190
# Step 2 and step 3: we start with the last element. So i is -1.
191
i = len(self.activeFormattingElements) - 1
192
entry = self.activeFormattingElements[i]
193
if entry == Marker or entry in self.openElements:
197
while entry != Marker and entry not in self.openElements:
199
# This will be reset to 0 below
203
# Step 5: let entry be one earlier in the list.
204
entry = self.activeFormattingElements[i]
211
entry = self.activeFormattingElements[i]
212
clone = entry.cloneNode() # Mainly to get a new copy of the attributes
215
element = self.insertElement({"type": "StartTag",
217
"namespace": clone.namespace,
218
"data": clone.attributes})
221
self.activeFormattingElements[i] = element
224
if element == self.activeFormattingElements[-1]:
227
def clearActiveFormattingElements(self):
    """Discard active formatting entries back to the most recent Marker.

    Entries are popped from the end of ``self.activeFormattingElements``
    until either a popped entry equals ``Marker`` or the list has become
    empty. One entry is always popped first, so calling this with an
    empty list raises IndexError from ``list.pop``.
    """
    formatting = self.activeFormattingElements
    while True:
        removed = formatting.pop()
        # Stop once nothing is left, or once the entry just removed was
        # the marker itself.
        if not (formatting and removed != Marker):
            break
232
def elementInActiveFormattingElements(self, name):
233
"""Check if an element exists between the end of the active
234
formatting elements and the last marker. If it does, return it, else
237
for item in self.activeFormattingElements[::-1]:
238
# Check for Marker first because if it's a Marker it doesn't have a
242
elif item.name == name:
246
def insertRoot(self, token):
    """Create the element for ``token`` and install it as the document root.

    The element is built with ``self.createElement``, pushed onto the
    stack of open elements, and then appended to ``self.document``.

    token - the token passed through to ``createElement``
    """
    root = self.createElement(token)
    self.openElements.append(root)
    self.document.appendChild(root)
251
def insertDoctype(self, token):
253
publicId = token["publicId"]
254
systemId = token["systemId"]
256
doctype = self.doctypeClass(name, publicId, systemId)
257
self.document.appendChild(doctype)
259
def insertComment(self, token, parent=None):
261
parent = self.openElements[-1]
262
parent.appendChild(self.commentClass(token["data"]))
264
def createElement(self, token):
265
"""Create an element but don't insert it anywhere"""
267
namespace = token.get("namespace", self.defaultNamespace)
268
element = self.elementClass(name, namespace)
269
element.attributes = token["data"]
272
def _getInsertFromTable(self):
    """Getter for the ``insertFromTable`` property.

    Returns whatever was last stored in ``self._insertFromTable``.
    """
    return self._insertFromTable
275
def _setInsertFromTable(self, value):
276
"""Switch the function used to insert an element from the
277
normal one to the misnested table one and back again"""
278
self._insertFromTable = value
280
self.insertElement = self.insertElementTable
282
self.insertElement = self.insertElementNormal
284
insertFromTable = property(_getInsertFromTable, _setInsertFromTable)
286
def insertElementNormal(self, token):
288
assert isinstance(name, text_type), "Element %s not unicode" % name
289
namespace = token.get("namespace", self.defaultNamespace)
290
element = self.elementClass(name, namespace)
291
element.attributes = token["data"]
292
self.openElements[-1].appendChild(element)
293
self.openElements.append(element)
296
def insertElementTable(self, token):
297
"""Create an element and insert it into the tree"""
298
element = self.createElement(token)
299
if self.openElements[-1].name not in tableInsertModeElements:
300
return self.insertElementNormal(token)
302
# We should be in the InTable mode. This means we want to do
303
# special magic element rearranging
304
parent, insertBefore = self.getTableMisnestedNodePosition()
305
if insertBefore is None:
306
parent.appendChild(element)
308
parent.insertBefore(element, insertBefore)
309
self.openElements.append(element)
312
def insertText(self, data, parent=None):
313
"""Insert text data."""
315
parent = self.openElements[-1]
317
if (not self.insertFromTable or (self.insertFromTable and
318
self.openElements[-1].name
319
not in tableInsertModeElements)):
320
parent.insertText(data)
322
# We should be in the InTable mode. This means we want to do
323
# special magic element rearranging
324
parent, insertBefore = self.getTableMisnestedNodePosition()
325
parent.insertText(data, insertBefore)
327
def getTableMisnestedNodePosition(self):
328
"""Get the foster parent element, and sibling to insert before
329
(or None) when inserting a misnested table node"""
330
# The foster parent element is the one which comes before the most
331
# recently opened table element
332
# XXX - this is really inelegant
336
for elm in self.openElements[::-1]:
337
if elm.name == "table":
341
# XXX - we should really check that this parent is actually a
344
fosterParent = lastTable.parent
345
insertBefore = lastTable
347
fosterParent = self.openElements[
348
self.openElements.index(lastTable) - 1]
350
fosterParent = self.openElements[0]
351
return fosterParent, insertBefore
353
def generateImpliedEndTags(self, exclude=None):
    """Pop open elements whose end tags are implied.

    While the current node (the last entry of ``self.openElements``) is
    named one of dd, dt, li, option, optgroup, p, rp or rt, and its name
    is not ``exclude``, it is removed from the stack. Processing stops at
    the first node that does not qualify, or when the stack is empty.

    exclude - a tag name that must be left on the stack even though its
              end tag would otherwise be implied (None to exclude nothing)
    """
    # Built once per call instead of once per popped element.
    impliedEndTagNames = frozenset(
        ("dd", "dt", "li", "option", "optgroup", "p", "rp", "rt"))
    # XXX td, th and tr are not actually needed
    # XXX This is not entirely what the specification says. We should
    # investigate it more closely.
    # A loop is used rather than recursion so that a long run of
    # implied-end-tag elements cannot exhaust the interpreter's recursion
    # limit; an emptied stack simply ends the loop.
    while self.openElements:
        name = self.openElements[-1].name
        if name not in impliedEndTagNames or name == exclude:
            break
        self.openElements.pop()
363
def getDocument(self):
364
"Return the final tree"
367
def getFragment(self):
368
"Return the final fragment"
369
# assert self.innerHTML
370
fragment = self.fragmentClass()
371
self.openElements[0].reparentChildren(fragment)
374
def testSerializer(self, node):
    """Serialize the subtree rooted at ``node`` in the unit-test format.

    This base implementation always raises NotImplementedError.

    node - the node from which to start serializing
    """
    raise NotImplementedError