~ubuntu-branches/debian/jessie/python-pip/jessie

« back to all changes in this revision

Viewing changes to pip/vendor/html5lib/html5parser.py

  • Committer: Package Import Robot
  • Author(s): Barry Warsaw
  • Date: 2014-03-31 14:44:40 UTC
  • mfrom: (1.2.6)
  • Revision ID: package-import@ubuntu.com-20140331144440-961i5y31zsxb7ev1
Tags: 1.5.4-1
* Team upload.
* New upstream release.
* d/patches:
  - system-ca-certificates.patch: Removed.  This is obsoleted by the
    vendorizing (and on Debian, de-vendorizing) of the requests library.
  - no-python-specific-scripts.patch: Removed.  Upstream renamed pip-X.Y
    to pipX.Y but adopts our pipX name as well.  I don't think it hurts
    to also have pipX.Y.
  - de-vendorize.patch: Added, in order to use Debian packages instead
    of vendorized packages.
* d/control:
  - Bump Standards-Version to 3.9.5 with no other changes needed.
  - Update Depends for the vendorized packages.
* d/python{,3}-pip.pyremove: Remove pip/_vendor directory from binary
  packages.

Show diffs side-by-side

added added

removed removed

Lines of Context:
1
 
from __future__ import absolute_import, division, unicode_literals
2
 
from pip.vendor.six import with_metaclass
3
 
 
4
 
import types
5
 
 
6
 
from . import inputstream
7
 
from . import tokenizer
8
 
 
9
 
from . import treebuilders
10
 
from .treebuilders._base import Marker
11
 
 
12
 
from . import utils
13
 
from . import constants
14
 
from .constants import spaceCharacters, asciiUpper2Lower
15
 
from .constants import specialElements
16
 
from .constants import headingElements
17
 
from .constants import cdataElements, rcdataElements
18
 
from .constants import tokenTypes, ReparseException, namespaces
19
 
from .constants import htmlIntegrationPointElements, mathmlTextIntegrationPointElements
20
 
 
21
 
 
22
 
def parse(doc, treebuilder="etree", encoding=None,
          namespaceHTMLElements=True):
    """Parse a string or file-like object into a tree

    doc - the string or file-like object handed to HTMLParser.parse

    treebuilder - name given to treebuilders.getTreeBuilder to select the
    tree builder class

    encoding - forwarded unchanged to HTMLParser.parse

    namespaceHTMLElements - forwarded unchanged to the HTMLParser constructor
    """
    builderClass = treebuilders.getTreeBuilder(treebuilder)
    parser = HTMLParser(builderClass,
                        namespaceHTMLElements=namespaceHTMLElements)
    return parser.parse(doc, encoding=encoding)
28
 
 
29
 
 
30
 
def parseFragment(doc, container="div", treebuilder="etree", encoding=None,
                  namespaceHTMLElements=True):
    """Parse a string or file-like object into a tree fragment

    doc - the string or file-like object handed to HTMLParser.parseFragment

    container - forwarded unchanged to HTMLParser.parseFragment

    treebuilder - name given to treebuilders.getTreeBuilder to select the
    tree builder class

    encoding - forwarded unchanged to HTMLParser.parseFragment

    namespaceHTMLElements - forwarded unchanged to the HTMLParser constructor
    """
    builderClass = treebuilders.getTreeBuilder(treebuilder)
    parser = HTMLParser(builderClass,
                        namespaceHTMLElements=namespaceHTMLElements)
    return parser.parseFragment(doc, container=container, encoding=encoding)
35
 
 
36
 
 
37
 
def method_decorator_metaclass(function):
    """Build a metaclass that wraps every plain function of a class body

    function - a decorator; it is applied to each class-body attribute that
    is a types.FunctionType. All other attributes are left untouched.

    Returns the metaclass (a subclass of type).
    """
    class Decorated(type):
        def __new__(meta, classname, bases, classDict):
            # Snapshot the functions first, then write the wrapped versions
            # back into the same class dictionary.
            plainFunctions = [(name, member)
                              for name, member in classDict.items()
                              if isinstance(member, types.FunctionType)]
            for name, member in plainFunctions:
                classDict[name] = function(member)
            return type.__new__(meta, classname, bases, classDict)
    return Decorated
47
 
 
48
 
 
49
 
class HTMLParser(object):
50
 
    """HTML parser. Generates a tree structure from a stream of (possibly
51
 
        malformed) HTML"""
52
 
 
53
 
    def __init__(self, tree=None, tokenizer=tokenizer.HTMLTokenizer,
54
 
                 strict=False, namespaceHTMLElements=True, debug=False):
55
 
        """
56
 
        strict - raise an exception when a parse error is encountered
57
 
 
58
 
        tree - a treebuilder class controlling the type of tree that will be
59
 
        returned. Built in treebuilders can be accessed through
60
 
        html5lib.treebuilders.getTreeBuilder(treeType)
61
 
 
62
 
        tokenizer - a class that provides a stream of tokens to the treebuilder.
63
 
        This may be replaced for e.g. a sanitizer which converts some tags to
64
 
        text
65
 
        """
66
 
 
67
 
        # Raise an exception on the first error encountered
68
 
        self.strict = strict
69
 
 
70
 
        if tree is None:
71
 
            tree = treebuilders.getTreeBuilder("etree")
72
 
        self.tree = tree(namespaceHTMLElements)
73
 
        self.tokenizer_class = tokenizer
74
 
        self.errors = []
75
 
 
76
 
        self.phases = dict([(name, cls(self, self.tree)) for name, cls in
77
 
                            getPhases(debug).items()])
78
 
 
79
 
    def _parse(self, stream, innerHTML=False, container="div",
80
 
               encoding=None, parseMeta=True, useChardet=True, **kwargs):
81
 
 
82
 
        self.innerHTMLMode = innerHTML
83
 
        self.container = container
84
 
        self.tokenizer = self.tokenizer_class(stream, encoding=encoding,
85
 
                                              parseMeta=parseMeta,
86
 
                                              useChardet=useChardet,
87
 
                                              parser=self, **kwargs)
88
 
        self.reset()
89
 
 
90
 
        while True:
91
 
            try:
92
 
                self.mainLoop()
93
 
                break
94
 
            except ReparseException:
95
 
                self.reset()
96
 
 
97
 
    def reset(self):
98
 
        self.tree.reset()
99
 
        self.firstStartTag = False
100
 
        self.errors = []
101
 
        self.log = []  # only used with debug mode
102
 
        # "quirks" / "limited quirks" / "no quirks"
103
 
        self.compatMode = "no quirks"
104
 
 
105
 
        if self.innerHTMLMode:
106
 
            self.innerHTML = self.container.lower()
107
 
 
108
 
            if self.innerHTML in cdataElements:
109
 
                self.tokenizer.state = self.tokenizer.rcdataState
110
 
            elif self.innerHTML in rcdataElements:
111
 
                self.tokenizer.state = self.tokenizer.rawtextState
112
 
            elif self.innerHTML == 'plaintext':
113
 
                self.tokenizer.state = self.tokenizer.plaintextState
114
 
            else:
115
 
                # state already is data state
116
 
                # self.tokenizer.state = self.tokenizer.dataState
117
 
                pass
118
 
            self.phase = self.phases["beforeHtml"]
119
 
            self.phase.insertHtmlElement()
120
 
            self.resetInsertionMode()
121
 
        else:
122
 
            self.innerHTML = False
123
 
            self.phase = self.phases["initial"]
124
 
 
125
 
        self.lastPhase = None
126
 
 
127
 
        self.beforeRCDataPhase = None
128
 
 
129
 
        self.framesetOK = True
130
 
 
131
 
    def isHTMLIntegrationPoint(self, element):
132
 
        if (element.name == "annotation-xml" and
133
 
                element.namespace == namespaces["mathml"]):
134
 
            return ("encoding" in element.attributes and
135
 
                    element.attributes["encoding"].translate(
136
 
                        asciiUpper2Lower) in
137
 
                    ("text/html", "application/xhtml+xml"))
138
 
        else:
139
 
            return (element.namespace, element.name) in htmlIntegrationPointElements
140
 
 
141
 
    def isMathMLTextIntegrationPoint(self, element):
142
 
        return (element.namespace, element.name) in mathmlTextIntegrationPointElements
143
 
 
144
 
    def mainLoop(self):
145
 
        CharactersToken = tokenTypes["Characters"]
146
 
        SpaceCharactersToken = tokenTypes["SpaceCharacters"]
147
 
        StartTagToken = tokenTypes["StartTag"]
148
 
        EndTagToken = tokenTypes["EndTag"]
149
 
        CommentToken = tokenTypes["Comment"]
150
 
        DoctypeToken = tokenTypes["Doctype"]
151
 
        ParseErrorToken = tokenTypes["ParseError"]
152
 
 
153
 
        for token in self.normalizedTokens():
154
 
            new_token = token
155
 
            while new_token is not None:
156
 
                currentNode = self.tree.openElements[-1] if self.tree.openElements else None
157
 
                currentNodeNamespace = currentNode.namespace if currentNode else None
158
 
                currentNodeName = currentNode.name if currentNode else None
159
 
 
160
 
                type = new_token["type"]
161
 
 
162
 
                if type == ParseErrorToken:
163
 
                    self.parseError(new_token["data"], new_token.get("datavars", {}))
164
 
                    new_token = None
165
 
                else:
166
 
                    if (len(self.tree.openElements) == 0 or
167
 
                        currentNodeNamespace == self.tree.defaultNamespace or
168
 
                        (self.isMathMLTextIntegrationPoint(currentNode) and
169
 
                         ((type == StartTagToken and
170
 
                           token["name"] not in frozenset(["mglyph", "malignmark"])) or
171
 
                         type in (CharactersToken, SpaceCharactersToken))) or
172
 
                        (currentNodeNamespace == namespaces["mathml"] and
173
 
                         currentNodeName == "annotation-xml" and
174
 
                         token["name"] == "svg") or
175
 
                        (self.isHTMLIntegrationPoint(currentNode) and
176
 
                         type in (StartTagToken, CharactersToken, SpaceCharactersToken))):
177
 
                        phase = self.phase
178
 
                    else:
179
 
                        phase = self.phases["inForeignContent"]
180
 
 
181
 
                    if type == CharactersToken:
182
 
                        new_token = phase.processCharacters(new_token)
183
 
                    elif type == SpaceCharactersToken:
184
 
                        new_token = phase.processSpaceCharacters(new_token)
185
 
                    elif type == StartTagToken:
186
 
                        new_token = phase.processStartTag(new_token)
187
 
                    elif type == EndTagToken:
188
 
                        new_token = phase.processEndTag(new_token)
189
 
                    elif type == CommentToken:
190
 
                        new_token = phase.processComment(new_token)
191
 
                    elif type == DoctypeToken:
192
 
                        new_token = phase.processDoctype(new_token)
193
 
 
194
 
            if (type == StartTagToken and token["selfClosing"]
195
 
                    and not token["selfClosingAcknowledged"]):
196
 
                self.parseError("non-void-element-with-trailing-solidus",
197
 
                                {"name": token["name"]})
198
 
 
199
 
        # When the loop finishes it's EOF
200
 
        reprocess = True
201
 
        phases = []
202
 
        while reprocess:
203
 
            phases.append(self.phase)
204
 
            reprocess = self.phase.processEOF()
205
 
            if reprocess:
206
 
                assert self.phase not in phases
207
 
 
208
 
    def normalizedTokens(self):
209
 
        for token in self.tokenizer:
210
 
            yield self.normalizeToken(token)
211
 
 
212
 
    def parse(self, stream, encoding=None, parseMeta=True, useChardet=True):
213
 
        """Parse a HTML document into a well-formed tree
214
 
 
215
 
        stream - a filelike object or string containing the HTML to be parsed
216
 
 
217
 
        The optional encoding parameter must be a string that indicates
218
 
        the encoding.  If specified, that encoding will be used,
219
 
        regardless of any BOM or later declaration (such as in a meta
220
 
        element)
221
 
        """
222
 
        self._parse(stream, innerHTML=False, encoding=encoding,
223
 
                    parseMeta=parseMeta, useChardet=useChardet)
224
 
        return self.tree.getDocument()
225
 
 
226
 
    def parseFragment(self, stream, container="div", encoding=None,
227
 
                      parseMeta=False, useChardet=True):
228
 
        """Parse a HTML fragment into a well-formed tree fragment
229
 
 
230
 
        container - name of the element we're setting the innerHTML property
231
 
        if set to None, default to 'div'
232
 
 
233
 
        stream - a filelike object or string containing the HTML to be parsed
234
 
 
235
 
        The optional encoding parameter must be a string that indicates
236
 
        the encoding.  If specified, that encoding will be used,
237
 
        regardless of any BOM or later declaration (such as in a meta
238
 
        element)
239
 
        """
240
 
        self._parse(stream, True, container=container, encoding=encoding)
241
 
        return self.tree.getFragment()
242
 
 
243
 
    def parseError(self, errorcode="XXX-undefined-error", datavars={}):
244
 
        # XXX The idea is to make errorcode mandatory.
245
 
        self.errors.append((self.tokenizer.stream.position(), errorcode, datavars))
246
 
        if self.strict:
247
 
            raise ParseError
248
 
 
249
 
    def normalizeToken(self, token):
250
 
        """ HTML5 specific normalizations to the token stream """
251
 
 
252
 
        if token["type"] == tokenTypes["StartTag"]:
253
 
            token["data"] = dict(token["data"][::-1])
254
 
 
255
 
        return token
256
 
 
257
 
    def adjustMathMLAttributes(self, token):
258
 
        replacements = {"definitionurl": "definitionURL"}
259
 
        for k, v in replacements.items():
260
 
            if k in token["data"]:
261
 
                token["data"][v] = token["data"][k]
262
 
                del token["data"][k]
263
 
 
264
 
    def adjustSVGAttributes(self, token):
265
 
        replacements = {
266
 
            "attributename": "attributeName",
267
 
            "attributetype": "attributeType",
268
 
            "basefrequency": "baseFrequency",
269
 
            "baseprofile": "baseProfile",
270
 
            "calcmode": "calcMode",
271
 
            "clippathunits": "clipPathUnits",
272
 
            "contentscripttype": "contentScriptType",
273
 
            "contentstyletype": "contentStyleType",
274
 
            "diffuseconstant": "diffuseConstant",
275
 
            "edgemode": "edgeMode",
276
 
            "externalresourcesrequired": "externalResourcesRequired",
277
 
            "filterres": "filterRes",
278
 
            "filterunits": "filterUnits",
279
 
            "glyphref": "glyphRef",
280
 
            "gradienttransform": "gradientTransform",
281
 
            "gradientunits": "gradientUnits",
282
 
            "kernelmatrix": "kernelMatrix",
283
 
            "kernelunitlength": "kernelUnitLength",
284
 
            "keypoints": "keyPoints",
285
 
            "keysplines": "keySplines",
286
 
            "keytimes": "keyTimes",
287
 
            "lengthadjust": "lengthAdjust",
288
 
            "limitingconeangle": "limitingConeAngle",
289
 
            "markerheight": "markerHeight",
290
 
            "markerunits": "markerUnits",
291
 
            "markerwidth": "markerWidth",
292
 
            "maskcontentunits": "maskContentUnits",
293
 
            "maskunits": "maskUnits",
294
 
            "numoctaves": "numOctaves",
295
 
            "pathlength": "pathLength",
296
 
            "patterncontentunits": "patternContentUnits",
297
 
            "patterntransform": "patternTransform",
298
 
            "patternunits": "patternUnits",
299
 
            "pointsatx": "pointsAtX",
300
 
            "pointsaty": "pointsAtY",
301
 
            "pointsatz": "pointsAtZ",
302
 
            "preservealpha": "preserveAlpha",
303
 
            "preserveaspectratio": "preserveAspectRatio",
304
 
            "primitiveunits": "primitiveUnits",
305
 
            "refx": "refX",
306
 
            "refy": "refY",
307
 
            "repeatcount": "repeatCount",
308
 
            "repeatdur": "repeatDur",
309
 
            "requiredextensions": "requiredExtensions",
310
 
            "requiredfeatures": "requiredFeatures",
311
 
            "specularconstant": "specularConstant",
312
 
            "specularexponent": "specularExponent",
313
 
            "spreadmethod": "spreadMethod",
314
 
            "startoffset": "startOffset",
315
 
            "stddeviation": "stdDeviation",
316
 
            "stitchtiles": "stitchTiles",
317
 
            "surfacescale": "surfaceScale",
318
 
            "systemlanguage": "systemLanguage",
319
 
            "tablevalues": "tableValues",
320
 
            "targetx": "targetX",
321
 
            "targety": "targetY",
322
 
            "textlength": "textLength",
323
 
            "viewbox": "viewBox",
324
 
            "viewtarget": "viewTarget",
325
 
            "xchannelselector": "xChannelSelector",
326
 
            "ychannelselector": "yChannelSelector",
327
 
            "zoomandpan": "zoomAndPan"
328
 
        }
329
 
        for originalName in list(token["data"].keys()):
330
 
            if originalName in replacements:
331
 
                svgName = replacements[originalName]
332
 
                token["data"][svgName] = token["data"][originalName]
333
 
                del token["data"][originalName]
334
 
 
335
 
    def adjustForeignAttributes(self, token):
336
 
        replacements = {
337
 
            "xlink:actuate": ("xlink", "actuate", namespaces["xlink"]),
338
 
            "xlink:arcrole": ("xlink", "arcrole", namespaces["xlink"]),
339
 
            "xlink:href": ("xlink", "href", namespaces["xlink"]),
340
 
            "xlink:role": ("xlink", "role", namespaces["xlink"]),
341
 
            "xlink:show": ("xlink", "show", namespaces["xlink"]),
342
 
            "xlink:title": ("xlink", "title", namespaces["xlink"]),
343
 
            "xlink:type": ("xlink", "type", namespaces["xlink"]),
344
 
            "xml:base": ("xml", "base", namespaces["xml"]),
345
 
            "xml:lang": ("xml", "lang", namespaces["xml"]),
346
 
            "xml:space": ("xml", "space", namespaces["xml"]),
347
 
            "xmlns": (None, "xmlns", namespaces["xmlns"]),
348
 
            "xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"])
349
 
        }
350
 
 
351
 
        for originalName in token["data"].keys():
352
 
            if originalName in replacements:
353
 
                foreignName = replacements[originalName]
354
 
                token["data"][foreignName] = token["data"][originalName]
355
 
                del token["data"][originalName]
356
 
 
357
 
    def reparseTokenNormal(self, token):
358
 
        self.parser.phase()
359
 
 
360
 
    def resetInsertionMode(self):
361
 
        # The name of this method is mostly historical. (It's also used in the
362
 
        # specification.)
363
 
        last = False
364
 
        newModes = {
365
 
            "select": "inSelect",
366
 
            "td": "inCell",
367
 
            "th": "inCell",
368
 
            "tr": "inRow",
369
 
            "tbody": "inTableBody",
370
 
            "thead": "inTableBody",
371
 
            "tfoot": "inTableBody",
372
 
            "caption": "inCaption",
373
 
            "colgroup": "inColumnGroup",
374
 
            "table": "inTable",
375
 
            "head": "inBody",
376
 
            "body": "inBody",
377
 
            "frameset": "inFrameset",
378
 
            "html": "beforeHead"
379
 
        }
380
 
        for node in self.tree.openElements[::-1]:
381
 
            nodeName = node.name
382
 
            new_phase = None
383
 
            if node == self.tree.openElements[0]:
384
 
                assert self.innerHTML
385
 
                last = True
386
 
                nodeName = self.innerHTML
387
 
            # Check for conditions that should only happen in the innerHTML
388
 
            # case
389
 
            if nodeName in ("select", "colgroup", "head", "html"):
390
 
                assert self.innerHTML
391
 
 
392
 
            if not last and node.namespace != self.tree.defaultNamespace:
393
 
                continue
394
 
 
395
 
            if nodeName in newModes:
396
 
                new_phase = self.phases[newModes[nodeName]]
397
 
                break
398
 
            elif last:
399
 
                new_phase = self.phases["inBody"]
400
 
                break
401
 
 
402
 
        self.phase = new_phase
403
 
 
404
 
    def parseRCDataRawtext(self, token, contentType):
405
 
        """Generic RCDATA/RAWTEXT Parsing algorithm
406
 
        contentType - RCDATA or RAWTEXT
407
 
        """
408
 
        assert contentType in ("RAWTEXT", "RCDATA")
409
 
 
410
 
        self.tree.insertElement(token)
411
 
 
412
 
        if contentType == "RAWTEXT":
413
 
            self.tokenizer.state = self.tokenizer.rawtextState
414
 
        else:
415
 
            self.tokenizer.state = self.tokenizer.rcdataState
416
 
 
417
 
        self.originalPhase = self.phase
418
 
 
419
 
        self.phase = self.phases["text"]
420
 
 
421
 
 
422
 
def getPhases(debug):
423
 
    def log(function):
424
 
        """Logger that records which phase processes each token"""
425
 
        type_names = dict((value, key) for key, value in
426
 
                          constants.tokenTypes.items())
427
 
 
428
 
        def wrapped(self, *args, **kwargs):
429
 
            if function.__name__.startswith("process") and len(args) > 0:
430
 
                token = args[0]
431
 
                try:
432
 
                    info = {"type": type_names[token['type']]}
433
 
                except:
434
 
                    raise
435
 
                if token['type'] in constants.tagTokenTypes:
436
 
                    info["name"] = token['name']
437
 
 
438
 
                self.parser.log.append((self.parser.tokenizer.state.__name__,
439
 
                                        self.parser.phase.__class__.__name__,
440
 
                                        self.__class__.__name__,
441
 
                                        function.__name__,
442
 
                                        info))
443
 
                return function(self, *args, **kwargs)
444
 
            else:
445
 
                return function(self, *args, **kwargs)
446
 
        return wrapped
447
 
 
448
 
    def getMetaclass(use_metaclass, metaclass_func):
449
 
        if use_metaclass:
450
 
            return method_decorator_metaclass(metaclass_func)
451
 
        else:
452
 
            return type
453
 
 
454
 
    class Phase(with_metaclass(getMetaclass(debug, log))):
455
 
        """Base class for helper object that implements each phase of processing
456
 
        """
457
 
 
458
 
        def __init__(self, parser, tree):
459
 
            self.parser = parser
460
 
            self.tree = tree
461
 
 
462
 
        def processEOF(self):
463
 
            raise NotImplementedError
464
 
 
465
 
        def processComment(self, token):
466
 
            # For most phases the following is correct. Where it's not it will be
467
 
            # overridden.
468
 
            self.tree.insertComment(token, self.tree.openElements[-1])
469
 
 
470
 
        def processDoctype(self, token):
471
 
            self.parser.parseError("unexpected-doctype")
472
 
 
473
 
        def processCharacters(self, token):
474
 
            self.tree.insertText(token["data"])
475
 
 
476
 
        def processSpaceCharacters(self, token):
477
 
            self.tree.insertText(token["data"])
478
 
 
479
 
        def processStartTag(self, token):
480
 
            return self.startTagHandler[token["name"]](token)
481
 
 
482
 
        def startTagHtml(self, token):
483
 
            if not self.parser.firstStartTag and token["name"] == "html":
484
 
                self.parser.parseError("non-html-root")
485
 
            # XXX Need a check here to see if the first start tag token emitted is
486
 
            # this token... If it's not, invoke self.parser.parseError().
487
 
            for attr, value in token["data"].items():
488
 
                if attr not in self.tree.openElements[0].attributes:
489
 
                    self.tree.openElements[0].attributes[attr] = value
490
 
            self.parser.firstStartTag = False
491
 
 
492
 
        def processEndTag(self, token):
493
 
            return self.endTagHandler[token["name"]](token)
494
 
 
495
 
    class InitialPhase(Phase):
496
 
        def processSpaceCharacters(self, token):
497
 
            pass
498
 
 
499
 
        def processComment(self, token):
500
 
            self.tree.insertComment(token, self.tree.document)
501
 
 
502
 
        def processDoctype(self, token):
503
 
            name = token["name"]
504
 
            publicId = token["publicId"]
505
 
            systemId = token["systemId"]
506
 
            correct = token["correct"]
507
 
 
508
 
            if (name != "html" or publicId is not None or
509
 
                    systemId is not None and systemId != "about:legacy-compat"):
510
 
                self.parser.parseError("unknown-doctype")
511
 
 
512
 
            if publicId is None:
513
 
                publicId = ""
514
 
 
515
 
            self.tree.insertDoctype(token)
516
 
 
517
 
            if publicId != "":
518
 
                publicId = publicId.translate(asciiUpper2Lower)
519
 
 
520
 
            if (not correct or token["name"] != "html"
521
 
                or publicId.startswith(
522
 
                ("+//silmaril//dtd html pro v0r11 19970101//",
523
 
                 "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
524
 
                 "-//as//dtd html 3.0 aswedit + extensions//",
525
 
                 "-//ietf//dtd html 2.0 level 1//",
526
 
                 "-//ietf//dtd html 2.0 level 2//",
527
 
                 "-//ietf//dtd html 2.0 strict level 1//",
528
 
                 "-//ietf//dtd html 2.0 strict level 2//",
529
 
                 "-//ietf//dtd html 2.0 strict//",
530
 
                 "-//ietf//dtd html 2.0//",
531
 
                 "-//ietf//dtd html 2.1e//",
532
 
                 "-//ietf//dtd html 3.0//",
533
 
                 "-//ietf//dtd html 3.2 final//",
534
 
                 "-//ietf//dtd html 3.2//",
535
 
                 "-//ietf//dtd html 3//",
536
 
                 "-//ietf//dtd html level 0//",
537
 
                 "-//ietf//dtd html level 1//",
538
 
                 "-//ietf//dtd html level 2//",
539
 
                 "-//ietf//dtd html level 3//",
540
 
                 "-//ietf//dtd html strict level 0//",
541
 
                 "-//ietf//dtd html strict level 1//",
542
 
                 "-//ietf//dtd html strict level 2//",
543
 
                 "-//ietf//dtd html strict level 3//",
544
 
                 "-//ietf//dtd html strict//",
545
 
                 "-//ietf//dtd html//",
546
 
                 "-//metrius//dtd metrius presentational//",
547
 
                 "-//microsoft//dtd internet explorer 2.0 html strict//",
548
 
                 "-//microsoft//dtd internet explorer 2.0 html//",
549
 
                 "-//microsoft//dtd internet explorer 2.0 tables//",
550
 
                 "-//microsoft//dtd internet explorer 3.0 html strict//",
551
 
                 "-//microsoft//dtd internet explorer 3.0 html//",
552
 
                 "-//microsoft//dtd internet explorer 3.0 tables//",
553
 
                 "-//netscape comm. corp.//dtd html//",
554
 
                 "-//netscape comm. corp.//dtd strict html//",
555
 
                 "-//o'reilly and associates//dtd html 2.0//",
556
 
                 "-//o'reilly and associates//dtd html extended 1.0//",
557
 
                 "-//o'reilly and associates//dtd html extended relaxed 1.0//",
558
 
                 "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
559
 
                 "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
560
 
                 "-//spyglass//dtd html 2.0 extended//",
561
 
                 "-//sq//dtd html 2.0 hotmetal + extensions//",
562
 
                 "-//sun microsystems corp.//dtd hotjava html//",
563
 
                 "-//sun microsystems corp.//dtd hotjava strict html//",
564
 
                 "-//w3c//dtd html 3 1995-03-24//",
565
 
                 "-//w3c//dtd html 3.2 draft//",
566
 
                 "-//w3c//dtd html 3.2 final//",
567
 
                 "-//w3c//dtd html 3.2//",
568
 
                 "-//w3c//dtd html 3.2s draft//",
569
 
                 "-//w3c//dtd html 4.0 frameset//",
570
 
                 "-//w3c//dtd html 4.0 transitional//",
571
 
                 "-//w3c//dtd html experimental 19960712//",
572
 
                 "-//w3c//dtd html experimental 970421//",
573
 
                 "-//w3c//dtd w3 html//",
574
 
                 "-//w3o//dtd w3 html 3.0//",
575
 
                 "-//webtechs//dtd mozilla html 2.0//",
576
 
                 "-//webtechs//dtd mozilla html//"))
577
 
                or publicId in
578
 
                    ("-//w3o//dtd w3 html strict 3.0//en//",
579
 
                     "-/w3c/dtd html 4.0 transitional/en",
580
 
                     "html")
581
 
                or publicId.startswith(
582
 
                    ("-//w3c//dtd html 4.01 frameset//",
583
 
                     "-//w3c//dtd html 4.01 transitional//")) and
584
 
                    systemId is None
585
 
                    or systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"):
586
 
                self.parser.compatMode = "quirks"
587
 
            elif (publicId.startswith(
588
 
                    ("-//w3c//dtd xhtml 1.0 frameset//",
589
 
                     "-//w3c//dtd xhtml 1.0 transitional//"))
590
 
                  or publicId.startswith(
591
 
                      ("-//w3c//dtd html 4.01 frameset//",
592
 
                       "-//w3c//dtd html 4.01 transitional//")) and
593
 
                  systemId is not None):
594
 
                self.parser.compatMode = "limited quirks"
595
 
 
596
 
            self.parser.phase = self.parser.phases["beforeHtml"]
597
 
 
598
 
        def anythingElse(self):
599
 
            self.parser.compatMode = "quirks"
600
 
            self.parser.phase = self.parser.phases["beforeHtml"]
601
 
 
602
 
        def processCharacters(self, token):
603
 
            self.parser.parseError("expected-doctype-but-got-chars")
604
 
            self.anythingElse()
605
 
            return token
606
 
 
607
 
        def processStartTag(self, token):
608
 
            self.parser.parseError("expected-doctype-but-got-start-tag",
609
 
                                   {"name": token["name"]})
610
 
            self.anythingElse()
611
 
            return token
612
 
 
613
 
        def processEndTag(self, token):
614
 
            self.parser.parseError("expected-doctype-but-got-end-tag",
615
 
                                   {"name": token["name"]})
616
 
            self.anythingElse()
617
 
            return token
618
 
 
619
 
        def processEOF(self):
620
 
            self.parser.parseError("expected-doctype-but-got-eof")
621
 
            self.anythingElse()
622
 
            return True
623
 
 
624
 
    class BeforeHtmlPhase(Phase):
625
 
        # helper methods
626
 
        def insertHtmlElement(self):
627
 
            self.tree.insertRoot(impliedTagToken("html", "StartTag"))
628
 
            self.parser.phase = self.parser.phases["beforeHead"]
629
 
 
630
 
        # other
631
 
        def processEOF(self):
632
 
            self.insertHtmlElement()
633
 
            return True
634
 
 
635
 
        def processComment(self, token):
636
 
            self.tree.insertComment(token, self.tree.document)
637
 
 
638
 
        def processSpaceCharacters(self, token):
639
 
            pass
640
 
 
641
 
        def processCharacters(self, token):
642
 
            self.insertHtmlElement()
643
 
            return token
644
 
 
645
 
        def processStartTag(self, token):
646
 
            if token["name"] == "html":
647
 
                self.parser.firstStartTag = True
648
 
            self.insertHtmlElement()
649
 
            return token
650
 
 
651
 
        def processEndTag(self, token):
652
 
            if token["name"] not in ("head", "body", "html", "br"):
653
 
                self.parser.parseError("unexpected-end-tag-before-html",
654
 
                                       {"name": token["name"]})
655
 
            else:
656
 
                self.insertHtmlElement()
657
 
                return token
658
 
 
659
 
    class BeforeHeadPhase(Phase):
660
 
        def __init__(self, parser, tree):
661
 
            Phase.__init__(self, parser, tree)
662
 
 
663
 
            self.startTagHandler = utils.MethodDispatcher([
664
 
                ("html", self.startTagHtml),
665
 
                ("head", self.startTagHead)
666
 
            ])
667
 
            self.startTagHandler.default = self.startTagOther
668
 
 
669
 
            self.endTagHandler = utils.MethodDispatcher([
670
 
                (("head", "body", "html", "br"), self.endTagImplyHead)
671
 
            ])
672
 
            self.endTagHandler.default = self.endTagOther
673
 
 
674
 
        def processEOF(self):
675
 
            self.startTagHead(impliedTagToken("head", "StartTag"))
676
 
            return True
677
 
 
678
 
        def processSpaceCharacters(self, token):
679
 
            pass
680
 
 
681
 
        def processCharacters(self, token):
682
 
            self.startTagHead(impliedTagToken("head", "StartTag"))
683
 
            return token
684
 
 
685
 
        def startTagHtml(self, token):
686
 
            return self.parser.phases["inBody"].processStartTag(token)
687
 
 
688
 
        def startTagHead(self, token):
689
 
            self.tree.insertElement(token)
690
 
            self.tree.headPointer = self.tree.openElements[-1]
691
 
            self.parser.phase = self.parser.phases["inHead"]
692
 
 
693
 
        def startTagOther(self, token):
694
 
            self.startTagHead(impliedTagToken("head", "StartTag"))
695
 
            return token
696
 
 
697
 
        def endTagImplyHead(self, token):
698
 
            self.startTagHead(impliedTagToken("head", "StartTag"))
699
 
            return token
700
 
 
701
 
        def endTagOther(self, token):
702
 
            self.parser.parseError("end-tag-after-implied-root",
703
 
                                   {"name": token["name"]})
704
 
 
705
 
    class InHeadPhase(Phase):
706
 
        def __init__(self, parser, tree):
707
 
            Phase.__init__(self, parser, tree)
708
 
 
709
 
            self.startTagHandler = utils.MethodDispatcher([
710
 
                ("html", self.startTagHtml),
711
 
                ("title", self.startTagTitle),
712
 
                (("noscript", "noframes", "style"), self.startTagNoScriptNoFramesStyle),
713
 
                ("script", self.startTagScript),
714
 
                (("base", "basefont", "bgsound", "command", "link"),
715
 
                 self.startTagBaseLinkCommand),
716
 
                ("meta", self.startTagMeta),
717
 
                ("head", self.startTagHead)
718
 
            ])
719
 
            self.startTagHandler.default = self.startTagOther
720
 
 
721
 
            self. endTagHandler = utils.MethodDispatcher([
722
 
                ("head", self.endTagHead),
723
 
                (("br", "html", "body"), self.endTagHtmlBodyBr)
724
 
            ])
725
 
            self.endTagHandler.default = self.endTagOther
726
 
 
727
 
        # the real thing
728
 
        def processEOF(self):
729
 
            self.anythingElse()
730
 
            return True
731
 
 
732
 
        def processCharacters(self, token):
733
 
            self.anythingElse()
734
 
            return token
735
 
 
736
 
        def startTagHtml(self, token):
737
 
            return self.parser.phases["inBody"].processStartTag(token)
738
 
 
739
 
        def startTagHead(self, token):
740
 
            self.parser.parseError("two-heads-are-not-better-than-one")
741
 
 
742
 
        def startTagBaseLinkCommand(self, token):
743
 
            self.tree.insertElement(token)
744
 
            self.tree.openElements.pop()
745
 
            token["selfClosingAcknowledged"] = True
746
 
 
747
 
        def startTagMeta(self, token):
748
 
            self.tree.insertElement(token)
749
 
            self.tree.openElements.pop()
750
 
            token["selfClosingAcknowledged"] = True
751
 
 
752
 
            attributes = token["data"]
753
 
            if self.parser.tokenizer.stream.charEncoding[1] == "tentative":
754
 
                if "charset" in attributes:
755
 
                    self.parser.tokenizer.stream.changeEncoding(attributes["charset"])
756
 
                elif ("content" in attributes and
757
 
                      "http-equiv" in attributes and
758
 
                      attributes["http-equiv"].lower() == "content-type"):
759
 
                    # Encoding it as UTF-8 here is a hack, as really we should pass
760
 
                    # the abstract Unicode string, and just use the
761
 
                    # ContentAttrParser on that, but using UTF-8 allows all chars
762
 
                    # to be encoded and as a ASCII-superset works.
763
 
                    data = inputstream.EncodingBytes(attributes["content"].encode("utf-8"))
764
 
                    parser = inputstream.ContentAttrParser(data)
765
 
                    codec = parser.parse()
766
 
                    self.parser.tokenizer.stream.changeEncoding(codec)
767
 
 
768
 
        def startTagTitle(self, token):
769
 
            self.parser.parseRCDataRawtext(token, "RCDATA")
770
 
 
771
 
        def startTagNoScriptNoFramesStyle(self, token):
772
 
            # Need to decide whether to implement the scripting-disabled case
773
 
            self.parser.parseRCDataRawtext(token, "RAWTEXT")
774
 
 
775
 
        def startTagScript(self, token):
776
 
            self.tree.insertElement(token)
777
 
            self.parser.tokenizer.state = self.parser.tokenizer.scriptDataState
778
 
            self.parser.originalPhase = self.parser.phase
779
 
            self.parser.phase = self.parser.phases["text"]
780
 
 
781
 
        def startTagOther(self, token):
782
 
            self.anythingElse()
783
 
            return token
784
 
 
785
 
        def endTagHead(self, token):
786
 
            node = self.parser.tree.openElements.pop()
787
 
            assert node.name == "head", "Expected head got %s" % node.name
788
 
            self.parser.phase = self.parser.phases["afterHead"]
789
 
 
790
 
        def endTagHtmlBodyBr(self, token):
791
 
            self.anythingElse()
792
 
            return token
793
 
 
794
 
        def endTagOther(self, token):
795
 
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
796
 
 
797
 
        def anythingElse(self):
798
 
            self.endTagHead(impliedTagToken("head"))
799
 
 
800
 
    # XXX If we implement a parser for which scripting is disabled we need to
801
 
    # implement this phase.
802
 
    #
803
 
    # class InHeadNoScriptPhase(Phase):
804
 
    class AfterHeadPhase(Phase):
805
 
        def __init__(self, parser, tree):
806
 
            Phase.__init__(self, parser, tree)
807
 
 
808
 
            self.startTagHandler = utils.MethodDispatcher([
809
 
                ("html", self.startTagHtml),
810
 
                ("body", self.startTagBody),
811
 
                ("frameset", self.startTagFrameset),
812
 
                (("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
813
 
                  "style", "title"),
814
 
                 self.startTagFromHead),
815
 
                ("head", self.startTagHead)
816
 
            ])
817
 
            self.startTagHandler.default = self.startTagOther
818
 
            self.endTagHandler = utils.MethodDispatcher([(("body", "html", "br"),
819
 
                                                          self.endTagHtmlBodyBr)])
820
 
            self.endTagHandler.default = self.endTagOther
821
 
 
822
 
        def processEOF(self):
823
 
            self.anythingElse()
824
 
            return True
825
 
 
826
 
        def processCharacters(self, token):
827
 
            self.anythingElse()
828
 
            return token
829
 
 
830
 
        def startTagHtml(self, token):
831
 
            return self.parser.phases["inBody"].processStartTag(token)
832
 
 
833
 
        def startTagBody(self, token):
834
 
            self.parser.framesetOK = False
835
 
            self.tree.insertElement(token)
836
 
            self.parser.phase = self.parser.phases["inBody"]
837
 
 
838
 
        def startTagFrameset(self, token):
839
 
            self.tree.insertElement(token)
840
 
            self.parser.phase = self.parser.phases["inFrameset"]
841
 
 
842
 
        def startTagFromHead(self, token):
843
 
            self.parser.parseError("unexpected-start-tag-out-of-my-head",
844
 
                                   {"name": token["name"]})
845
 
            self.tree.openElements.append(self.tree.headPointer)
846
 
            self.parser.phases["inHead"].processStartTag(token)
847
 
            for node in self.tree.openElements[::-1]:
848
 
                if node.name == "head":
849
 
                    self.tree.openElements.remove(node)
850
 
                    break
851
 
 
852
 
        def startTagHead(self, token):
853
 
            self.parser.parseError("unexpected-start-tag", {"name": token["name"]})
854
 
 
855
 
        def startTagOther(self, token):
856
 
            self.anythingElse()
857
 
            return token
858
 
 
859
 
        def endTagHtmlBodyBr(self, token):
860
 
            self.anythingElse()
861
 
            return token
862
 
 
863
 
        def endTagOther(self, token):
864
 
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
865
 
 
866
 
        def anythingElse(self):
867
 
            self.tree.insertElement(impliedTagToken("body", "StartTag"))
868
 
            self.parser.phase = self.parser.phases["inBody"]
869
 
            self.parser.framesetOK = True
870
 
 
871
 
    class InBodyPhase(Phase):
872
 
        # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody
873
 
        # the really-really-really-very crazy mode
874
 
        def __init__(self, parser, tree):
875
 
            Phase.__init__(self, parser, tree)
876
 
 
877
 
            # Keep a ref to this for special handling of whitespace in <pre>
878
 
            self.processSpaceCharactersNonPre = self.processSpaceCharacters
879
 
 
880
 
            self.startTagHandler = utils.MethodDispatcher([
881
 
                ("html", self.startTagHtml),
882
 
                (("base", "basefont", "bgsound", "command", "link", "meta",
883
 
                  "noframes", "script", "style", "title"),
884
 
                 self.startTagProcessInHead),
885
 
                ("body", self.startTagBody),
886
 
                ("frameset", self.startTagFrameset),
887
 
                (("address", "article", "aside", "blockquote", "center", "details",
888
 
                  "details", "dir", "div", "dl", "fieldset", "figcaption", "figure",
889
 
                  "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
890
 
                  "section", "summary", "ul"),
891
 
                 self.startTagCloseP),
892
 
                (headingElements, self.startTagHeading),
893
 
                (("pre", "listing"), self.startTagPreListing),
894
 
                ("form", self.startTagForm),
895
 
                (("li", "dd", "dt"), self.startTagListItem),
896
 
                ("plaintext", self.startTagPlaintext),
897
 
                ("a", self.startTagA),
898
 
                (("b", "big", "code", "em", "font", "i", "s", "small", "strike",
899
 
                  "strong", "tt", "u"), self.startTagFormatting),
900
 
                ("nobr", self.startTagNobr),
901
 
                ("button", self.startTagButton),
902
 
                (("applet", "marquee", "object"), self.startTagAppletMarqueeObject),
903
 
                ("xmp", self.startTagXmp),
904
 
                ("table", self.startTagTable),
905
 
                (("area", "br", "embed", "img", "keygen", "wbr"),
906
 
                 self.startTagVoidFormatting),
907
 
                (("param", "source", "track"), self.startTagParamSource),
908
 
                ("input", self.startTagInput),
909
 
                ("hr", self.startTagHr),
910
 
                ("image", self.startTagImage),
911
 
                ("isindex", self.startTagIsIndex),
912
 
                ("textarea", self.startTagTextarea),
913
 
                ("iframe", self.startTagIFrame),
914
 
                (("noembed", "noframes", "noscript"), self.startTagRawtext),
915
 
                ("select", self.startTagSelect),
916
 
                (("rp", "rt"), self.startTagRpRt),
917
 
                (("option", "optgroup"), self.startTagOpt),
918
 
                (("math"), self.startTagMath),
919
 
                (("svg"), self.startTagSvg),
920
 
                (("caption", "col", "colgroup", "frame", "head",
921
 
                  "tbody", "td", "tfoot", "th", "thead",
922
 
                  "tr"), self.startTagMisplaced)
923
 
            ])
924
 
            self.startTagHandler.default = self.startTagOther
925
 
 
926
 
            self.endTagHandler = utils.MethodDispatcher([
927
 
                ("body", self.endTagBody),
928
 
                ("html", self.endTagHtml),
929
 
                (("address", "article", "aside", "blockquote", "button", "center",
930
 
                  "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
931
 
                  "footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
932
 
                  "section", "summary", "ul"), self.endTagBlock),
933
 
                ("form", self.endTagForm),
934
 
                ("p", self.endTagP),
935
 
                (("dd", "dt", "li"), self.endTagListItem),
936
 
                (headingElements, self.endTagHeading),
937
 
                (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
938
 
                  "strike", "strong", "tt", "u"), self.endTagFormatting),
939
 
                (("applet", "marquee", "object"), self.endTagAppletMarqueeObject),
940
 
                ("br", self.endTagBr),
941
 
            ])
942
 
            self.endTagHandler.default = self.endTagOther
943
 
 
944
 
        def isMatchingFormattingElement(self, node1, node2):
945
 
            if node1.name != node2.name or node1.namespace != node2.namespace:
946
 
                return False
947
 
            elif len(node1.attributes) != len(node2.attributes):
948
 
                return False
949
 
            else:
950
 
                attributes1 = sorted(node1.attributes.items())
951
 
                attributes2 = sorted(node2.attributes.items())
952
 
                for attr1, attr2 in zip(attributes1, attributes2):
953
 
                    if attr1 != attr2:
954
 
                        return False
955
 
            return True
956
 
 
957
 
        # helper
958
 
        def addFormattingElement(self, token):
959
 
            self.tree.insertElement(token)
960
 
            element = self.tree.openElements[-1]
961
 
 
962
 
            matchingElements = []
963
 
            for node in self.tree.activeFormattingElements[::-1]:
964
 
                if node is Marker:
965
 
                    break
966
 
                elif self.isMatchingFormattingElement(node, element):
967
 
                    matchingElements.append(node)
968
 
 
969
 
            assert len(matchingElements) <= 3
970
 
            if len(matchingElements) == 3:
971
 
                self.tree.activeFormattingElements.remove(matchingElements[-1])
972
 
            self.tree.activeFormattingElements.append(element)
973
 
 
974
 
        # the real deal
975
 
        def processEOF(self):
976
 
            allowed_elements = frozenset(("dd", "dt", "li", "p", "tbody", "td",
977
 
                                          "tfoot", "th", "thead", "tr", "body",
978
 
                                          "html"))
979
 
            for node in self.tree.openElements[::-1]:
980
 
                if node.name not in allowed_elements:
981
 
                    self.parser.parseError("expected-closing-tag-but-got-eof")
982
 
                    break
983
 
            # Stop parsing
984
 
 
985
 
        def processSpaceCharactersDropNewline(self, token):
986
 
            # Sometimes (start of <pre>, <listing>, and <textarea> blocks) we
987
 
            # want to drop leading newlines
988
 
            data = token["data"]
989
 
            self.processSpaceCharacters = self.processSpaceCharactersNonPre
990
 
            if (data.startswith("\n") and
991
 
                self.tree.openElements[-1].name in ("pre", "listing", "textarea")
992
 
                    and not self.tree.openElements[-1].hasContent()):
993
 
                data = data[1:]
994
 
            if data:
995
 
                self.tree.reconstructActiveFormattingElements()
996
 
                self.tree.insertText(data)
997
 
 
998
 
        def processCharacters(self, token):
999
 
            if token["data"] == "\u0000":
1000
 
                # The tokenizer should always emit null on its own
1001
 
                return
1002
 
            self.tree.reconstructActiveFormattingElements()
1003
 
            self.tree.insertText(token["data"])
1004
 
            # This must be bad for performance
1005
 
            if (self.parser.framesetOK and
1006
 
                any([char not in spaceCharacters
1007
 
                     for char in token["data"]])):
1008
 
                self.parser.framesetOK = False
1009
 
 
1010
 
        def processSpaceCharacters(self, token):
1011
 
            self.tree.reconstructActiveFormattingElements()
1012
 
            self.tree.insertText(token["data"])
1013
 
 
1014
 
        def startTagProcessInHead(self, token):
1015
 
            return self.parser.phases["inHead"].processStartTag(token)
1016
 
 
1017
 
        def startTagBody(self, token):
1018
 
            self.parser.parseError("unexpected-start-tag", {"name": "body"})
1019
 
            if (len(self.tree.openElements) == 1
1020
 
                    or self.tree.openElements[1].name != "body"):
1021
 
                assert self.parser.innerHTML
1022
 
            else:
1023
 
                self.parser.framesetOK = False
1024
 
                for attr, value in token["data"].items():
1025
 
                    if attr not in self.tree.openElements[1].attributes:
1026
 
                        self.tree.openElements[1].attributes[attr] = value
1027
 
 
1028
 
        def startTagFrameset(self, token):
1029
 
            self.parser.parseError("unexpected-start-tag", {"name": "frameset"})
1030
 
            if (len(self.tree.openElements) == 1 or self.tree.openElements[1].name != "body"):
1031
 
                assert self.parser.innerHTML
1032
 
            elif not self.parser.framesetOK:
1033
 
                pass
1034
 
            else:
1035
 
                if self.tree.openElements[1].parent:
1036
 
                    self.tree.openElements[1].parent.removeChild(self.tree.openElements[1])
1037
 
                while self.tree.openElements[-1].name != "html":
1038
 
                    self.tree.openElements.pop()
1039
 
                self.tree.insertElement(token)
1040
 
                self.parser.phase = self.parser.phases["inFrameset"]
1041
 
 
1042
 
        def startTagCloseP(self, token):
1043
 
            if self.tree.elementInScope("p", variant="button"):
1044
 
                self.endTagP(impliedTagToken("p"))
1045
 
            self.tree.insertElement(token)
1046
 
 
1047
 
        def startTagPreListing(self, token):
1048
 
            if self.tree.elementInScope("p", variant="button"):
1049
 
                self.endTagP(impliedTagToken("p"))
1050
 
            self.tree.insertElement(token)
1051
 
            self.parser.framesetOK = False
1052
 
            self.processSpaceCharacters = self.processSpaceCharactersDropNewline
1053
 
 
1054
 
        def startTagForm(self, token):
1055
 
            if self.tree.formPointer:
1056
 
                self.parser.parseError("unexpected-start-tag", {"name": "form"})
1057
 
            else:
1058
 
                if self.tree.elementInScope("p", variant="button"):
1059
 
                    self.endTagP(impliedTagToken("p"))
1060
 
                self.tree.insertElement(token)
1061
 
                self.tree.formPointer = self.tree.openElements[-1]
1062
 
 
1063
 
        def startTagListItem(self, token):
1064
 
            self.parser.framesetOK = False
1065
 
 
1066
 
            stopNamesMap = {"li": ["li"],
1067
 
                            "dt": ["dt", "dd"],
1068
 
                            "dd": ["dt", "dd"]}
1069
 
            stopNames = stopNamesMap[token["name"]]
1070
 
            for node in reversed(self.tree.openElements):
1071
 
                if node.name in stopNames:
1072
 
                    self.parser.phase.processEndTag(
1073
 
                        impliedTagToken(node.name, "EndTag"))
1074
 
                    break
1075
 
                if (node.nameTuple in specialElements and
1076
 
                        node.name not in ("address", "div", "p")):
1077
 
                    break
1078
 
 
1079
 
            if self.tree.elementInScope("p", variant="button"):
1080
 
                self.parser.phase.processEndTag(
1081
 
                    impliedTagToken("p", "EndTag"))
1082
 
 
1083
 
            self.tree.insertElement(token)
1084
 
 
1085
 
        def startTagPlaintext(self, token):
1086
 
            if self.tree.elementInScope("p", variant="button"):
1087
 
                self.endTagP(impliedTagToken("p"))
1088
 
            self.tree.insertElement(token)
1089
 
            self.parser.tokenizer.state = self.parser.tokenizer.plaintextState
1090
 
 
1091
 
        def startTagHeading(self, token):
1092
 
            if self.tree.elementInScope("p", variant="button"):
1093
 
                self.endTagP(impliedTagToken("p"))
1094
 
            if self.tree.openElements[-1].name in headingElements:
1095
 
                self.parser.parseError("unexpected-start-tag", {"name": token["name"]})
1096
 
                self.tree.openElements.pop()
1097
 
            self.tree.insertElement(token)
1098
 
 
1099
 
        def startTagA(self, token):
1100
 
            afeAElement = self.tree.elementInActiveFormattingElements("a")
1101
 
            if afeAElement:
1102
 
                self.parser.parseError("unexpected-start-tag-implies-end-tag",
1103
 
                                       {"startName": "a", "endName": "a"})
1104
 
                self.endTagFormatting(impliedTagToken("a"))
1105
 
                if afeAElement in self.tree.openElements:
1106
 
                    self.tree.openElements.remove(afeAElement)
1107
 
                if afeAElement in self.tree.activeFormattingElements:
1108
 
                    self.tree.activeFormattingElements.remove(afeAElement)
1109
 
            self.tree.reconstructActiveFormattingElements()
1110
 
            self.addFormattingElement(token)
1111
 
 
1112
 
        def startTagFormatting(self, token):
1113
 
            self.tree.reconstructActiveFormattingElements()
1114
 
            self.addFormattingElement(token)
1115
 
 
1116
 
        def startTagNobr(self, token):
1117
 
            self.tree.reconstructActiveFormattingElements()
1118
 
            if self.tree.elementInScope("nobr"):
1119
 
                self.parser.parseError("unexpected-start-tag-implies-end-tag",
1120
 
                                       {"startName": "nobr", "endName": "nobr"})
1121
 
                self.processEndTag(impliedTagToken("nobr"))
1122
 
                # XXX Need tests that trigger the following
1123
 
                self.tree.reconstructActiveFormattingElements()
1124
 
            self.addFormattingElement(token)
1125
 
 
1126
 
        def startTagButton(self, token):
1127
 
            if self.tree.elementInScope("button"):
1128
 
                self.parser.parseError("unexpected-start-tag-implies-end-tag",
1129
 
                                       {"startName": "button", "endName": "button"})
1130
 
                self.processEndTag(impliedTagToken("button"))
1131
 
                return token
1132
 
            else:
1133
 
                self.tree.reconstructActiveFormattingElements()
1134
 
                self.tree.insertElement(token)
1135
 
                self.parser.framesetOK = False
1136
 
 
1137
 
        def startTagAppletMarqueeObject(self, token):
1138
 
            self.tree.reconstructActiveFormattingElements()
1139
 
            self.tree.insertElement(token)
1140
 
            self.tree.activeFormattingElements.append(Marker)
1141
 
            self.parser.framesetOK = False
1142
 
 
1143
 
        def startTagXmp(self, token):
1144
 
            if self.tree.elementInScope("p", variant="button"):
1145
 
                self.endTagP(impliedTagToken("p"))
1146
 
            self.tree.reconstructActiveFormattingElements()
1147
 
            self.parser.framesetOK = False
1148
 
            self.parser.parseRCDataRawtext(token, "RAWTEXT")
1149
 
 
1150
 
        def startTagTable(self, token):
1151
 
            if self.parser.compatMode != "quirks":
1152
 
                if self.tree.elementInScope("p", variant="button"):
1153
 
                    self.processEndTag(impliedTagToken("p"))
1154
 
            self.tree.insertElement(token)
1155
 
            self.parser.framesetOK = False
1156
 
            self.parser.phase = self.parser.phases["inTable"]
1157
 
 
1158
 
        def startTagVoidFormatting(self, token):
1159
 
            self.tree.reconstructActiveFormattingElements()
1160
 
            self.tree.insertElement(token)
1161
 
            self.tree.openElements.pop()
1162
 
            token["selfClosingAcknowledged"] = True
1163
 
            self.parser.framesetOK = False
1164
 
 
1165
 
        def startTagInput(self, token):
1166
 
            framesetOK = self.parser.framesetOK
1167
 
            self.startTagVoidFormatting(token)
1168
 
            if ("type" in token["data"] and
1169
 
                    token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
1170
 
                # input type=hidden doesn't change framesetOK
1171
 
                self.parser.framesetOK = framesetOK
1172
 
 
1173
 
        def startTagParamSource(self, token):
1174
 
            self.tree.insertElement(token)
1175
 
            self.tree.openElements.pop()
1176
 
            token["selfClosingAcknowledged"] = True
1177
 
 
1178
 
        def startTagHr(self, token):
1179
 
            if self.tree.elementInScope("p", variant="button"):
1180
 
                self.endTagP(impliedTagToken("p"))
1181
 
            self.tree.insertElement(token)
1182
 
            self.tree.openElements.pop()
1183
 
            token["selfClosingAcknowledged"] = True
1184
 
            self.parser.framesetOK = False
1185
 
 
1186
 
        def startTagImage(self, token):
1187
 
            # No really...
1188
 
            self.parser.parseError("unexpected-start-tag-treated-as",
1189
 
                                   {"originalName": "image", "newName": "img"})
1190
 
            self.processStartTag(impliedTagToken("img", "StartTag",
1191
 
                                                 attributes=token["data"],
1192
 
                                                 selfClosing=token["selfClosing"]))
1193
 
 
1194
 
        def startTagIsIndex(self, token):
1195
 
            self.parser.parseError("deprecated-tag", {"name": "isindex"})
1196
 
            if self.tree.formPointer:
1197
 
                return
1198
 
            form_attrs = {}
1199
 
            if "action" in token["data"]:
1200
 
                form_attrs["action"] = token["data"]["action"]
1201
 
            self.processStartTag(impliedTagToken("form", "StartTag",
1202
 
                                                 attributes=form_attrs))
1203
 
            self.processStartTag(impliedTagToken("hr", "StartTag"))
1204
 
            self.processStartTag(impliedTagToken("label", "StartTag"))
1205
 
            # XXX Localization ...
1206
 
            if "prompt" in token["data"]:
1207
 
                prompt = token["data"]["prompt"]
1208
 
            else:
1209
 
                prompt = "This is a searchable index. Enter search keywords: "
1210
 
            self.processCharacters(
1211
 
                {"type": tokenTypes["Characters"], "data": prompt})
1212
 
            attributes = token["data"].copy()
1213
 
            if "action" in attributes:
1214
 
                del attributes["action"]
1215
 
            if "prompt" in attributes:
1216
 
                del attributes["prompt"]
1217
 
            attributes["name"] = "isindex"
1218
 
            self.processStartTag(impliedTagToken("input", "StartTag",
1219
 
                                                 attributes=attributes,
1220
 
                                                 selfClosing=
1221
 
                                                 token["selfClosing"]))
1222
 
            self.processEndTag(impliedTagToken("label"))
1223
 
            self.processStartTag(impliedTagToken("hr", "StartTag"))
1224
 
            self.processEndTag(impliedTagToken("form"))
1225
 
 
1226
 
        def startTagTextarea(self, token):
1227
 
            self.tree.insertElement(token)
1228
 
            self.parser.tokenizer.state = self.parser.tokenizer.rcdataState
1229
 
            self.processSpaceCharacters = self.processSpaceCharactersDropNewline
1230
 
            self.parser.framesetOK = False
1231
 
 
1232
 
        def startTagIFrame(self, token):
1233
 
            self.parser.framesetOK = False
1234
 
            self.startTagRawtext(token)
1235
 
 
1236
 
        def startTagRawtext(self, token):
1237
 
            """iframe, noembed noframes, noscript(if scripting enabled)"""
1238
 
            self.parser.parseRCDataRawtext(token, "RAWTEXT")
1239
 
 
1240
 
        def startTagOpt(self, token):
1241
 
            if self.tree.openElements[-1].name == "option":
1242
 
                self.parser.phase.processEndTag(impliedTagToken("option"))
1243
 
            self.tree.reconstructActiveFormattingElements()
1244
 
            self.parser.tree.insertElement(token)
1245
 
 
1246
 
        def startTagSelect(self, token):
1247
 
            self.tree.reconstructActiveFormattingElements()
1248
 
            self.tree.insertElement(token)
1249
 
            self.parser.framesetOK = False
1250
 
            if self.parser.phase in (self.parser.phases["inTable"],
1251
 
                                     self.parser.phases["inCaption"],
1252
 
                                     self.parser.phases["inColumnGroup"],
1253
 
                                     self.parser.phases["inTableBody"],
1254
 
                                     self.parser.phases["inRow"],
1255
 
                                     self.parser.phases["inCell"]):
1256
 
                self.parser.phase = self.parser.phases["inSelectInTable"]
1257
 
            else:
1258
 
                self.parser.phase = self.parser.phases["inSelect"]
1259
 
 
1260
 
        def startTagRpRt(self, token):
1261
 
            if self.tree.elementInScope("ruby"):
1262
 
                self.tree.generateImpliedEndTags()
1263
 
                if self.tree.openElements[-1].name != "ruby":
1264
 
                    self.parser.parseError()
1265
 
            self.tree.insertElement(token)
1266
 
 
1267
 
        def startTagMath(self, token):
1268
 
            self.tree.reconstructActiveFormattingElements()
1269
 
            self.parser.adjustMathMLAttributes(token)
1270
 
            self.parser.adjustForeignAttributes(token)
1271
 
            token["namespace"] = namespaces["mathml"]
1272
 
            self.tree.insertElement(token)
1273
 
            # Need to get the parse error right for the case where the token
1274
 
            # has a namespace not equal to the xmlns attribute
1275
 
            if token["selfClosing"]:
1276
 
                self.tree.openElements.pop()
1277
 
                token["selfClosingAcknowledged"] = True
1278
 
 
1279
 
        def startTagSvg(self, token):
1280
 
            self.tree.reconstructActiveFormattingElements()
1281
 
            self.parser.adjustSVGAttributes(token)
1282
 
            self.parser.adjustForeignAttributes(token)
1283
 
            token["namespace"] = namespaces["svg"]
1284
 
            self.tree.insertElement(token)
1285
 
            # Need to get the parse error right for the case where the token
1286
 
            # has a namespace not equal to the xmlns attribute
1287
 
            if token["selfClosing"]:
1288
 
                self.tree.openElements.pop()
1289
 
                token["selfClosingAcknowledged"] = True
1290
 
 
1291
 
        def startTagMisplaced(self, token):
1292
 
            """ Elements that should be children of other elements that have a
1293
 
            different insertion mode; here they are ignored
1294
 
            "caption", "col", "colgroup", "frame", "frameset", "head",
1295
 
            "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
1296
 
            "tr", "noscript"
1297
 
            """
1298
 
            self.parser.parseError("unexpected-start-tag-ignored", {"name": token["name"]})
1299
 
 
1300
 
        def startTagOther(self, token):
1301
 
            self.tree.reconstructActiveFormattingElements()
1302
 
            self.tree.insertElement(token)
1303
 
 
1304
 
        def endTagP(self, token):
1305
 
            if not self.tree.elementInScope("p", variant="button"):
1306
 
                self.startTagCloseP(impliedTagToken("p", "StartTag"))
1307
 
                self.parser.parseError("unexpected-end-tag", {"name": "p"})
1308
 
                self.endTagP(impliedTagToken("p", "EndTag"))
1309
 
            else:
1310
 
                self.tree.generateImpliedEndTags("p")
1311
 
                if self.tree.openElements[-1].name != "p":
1312
 
                    self.parser.parseError("unexpected-end-tag", {"name": "p"})
1313
 
                node = self.tree.openElements.pop()
1314
 
                while node.name != "p":
1315
 
                    node = self.tree.openElements.pop()
1316
 
 
1317
 
        def endTagBody(self, token):
1318
 
            if not self.tree.elementInScope("body"):
1319
 
                self.parser.parseError()
1320
 
                return
1321
 
            elif self.tree.openElements[-1].name != "body":
1322
 
                for node in self.tree.openElements[2:]:
1323
 
                    if node.name not in frozenset(("dd", "dt", "li", "optgroup",
1324
 
                                                   "option", "p", "rp", "rt",
1325
 
                                                   "tbody", "td", "tfoot",
1326
 
                                                   "th", "thead", "tr", "body",
1327
 
                                                   "html")):
1328
 
                        # Not sure this is the correct name for the parse error
1329
 
                        self.parser.parseError(
1330
 
                            "expected-one-end-tag-but-got-another",
1331
 
                            {"expectedName": "body", "gotName": node.name})
1332
 
                        break
1333
 
            self.parser.phase = self.parser.phases["afterBody"]
1334
 
 
1335
 
        def endTagHtml(self, token):
1336
 
            # We repeat the test for the body end tag token being ignored here
1337
 
            if self.tree.elementInScope("body"):
1338
 
                self.endTagBody(impliedTagToken("body"))
1339
 
                return token
1340
 
 
1341
 
        def endTagBlock(self, token):
1342
 
            # Put us back in the right whitespace handling mode
1343
 
            if token["name"] == "pre":
1344
 
                self.processSpaceCharacters = self.processSpaceCharactersNonPre
1345
 
            inScope = self.tree.elementInScope(token["name"])
1346
 
            if inScope:
1347
 
                self.tree.generateImpliedEndTags()
1348
 
            if self.tree.openElements[-1].name != token["name"]:
1349
 
                self.parser.parseError("end-tag-too-early", {"name": token["name"]})
1350
 
            if inScope:
1351
 
                node = self.tree.openElements.pop()
1352
 
                while node.name != token["name"]:
1353
 
                    node = self.tree.openElements.pop()
1354
 
 
1355
 
        def endTagForm(self, token):
1356
 
            node = self.tree.formPointer
1357
 
            self.tree.formPointer = None
1358
 
            if node is None or not self.tree.elementInScope(node):
1359
 
                self.parser.parseError("unexpected-end-tag",
1360
 
                                       {"name": "form"})
1361
 
            else:
1362
 
                self.tree.generateImpliedEndTags()
1363
 
                if self.tree.openElements[-1] != node:
1364
 
                    self.parser.parseError("end-tag-too-early-ignored",
1365
 
                                           {"name": "form"})
1366
 
                self.tree.openElements.remove(node)
1367
 
 
1368
 
        def endTagListItem(self, token):
1369
 
            if token["name"] == "li":
1370
 
                variant = "list"
1371
 
            else:
1372
 
                variant = None
1373
 
            if not self.tree.elementInScope(token["name"], variant=variant):
1374
 
                self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
1375
 
            else:
1376
 
                self.tree.generateImpliedEndTags(exclude=token["name"])
1377
 
                if self.tree.openElements[-1].name != token["name"]:
1378
 
                    self.parser.parseError(
1379
 
                        "end-tag-too-early",
1380
 
                        {"name": token["name"]})
1381
 
                node = self.tree.openElements.pop()
1382
 
                while node.name != token["name"]:
1383
 
                    node = self.tree.openElements.pop()
1384
 
 
1385
 
        def endTagHeading(self, token):
1386
 
            for item in headingElements:
1387
 
                if self.tree.elementInScope(item):
1388
 
                    self.tree.generateImpliedEndTags()
1389
 
                    break
1390
 
            if self.tree.openElements[-1].name != token["name"]:
1391
 
                self.parser.parseError("end-tag-too-early", {"name": token["name"]})
1392
 
 
1393
 
            for item in headingElements:
1394
 
                if self.tree.elementInScope(item):
1395
 
                    item = self.tree.openElements.pop()
1396
 
                    while item.name not in headingElements:
1397
 
                        item = self.tree.openElements.pop()
1398
 
                    break
1399
 
 
1400
 
        def endTagFormatting(self, token):
1401
 
            """The much-feared adoption agency algorithm"""
1402
 
            # http://svn.whatwg.org/webapps/complete.html#adoptionAgency revision 7867
1403
 
            # XXX Better parseError messages appreciated.
1404
 
 
1405
 
            # Step 1
1406
 
            outerLoopCounter = 0
1407
 
 
1408
 
            # Step 2
1409
 
            while outerLoopCounter < 8:
1410
 
 
1411
 
                # Step 3
1412
 
                outerLoopCounter += 1
1413
 
 
1414
 
                # Step 4:
1415
 
 
1416
 
                # Let the formatting element be the last element in
1417
 
                # the list of active formatting elements that:
1418
 
                # - is between the end of the list and the last scope
1419
 
                # marker in the list, if any, or the start of the list
1420
 
                # otherwise, and
1421
 
                # - has the same tag name as the token.
1422
 
                formattingElement = self.tree.elementInActiveFormattingElements(
1423
 
                    token["name"])
1424
 
                if (not formattingElement or
1425
 
                    (formattingElement in self.tree.openElements and
1426
 
                     not self.tree.elementInScope(formattingElement.name))):
1427
 
                    # If there is no such node, then abort these steps
1428
 
                    # and instead act as described in the "any other
1429
 
                    # end tag" entry below.
1430
 
                    self.endTagOther(token)
1431
 
                    return
1432
 
 
1433
 
                # Otherwise, if there is such a node, but that node is
1434
 
                # not in the stack of open elements, then this is a
1435
 
                # parse error; remove the element from the list, and
1436
 
                # abort these steps.
1437
 
                elif formattingElement not in self.tree.openElements:
1438
 
                    self.parser.parseError("adoption-agency-1.2", {"name": token["name"]})
1439
 
                    self.tree.activeFormattingElements.remove(formattingElement)
1440
 
                    return
1441
 
 
1442
 
                # Otherwise, if there is such a node, and that node is
1443
 
                # also in the stack of open elements, but the element
1444
 
                # is not in scope, then this is a parse error; ignore
1445
 
                # the token, and abort these steps.
1446
 
                elif not self.tree.elementInScope(formattingElement.name):
1447
 
                    self.parser.parseError("adoption-agency-4.4", {"name": token["name"]})
1448
 
                    return
1449
 
 
1450
 
                # Otherwise, there is a formatting element and that
1451
 
                # element is in the stack and is in scope. If the
1452
 
                # element is not the current node, this is a parse
1453
 
                # error. In any case, proceed with the algorithm as
1454
 
                # written in the following steps.
1455
 
                else:
1456
 
                    if formattingElement != self.tree.openElements[-1]:
1457
 
                        self.parser.parseError("adoption-agency-1.3", {"name": token["name"]})
1458
 
 
1459
 
                # Step 5:
1460
 
 
1461
 
                # Let the furthest block be the topmost node in the
1462
 
                # stack of open elements that is lower in the stack
1463
 
                # than the formatting element, and is an element in
1464
 
                # the special category. There might not be one.
1465
 
                afeIndex = self.tree.openElements.index(formattingElement)
1466
 
                furthestBlock = None
1467
 
                for element in self.tree.openElements[afeIndex:]:
1468
 
                    if element.nameTuple in specialElements:
1469
 
                        furthestBlock = element
1470
 
                        break
1471
 
 
1472
 
                # Step 6:
1473
 
 
1474
 
                # If there is no furthest block, then the UA must
1475
 
                # first pop all the nodes from the bottom of the stack
1476
 
                # of open elements, from the current node up to and
1477
 
                # including the formatting element, then remove the
1478
 
                # formatting element from the list of active
1479
 
                # formatting elements, and finally abort these steps.
1480
 
                if furthestBlock is None:
1481
 
                    element = self.tree.openElements.pop()
1482
 
                    while element != formattingElement:
1483
 
                        element = self.tree.openElements.pop()
1484
 
                    self.tree.activeFormattingElements.remove(element)
1485
 
                    return
1486
 
 
1487
 
                # Step 7
1488
 
                commonAncestor = self.tree.openElements[afeIndex - 1]
1489
 
 
1490
 
                # Step 8:
1491
 
                # The bookmark is supposed to help us identify where to reinsert
1492
 
                # nodes in step 15. We have to ensure that we reinsert nodes after
1493
 
                # the node before the active formatting element. Note the bookmark
1494
 
                # can move in step 9.7
1495
 
                bookmark = self.tree.activeFormattingElements.index(formattingElement)
1496
 
 
1497
 
                # Step 9
1498
 
                lastNode = node = furthestBlock
1499
 
                innerLoopCounter = 0
1500
 
 
1501
 
                index = self.tree.openElements.index(node)
1502
 
                while innerLoopCounter < 3:
1503
 
                    innerLoopCounter += 1
1504
 
                    # Node is element before node in open elements
1505
 
                    index -= 1
1506
 
                    node = self.tree.openElements[index]
1507
 
                    if node not in self.tree.activeFormattingElements:
1508
 
                        self.tree.openElements.remove(node)
1509
 
                        continue
1510
 
                    # Step 9.6
1511
 
                    if node == formattingElement:
1512
 
                        break
1513
 
                    # Step 9.7
1514
 
                    if lastNode == furthestBlock:
1515
 
                        bookmark = self.tree.activeFormattingElements.index(node) + 1
1516
 
                    # Step 9.8
1517
 
                    clone = node.cloneNode()
1518
 
                    # Replace node with clone
1519
 
                    self.tree.activeFormattingElements[
1520
 
                        self.tree.activeFormattingElements.index(node)] = clone
1521
 
                    self.tree.openElements[
1522
 
                        self.tree.openElements.index(node)] = clone
1523
 
                    node = clone
1524
 
                    # Step 9.9
1525
 
                    # Remove lastNode from its parents, if any
1526
 
                    if lastNode.parent:
1527
 
                        lastNode.parent.removeChild(lastNode)
1528
 
                    node.appendChild(lastNode)
1529
 
                    # Step 9.10
1530
 
                    lastNode = node
1531
 
 
1532
 
                # Step 10
1533
 
                # Foster parent lastNode if commonAncestor is a
1534
 
                # table, tbody, tfoot, thead, or tr we need to foster
1535
 
                # parent the lastNode
1536
 
                if lastNode.parent:
1537
 
                    lastNode.parent.removeChild(lastNode)
1538
 
 
1539
 
                if commonAncestor.name in frozenset(("table", "tbody", "tfoot", "thead", "tr")):
1540
 
                    parent, insertBefore = self.tree.getTableMisnestedNodePosition()
1541
 
                    parent.insertBefore(lastNode, insertBefore)
1542
 
                else:
1543
 
                    commonAncestor.appendChild(lastNode)
1544
 
 
1545
 
                # Step 11
1546
 
                clone = formattingElement.cloneNode()
1547
 
 
1548
 
                # Step 12
1549
 
                furthestBlock.reparentChildren(clone)
1550
 
 
1551
 
                # Step 13
1552
 
                furthestBlock.appendChild(clone)
1553
 
 
1554
 
                # Step 14
1555
 
                self.tree.activeFormattingElements.remove(formattingElement)
1556
 
                self.tree.activeFormattingElements.insert(bookmark, clone)
1557
 
 
1558
 
                # Step 15
1559
 
                self.tree.openElements.remove(formattingElement)
1560
 
                self.tree.openElements.insert(
1561
 
                    self.tree.openElements.index(furthestBlock) + 1, clone)
1562
 
 
1563
 
        def endTagAppletMarqueeObject(self, token):
1564
 
            if self.tree.elementInScope(token["name"]):
1565
 
                self.tree.generateImpliedEndTags()
1566
 
            if self.tree.openElements[-1].name != token["name"]:
1567
 
                self.parser.parseError("end-tag-too-early", {"name": token["name"]})
1568
 
 
1569
 
            if self.tree.elementInScope(token["name"]):
1570
 
                element = self.tree.openElements.pop()
1571
 
                while element.name != token["name"]:
1572
 
                    element = self.tree.openElements.pop()
1573
 
                self.tree.clearActiveFormattingElements()
1574
 
 
1575
 
        def endTagBr(self, token):
1576
 
            self.parser.parseError("unexpected-end-tag-treated-as",
1577
 
                                   {"originalName": "br", "newName": "br element"})
1578
 
            self.tree.reconstructActiveFormattingElements()
1579
 
            self.tree.insertElement(impliedTagToken("br", "StartTag"))
1580
 
            self.tree.openElements.pop()
1581
 
 
1582
 
        def endTagOther(self, token):
1583
 
            for node in self.tree.openElements[::-1]:
1584
 
                if node.name == token["name"]:
1585
 
                    self.tree.generateImpliedEndTags(exclude=token["name"])
1586
 
                    if self.tree.openElements[-1].name != token["name"]:
1587
 
                        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
1588
 
                    while self.tree.openElements.pop() != node:
1589
 
                        pass
1590
 
                    break
1591
 
                else:
1592
 
                    if node.nameTuple in specialElements:
1593
 
                        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
1594
 
                        break
1595
 
 
1596
 
    class TextPhase(Phase):
1597
 
        def __init__(self, parser, tree):
1598
 
            Phase.__init__(self, parser, tree)
1599
 
            self.startTagHandler = utils.MethodDispatcher([])
1600
 
            self.startTagHandler.default = self.startTagOther
1601
 
            self.endTagHandler = utils.MethodDispatcher([
1602
 
                ("script", self.endTagScript)])
1603
 
            self.endTagHandler.default = self.endTagOther
1604
 
 
1605
 
        def processCharacters(self, token):
1606
 
            self.tree.insertText(token["data"])
1607
 
 
1608
 
        def processEOF(self):
1609
 
            self.parser.parseError("expected-named-closing-tag-but-got-eof",
1610
 
                                   {"name": self.tree.openElements[-1].name})
1611
 
            self.tree.openElements.pop()
1612
 
            self.parser.phase = self.parser.originalPhase
1613
 
            return True
1614
 
 
1615
 
        def startTagOther(self, token):
1616
 
            assert False, "Tried to process start tag %s in RCDATA/RAWTEXT mode" % token['name']
1617
 
 
1618
 
        def endTagScript(self, token):
1619
 
            node = self.tree.openElements.pop()
1620
 
            assert node.name == "script"
1621
 
            self.parser.phase = self.parser.originalPhase
1622
 
            # The rest of this method is all stuff that only happens if
1623
 
            # document.write works
1624
 
 
1625
 
        def endTagOther(self, token):
1626
 
            self.tree.openElements.pop()
1627
 
            self.parser.phase = self.parser.originalPhase
1628
 
 
1629
 
    class InTablePhase(Phase):
1630
 
        # http://www.whatwg.org/specs/web-apps/current-work/#in-table
1631
 
        def __init__(self, parser, tree):
1632
 
            Phase.__init__(self, parser, tree)
1633
 
            self.startTagHandler = utils.MethodDispatcher([
1634
 
                ("html", self.startTagHtml),
1635
 
                ("caption", self.startTagCaption),
1636
 
                ("colgroup", self.startTagColgroup),
1637
 
                ("col", self.startTagCol),
1638
 
                (("tbody", "tfoot", "thead"), self.startTagRowGroup),
1639
 
                (("td", "th", "tr"), self.startTagImplyTbody),
1640
 
                ("table", self.startTagTable),
1641
 
                (("style", "script"), self.startTagStyleScript),
1642
 
                ("input", self.startTagInput),
1643
 
                ("form", self.startTagForm)
1644
 
            ])
1645
 
            self.startTagHandler.default = self.startTagOther
1646
 
 
1647
 
            self.endTagHandler = utils.MethodDispatcher([
1648
 
                ("table", self.endTagTable),
1649
 
                (("body", "caption", "col", "colgroup", "html", "tbody", "td",
1650
 
                  "tfoot", "th", "thead", "tr"), self.endTagIgnore)
1651
 
            ])
1652
 
            self.endTagHandler.default = self.endTagOther
1653
 
 
1654
 
        # helper methods
1655
 
        def clearStackToTableContext(self):
1656
 
            # "clear the stack back to a table context"
1657
 
            while self.tree.openElements[-1].name not in ("table", "html"):
1658
 
                # self.parser.parseError("unexpected-implied-end-tag-in-table",
1659
 
                #  {"name":  self.tree.openElements[-1].name})
1660
 
                self.tree.openElements.pop()
1661
 
            # When the current node is <html> it's an innerHTML case
1662
 
 
1663
 
        # processing methods
1664
 
        def processEOF(self):
1665
 
            if self.tree.openElements[-1].name != "html":
1666
 
                self.parser.parseError("eof-in-table")
1667
 
            else:
1668
 
                assert self.parser.innerHTML
1669
 
            # Stop parsing
1670
 
 
1671
 
        def processSpaceCharacters(self, token):
1672
 
            originalPhase = self.parser.phase
1673
 
            self.parser.phase = self.parser.phases["inTableText"]
1674
 
            self.parser.phase.originalPhase = originalPhase
1675
 
            self.parser.phase.processSpaceCharacters(token)
1676
 
 
1677
 
        def processCharacters(self, token):
1678
 
            originalPhase = self.parser.phase
1679
 
            self.parser.phase = self.parser.phases["inTableText"]
1680
 
            self.parser.phase.originalPhase = originalPhase
1681
 
            self.parser.phase.processCharacters(token)
1682
 
 
1683
 
        def insertText(self, token):
1684
 
            # If we get here there must be at least one non-whitespace character
1685
 
            # Do the table magic!
1686
 
            self.tree.insertFromTable = True
1687
 
            self.parser.phases["inBody"].processCharacters(token)
1688
 
            self.tree.insertFromTable = False
1689
 
 
1690
 
        def startTagCaption(self, token):
1691
 
            self.clearStackToTableContext()
1692
 
            self.tree.activeFormattingElements.append(Marker)
1693
 
            self.tree.insertElement(token)
1694
 
            self.parser.phase = self.parser.phases["inCaption"]
1695
 
 
1696
 
        def startTagColgroup(self, token):
1697
 
            self.clearStackToTableContext()
1698
 
            self.tree.insertElement(token)
1699
 
            self.parser.phase = self.parser.phases["inColumnGroup"]
1700
 
 
1701
 
        def startTagCol(self, token):
1702
 
            self.startTagColgroup(impliedTagToken("colgroup", "StartTag"))
1703
 
            return token
1704
 
 
1705
 
        def startTagRowGroup(self, token):
1706
 
            self.clearStackToTableContext()
1707
 
            self.tree.insertElement(token)
1708
 
            self.parser.phase = self.parser.phases["inTableBody"]
1709
 
 
1710
 
        def startTagImplyTbody(self, token):
1711
 
            self.startTagRowGroup(impliedTagToken("tbody", "StartTag"))
1712
 
            return token
1713
 
 
1714
 
        def startTagTable(self, token):
1715
 
            self.parser.parseError("unexpected-start-tag-implies-end-tag",
1716
 
                                   {"startName": "table", "endName": "table"})
1717
 
            self.parser.phase.processEndTag(impliedTagToken("table"))
1718
 
            if not self.parser.innerHTML:
1719
 
                return token
1720
 
 
1721
 
        def startTagStyleScript(self, token):
1722
 
            return self.parser.phases["inHead"].processStartTag(token)
1723
 
 
1724
 
        def startTagInput(self, token):
1725
 
            if ("type" in token["data"] and
1726
 
                    token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
1727
 
                self.parser.parseError("unexpected-hidden-input-in-table")
1728
 
                self.tree.insertElement(token)
1729
 
                # XXX associate with form
1730
 
                self.tree.openElements.pop()
1731
 
            else:
1732
 
                self.startTagOther(token)
1733
 
 
1734
 
        def startTagForm(self, token):
1735
 
            self.parser.parseError("unexpected-form-in-table")
1736
 
            if self.tree.formPointer is None:
1737
 
                self.tree.insertElement(token)
1738
 
                self.tree.formPointer = self.tree.openElements[-1]
1739
 
                self.tree.openElements.pop()
1740
 
 
1741
 
        def startTagOther(self, token):
1742
 
            self.parser.parseError("unexpected-start-tag-implies-table-voodoo", {"name": token["name"]})
1743
 
            # Do the table magic!
1744
 
            self.tree.insertFromTable = True
1745
 
            self.parser.phases["inBody"].processStartTag(token)
1746
 
            self.tree.insertFromTable = False
1747
 
 
1748
 
        def endTagTable(self, token):
1749
 
            if self.tree.elementInScope("table", variant="table"):
1750
 
                self.tree.generateImpliedEndTags()
1751
 
                if self.tree.openElements[-1].name != "table":
1752
 
                    self.parser.parseError("end-tag-too-early-named",
1753
 
                                           {"gotName": "table",
1754
 
                                            "expectedName": self.tree.openElements[-1].name})
1755
 
                while self.tree.openElements[-1].name != "table":
1756
 
                    self.tree.openElements.pop()
1757
 
                self.tree.openElements.pop()
1758
 
                self.parser.resetInsertionMode()
1759
 
            else:
1760
 
                # innerHTML case
1761
 
                assert self.parser.innerHTML
1762
 
                self.parser.parseError()
1763
 
 
1764
 
        def endTagIgnore(self, token):
1765
 
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
1766
 
 
1767
 
        def endTagOther(self, token):
1768
 
            self.parser.parseError("unexpected-end-tag-implies-table-voodoo", {"name": token["name"]})
1769
 
            # Do the table magic!
1770
 
            self.tree.insertFromTable = True
1771
 
            self.parser.phases["inBody"].processEndTag(token)
1772
 
            self.tree.insertFromTable = False
1773
 
 
1774
 
    class InTableTextPhase(Phase):
1775
 
        def __init__(self, parser, tree):
1776
 
            Phase.__init__(self, parser, tree)
1777
 
            self.originalPhase = None
1778
 
            self.characterTokens = []
1779
 
 
1780
 
        def flushCharacters(self):
1781
 
            data = "".join([item["data"] for item in self.characterTokens])
1782
 
            if any([item not in spaceCharacters for item in data]):
1783
 
                token = {"type": tokenTypes["Characters"], "data": data}
1784
 
                self.parser.phases["inTable"].insertText(token)
1785
 
            elif data:
1786
 
                self.tree.insertText(data)
1787
 
            self.characterTokens = []
1788
 
 
1789
 
        def processComment(self, token):
1790
 
            self.flushCharacters()
1791
 
            self.parser.phase = self.originalPhase
1792
 
            return token
1793
 
 
1794
 
        def processEOF(self):
1795
 
            self.flushCharacters()
1796
 
            self.parser.phase = self.originalPhase
1797
 
            return True
1798
 
 
1799
 
        def processCharacters(self, token):
1800
 
            if token["data"] == "\u0000":
1801
 
                return
1802
 
            self.characterTokens.append(token)
1803
 
 
1804
 
        def processSpaceCharacters(self, token):
1805
 
            # pretty sure we should never reach here
1806
 
            self.characterTokens.append(token)
1807
 
    #        assert False
1808
 
 
1809
 
        def processStartTag(self, token):
1810
 
            self.flushCharacters()
1811
 
            self.parser.phase = self.originalPhase
1812
 
            return token
1813
 
 
1814
 
        def processEndTag(self, token):
1815
 
            self.flushCharacters()
1816
 
            self.parser.phase = self.originalPhase
1817
 
            return token
1818
 
 
1819
 
    class InCaptionPhase(Phase):
1820
 
        # http://www.whatwg.org/specs/web-apps/current-work/#in-caption
1821
 
        def __init__(self, parser, tree):
1822
 
            Phase.__init__(self, parser, tree)
1823
 
 
1824
 
            self.startTagHandler = utils.MethodDispatcher([
1825
 
                ("html", self.startTagHtml),
1826
 
                (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
1827
 
                  "thead", "tr"), self.startTagTableElement)
1828
 
            ])
1829
 
            self.startTagHandler.default = self.startTagOther
1830
 
 
1831
 
            self.endTagHandler = utils.MethodDispatcher([
1832
 
                ("caption", self.endTagCaption),
1833
 
                ("table", self.endTagTable),
1834
 
                (("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
1835
 
                  "thead", "tr"), self.endTagIgnore)
1836
 
            ])
1837
 
            self.endTagHandler.default = self.endTagOther
1838
 
 
1839
 
        def ignoreEndTagCaption(self):
1840
 
            return not self.tree.elementInScope("caption", variant="table")
1841
 
 
1842
 
        def processEOF(self):
1843
 
            self.parser.phases["inBody"].processEOF()
1844
 
 
1845
 
        def processCharacters(self, token):
1846
 
            return self.parser.phases["inBody"].processCharacters(token)
1847
 
 
1848
 
        def startTagTableElement(self, token):
1849
 
            self.parser.parseError()
1850
 
            # XXX Have to duplicate logic here to find out if the tag is ignored
1851
 
            ignoreEndTag = self.ignoreEndTagCaption()
1852
 
            self.parser.phase.processEndTag(impliedTagToken("caption"))
1853
 
            if not ignoreEndTag:
1854
 
                return token
1855
 
 
1856
 
        def startTagOther(self, token):
1857
 
            return self.parser.phases["inBody"].processStartTag(token)
1858
 
 
1859
 
        def endTagCaption(self, token):
1860
 
            if not self.ignoreEndTagCaption():
1861
 
                # AT this code is quite similar to endTagTable in "InTable"
1862
 
                self.tree.generateImpliedEndTags()
1863
 
                if self.tree.openElements[-1].name != "caption":
1864
 
                    self.parser.parseError("expected-one-end-tag-but-got-another",
1865
 
                                           {"gotName": "caption",
1866
 
                                            "expectedName": self.tree.openElements[-1].name})
1867
 
                while self.tree.openElements[-1].name != "caption":
1868
 
                    self.tree.openElements.pop()
1869
 
                self.tree.openElements.pop()
1870
 
                self.tree.clearActiveFormattingElements()
1871
 
                self.parser.phase = self.parser.phases["inTable"]
1872
 
            else:
1873
 
                # innerHTML case
1874
 
                assert self.parser.innerHTML
1875
 
                self.parser.parseError()
1876
 
 
1877
 
        def endTagTable(self, token):
1878
 
            self.parser.parseError()
1879
 
            ignoreEndTag = self.ignoreEndTagCaption()
1880
 
            self.parser.phase.processEndTag(impliedTagToken("caption"))
1881
 
            if not ignoreEndTag:
1882
 
                return token
1883
 
 
1884
 
        def endTagIgnore(self, token):
1885
 
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
1886
 
 
1887
 
        def endTagOther(self, token):
1888
 
            return self.parser.phases["inBody"].processEndTag(token)
1889
 
 
1890
 
    class InColumnGroupPhase(Phase):
1891
 
        # http://www.whatwg.org/specs/web-apps/current-work/#in-column
1892
 
 
1893
 
        def __init__(self, parser, tree):
1894
 
            Phase.__init__(self, parser, tree)
1895
 
 
1896
 
            self.startTagHandler = utils.MethodDispatcher([
1897
 
                ("html", self.startTagHtml),
1898
 
                ("col", self.startTagCol)
1899
 
            ])
1900
 
            self.startTagHandler.default = self.startTagOther
1901
 
 
1902
 
            self.endTagHandler = utils.MethodDispatcher([
1903
 
                ("colgroup", self.endTagColgroup),
1904
 
                ("col", self.endTagCol)
1905
 
            ])
1906
 
            self.endTagHandler.default = self.endTagOther
1907
 
 
1908
 
        def ignoreEndTagColgroup(self):
1909
 
            return self.tree.openElements[-1].name == "html"
1910
 
 
1911
 
        def processEOF(self):
1912
 
            if self.tree.openElements[-1].name == "html":
1913
 
                assert self.parser.innerHTML
1914
 
                return
1915
 
            else:
1916
 
                ignoreEndTag = self.ignoreEndTagColgroup()
1917
 
                self.endTagColgroup(impliedTagToken("colgroup"))
1918
 
                if not ignoreEndTag:
1919
 
                    return True
1920
 
 
1921
 
        def processCharacters(self, token):
1922
 
            ignoreEndTag = self.ignoreEndTagColgroup()
1923
 
            self.endTagColgroup(impliedTagToken("colgroup"))
1924
 
            if not ignoreEndTag:
1925
 
                return token
1926
 
 
1927
 
        def startTagCol(self, token):
1928
 
            self.tree.insertElement(token)
1929
 
            self.tree.openElements.pop()
1930
 
 
1931
 
        def startTagOther(self, token):
1932
 
            ignoreEndTag = self.ignoreEndTagColgroup()
1933
 
            self.endTagColgroup(impliedTagToken("colgroup"))
1934
 
            if not ignoreEndTag:
1935
 
                return token
1936
 
 
1937
 
        def endTagColgroup(self, token):
1938
 
            if self.ignoreEndTagColgroup():
1939
 
                # innerHTML case
1940
 
                assert self.parser.innerHTML
1941
 
                self.parser.parseError()
1942
 
            else:
1943
 
                self.tree.openElements.pop()
1944
 
                self.parser.phase = self.parser.phases["inTable"]
1945
 
 
1946
 
        def endTagCol(self, token):
1947
 
            self.parser.parseError("no-end-tag", {"name": "col"})
1948
 
 
1949
 
        def endTagOther(self, token):
1950
 
            ignoreEndTag = self.ignoreEndTagColgroup()
1951
 
            self.endTagColgroup(impliedTagToken("colgroup"))
1952
 
            if not ignoreEndTag:
1953
 
                return token
1954
 
 
1955
 
    class InTableBodyPhase(Phase):
1956
 
        # http://www.whatwg.org/specs/web-apps/current-work/#in-table0
1957
 
        def __init__(self, parser, tree):
1958
 
            Phase.__init__(self, parser, tree)
1959
 
            self.startTagHandler = utils.MethodDispatcher([
1960
 
                ("html", self.startTagHtml),
1961
 
                ("tr", self.startTagTr),
1962
 
                (("td", "th"), self.startTagTableCell),
1963
 
                (("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
1964
 
                 self.startTagTableOther)
1965
 
            ])
1966
 
            self.startTagHandler.default = self.startTagOther
1967
 
 
1968
 
            self.endTagHandler = utils.MethodDispatcher([
1969
 
                (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
1970
 
                ("table", self.endTagTable),
1971
 
                (("body", "caption", "col", "colgroup", "html", "td", "th",
1972
 
                  "tr"), self.endTagIgnore)
1973
 
            ])
1974
 
            self.endTagHandler.default = self.endTagOther
1975
 
 
1976
 
        # helper methods
1977
 
        def clearStackToTableBodyContext(self):
1978
 
            while self.tree.openElements[-1].name not in ("tbody", "tfoot",
1979
 
                                                          "thead", "html"):
1980
 
                # self.parser.parseError("unexpected-implied-end-tag-in-table",
1981
 
                #  {"name": self.tree.openElements[-1].name})
1982
 
                self.tree.openElements.pop()
1983
 
            if self.tree.openElements[-1].name == "html":
1984
 
                assert self.parser.innerHTML
1985
 
 
1986
 
        # the rest
1987
 
        def processEOF(self):
1988
 
            self.parser.phases["inTable"].processEOF()
1989
 
 
1990
 
        def processSpaceCharacters(self, token):
1991
 
            return self.parser.phases["inTable"].processSpaceCharacters(token)
1992
 
 
1993
 
        def processCharacters(self, token):
1994
 
            return self.parser.phases["inTable"].processCharacters(token)
1995
 
 
1996
 
        def startTagTr(self, token):
1997
 
            self.clearStackToTableBodyContext()
1998
 
            self.tree.insertElement(token)
1999
 
            self.parser.phase = self.parser.phases["inRow"]
2000
 
 
2001
 
        def startTagTableCell(self, token):
2002
 
            self.parser.parseError("unexpected-cell-in-table-body",
2003
 
                                   {"name": token["name"]})
2004
 
            self.startTagTr(impliedTagToken("tr", "StartTag"))
2005
 
            return token
2006
 
 
2007
 
        def startTagTableOther(self, token):
2008
 
            # XXX AT Any ideas on how to share this with endTagTable?
2009
 
            if (self.tree.elementInScope("tbody", variant="table") or
2010
 
                self.tree.elementInScope("thead", variant="table") or
2011
 
                    self.tree.elementInScope("tfoot", variant="table")):
2012
 
                self.clearStackToTableBodyContext()
2013
 
                self.endTagTableRowGroup(
2014
 
                    impliedTagToken(self.tree.openElements[-1].name))
2015
 
                return token
2016
 
            else:
2017
 
                # innerHTML case
2018
 
                assert self.parser.innerHTML
2019
 
                self.parser.parseError()
2020
 
 
2021
 
        def startTagOther(self, token):
2022
 
            return self.parser.phases["inTable"].processStartTag(token)
2023
 
 
2024
 
        def endTagTableRowGroup(self, token):
2025
 
            if self.tree.elementInScope(token["name"], variant="table"):
2026
 
                self.clearStackToTableBodyContext()
2027
 
                self.tree.openElements.pop()
2028
 
                self.parser.phase = self.parser.phases["inTable"]
2029
 
            else:
2030
 
                self.parser.parseError("unexpected-end-tag-in-table-body",
2031
 
                                       {"name": token["name"]})
2032
 
 
2033
 
        def endTagTable(self, token):
2034
 
            if (self.tree.elementInScope("tbody", variant="table") or
2035
 
                self.tree.elementInScope("thead", variant="table") or
2036
 
                    self.tree.elementInScope("tfoot", variant="table")):
2037
 
                self.clearStackToTableBodyContext()
2038
 
                self.endTagTableRowGroup(
2039
 
                    impliedTagToken(self.tree.openElements[-1].name))
2040
 
                return token
2041
 
            else:
2042
 
                # innerHTML case
2043
 
                assert self.parser.innerHTML
2044
 
                self.parser.parseError()
2045
 
 
2046
 
        def endTagIgnore(self, token):
2047
 
            self.parser.parseError("unexpected-end-tag-in-table-body",
2048
 
                                   {"name": token["name"]})
2049
 
 
2050
 
        def endTagOther(self, token):
2051
 
            return self.parser.phases["inTable"].processEndTag(token)
2052
 
 
2053
 
    class InRowPhase(Phase):
2054
 
        # http://www.whatwg.org/specs/web-apps/current-work/#in-row
2055
 
        def __init__(self, parser, tree):
2056
 
            Phase.__init__(self, parser, tree)
2057
 
            self.startTagHandler = utils.MethodDispatcher([
2058
 
                ("html", self.startTagHtml),
2059
 
                (("td", "th"), self.startTagTableCell),
2060
 
                (("caption", "col", "colgroup", "tbody", "tfoot", "thead",
2061
 
                  "tr"), self.startTagTableOther)
2062
 
            ])
2063
 
            self.startTagHandler.default = self.startTagOther
2064
 
 
2065
 
            self.endTagHandler = utils.MethodDispatcher([
2066
 
                ("tr", self.endTagTr),
2067
 
                ("table", self.endTagTable),
2068
 
                (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
2069
 
                (("body", "caption", "col", "colgroup", "html", "td", "th"),
2070
 
                 self.endTagIgnore)
2071
 
            ])
2072
 
            self.endTagHandler.default = self.endTagOther
2073
 
 
2074
 
        # helper methods (XXX unify this with other table helper methods)
2075
 
        def clearStackToTableRowContext(self):
2076
 
            while self.tree.openElements[-1].name not in ("tr", "html"):
2077
 
                self.parser.parseError("unexpected-implied-end-tag-in-table-row",
2078
 
                                       {"name": self.tree.openElements[-1].name})
2079
 
                self.tree.openElements.pop()
2080
 
 
2081
 
        def ignoreEndTagTr(self):
2082
 
            return not self.tree.elementInScope("tr", variant="table")
2083
 
 
2084
 
        # the rest
2085
 
        def processEOF(self):
2086
 
            self.parser.phases["inTable"].processEOF()
2087
 
 
2088
 
        def processSpaceCharacters(self, token):
2089
 
            return self.parser.phases["inTable"].processSpaceCharacters(token)
2090
 
 
2091
 
        def processCharacters(self, token):
2092
 
            return self.parser.phases["inTable"].processCharacters(token)
2093
 
 
2094
 
        def startTagTableCell(self, token):
2095
 
            self.clearStackToTableRowContext()
2096
 
            self.tree.insertElement(token)
2097
 
            self.parser.phase = self.parser.phases["inCell"]
2098
 
            self.tree.activeFormattingElements.append(Marker)
2099
 
 
2100
 
        def startTagTableOther(self, token):
2101
 
            ignoreEndTag = self.ignoreEndTagTr()
2102
 
            self.endTagTr(impliedTagToken("tr"))
2103
 
            # XXX how are we sure it's always ignored in the innerHTML case?
2104
 
            if not ignoreEndTag:
2105
 
                return token
2106
 
 
2107
 
        def startTagOther(self, token):
2108
 
            return self.parser.phases["inTable"].processStartTag(token)
2109
 
 
2110
 
        def endTagTr(self, token):
2111
 
            if not self.ignoreEndTagTr():
2112
 
                self.clearStackToTableRowContext()
2113
 
                self.tree.openElements.pop()
2114
 
                self.parser.phase = self.parser.phases["inTableBody"]
2115
 
            else:
2116
 
                # innerHTML case
2117
 
                assert self.parser.innerHTML
2118
 
                self.parser.parseError()
2119
 
 
2120
 
        def endTagTable(self, token):
2121
 
            ignoreEndTag = self.ignoreEndTagTr()
2122
 
            self.endTagTr(impliedTagToken("tr"))
2123
 
            # Reprocess the current tag if the tr end tag was not ignored
2124
 
            # XXX how are we sure it's always ignored in the innerHTML case?
2125
 
            if not ignoreEndTag:
2126
 
                return token
2127
 
 
2128
 
        def endTagTableRowGroup(self, token):
2129
 
            if self.tree.elementInScope(token["name"], variant="table"):
2130
 
                self.endTagTr(impliedTagToken("tr"))
2131
 
                return token
2132
 
            else:
2133
 
                self.parser.parseError()
2134
 
 
2135
 
        def endTagIgnore(self, token):
2136
 
            self.parser.parseError("unexpected-end-tag-in-table-row",
2137
 
                                   {"name": token["name"]})
2138
 
 
2139
 
        def endTagOther(self, token):
2140
 
            return self.parser.phases["inTable"].processEndTag(token)
2141
 
 
2142
 
    class InCellPhase(Phase):
2143
 
        # http://www.whatwg.org/specs/web-apps/current-work/#in-cell
2144
 
        def __init__(self, parser, tree):
2145
 
            Phase.__init__(self, parser, tree)
2146
 
            self.startTagHandler = utils.MethodDispatcher([
2147
 
                ("html", self.startTagHtml),
2148
 
                (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
2149
 
                  "thead", "tr"), self.startTagTableOther)
2150
 
            ])
2151
 
            self.startTagHandler.default = self.startTagOther
2152
 
 
2153
 
            self.endTagHandler = utils.MethodDispatcher([
2154
 
                (("td", "th"), self.endTagTableCell),
2155
 
                (("body", "caption", "col", "colgroup", "html"), self.endTagIgnore),
2156
 
                (("table", "tbody", "tfoot", "thead", "tr"), self.endTagImply)
2157
 
            ])
2158
 
            self.endTagHandler.default = self.endTagOther
2159
 
 
2160
 
        # helper
2161
 
        def closeCell(self):
2162
 
            if self.tree.elementInScope("td", variant="table"):
2163
 
                self.endTagTableCell(impliedTagToken("td"))
2164
 
            elif self.tree.elementInScope("th", variant="table"):
2165
 
                self.endTagTableCell(impliedTagToken("th"))
2166
 
 
2167
 
        # the rest
2168
 
        def processEOF(self):
2169
 
            self.parser.phases["inBody"].processEOF()
2170
 
 
2171
 
        def processCharacters(self, token):
2172
 
            return self.parser.phases["inBody"].processCharacters(token)
2173
 
 
2174
 
        def startTagTableOther(self, token):
2175
 
            if (self.tree.elementInScope("td", variant="table") or
2176
 
                    self.tree.elementInScope("th", variant="table")):
2177
 
                self.closeCell()
2178
 
                return token
2179
 
            else:
2180
 
                # innerHTML case
2181
 
                assert self.parser.innerHTML
2182
 
                self.parser.parseError()
2183
 
 
2184
 
        def startTagOther(self, token):
2185
 
            return self.parser.phases["inBody"].processStartTag(token)
2186
 
 
2187
 
        def endTagTableCell(self, token):
2188
 
            if self.tree.elementInScope(token["name"], variant="table"):
2189
 
                self.tree.generateImpliedEndTags(token["name"])
2190
 
                if self.tree.openElements[-1].name != token["name"]:
2191
 
                    self.parser.parseError("unexpected-cell-end-tag",
2192
 
                                           {"name": token["name"]})
2193
 
                    while True:
2194
 
                        node = self.tree.openElements.pop()
2195
 
                        if node.name == token["name"]:
2196
 
                            break
2197
 
                else:
2198
 
                    self.tree.openElements.pop()
2199
 
                self.tree.clearActiveFormattingElements()
2200
 
                self.parser.phase = self.parser.phases["inRow"]
2201
 
            else:
2202
 
                self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
2203
 
 
2204
 
        def endTagIgnore(self, token):
2205
 
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
2206
 
 
2207
 
        def endTagImply(self, token):
2208
 
            if self.tree.elementInScope(token["name"], variant="table"):
2209
 
                self.closeCell()
2210
 
                return token
2211
 
            else:
2212
 
                # sometimes innerHTML case
2213
 
                self.parser.parseError()
2214
 
 
2215
 
        def endTagOther(self, token):
2216
 
            return self.parser.phases["inBody"].processEndTag(token)
2217
 
 
2218
 
    class InSelectPhase(Phase):
2219
 
        def __init__(self, parser, tree):
2220
 
            Phase.__init__(self, parser, tree)
2221
 
 
2222
 
            self.startTagHandler = utils.MethodDispatcher([
2223
 
                ("html", self.startTagHtml),
2224
 
                ("option", self.startTagOption),
2225
 
                ("optgroup", self.startTagOptgroup),
2226
 
                ("select", self.startTagSelect),
2227
 
                (("input", "keygen", "textarea"), self.startTagInput),
2228
 
                ("script", self.startTagScript)
2229
 
            ])
2230
 
            self.startTagHandler.default = self.startTagOther
2231
 
 
2232
 
            self.endTagHandler = utils.MethodDispatcher([
2233
 
                ("option", self.endTagOption),
2234
 
                ("optgroup", self.endTagOptgroup),
2235
 
                ("select", self.endTagSelect)
2236
 
            ])
2237
 
            self.endTagHandler.default = self.endTagOther
2238
 
 
2239
 
        # http://www.whatwg.org/specs/web-apps/current-work/#in-select
2240
 
        def processEOF(self):
2241
 
            if self.tree.openElements[-1].name != "html":
2242
 
                self.parser.parseError("eof-in-select")
2243
 
            else:
2244
 
                assert self.parser.innerHTML
2245
 
 
2246
 
        def processCharacters(self, token):
2247
 
            if token["data"] == "\u0000":
2248
 
                return
2249
 
            self.tree.insertText(token["data"])
2250
 
 
2251
 
        def startTagOption(self, token):
2252
 
            # We need to imply </option> if <option> is the current node.
2253
 
            if self.tree.openElements[-1].name == "option":
2254
 
                self.tree.openElements.pop()
2255
 
            self.tree.insertElement(token)
2256
 
 
2257
 
        def startTagOptgroup(self, token):
2258
 
            if self.tree.openElements[-1].name == "option":
2259
 
                self.tree.openElements.pop()
2260
 
            if self.tree.openElements[-1].name == "optgroup":
2261
 
                self.tree.openElements.pop()
2262
 
            self.tree.insertElement(token)
2263
 
 
2264
 
        def startTagSelect(self, token):
2265
 
            self.parser.parseError("unexpected-select-in-select")
2266
 
            self.endTagSelect(impliedTagToken("select"))
2267
 
 
2268
 
        def startTagInput(self, token):
2269
 
            self.parser.parseError("unexpected-input-in-select")
2270
 
            if self.tree.elementInScope("select", variant="select"):
2271
 
                self.endTagSelect(impliedTagToken("select"))
2272
 
                return token
2273
 
            else:
2274
 
                assert self.parser.innerHTML
2275
 
 
2276
 
        def startTagScript(self, token):
2277
 
            return self.parser.phases["inHead"].processStartTag(token)
2278
 
 
2279
 
        def startTagOther(self, token):
2280
 
            self.parser.parseError("unexpected-start-tag-in-select",
2281
 
                                   {"name": token["name"]})
2282
 
 
2283
 
        def endTagOption(self, token):
2284
 
            if self.tree.openElements[-1].name == "option":
2285
 
                self.tree.openElements.pop()
2286
 
            else:
2287
 
                self.parser.parseError("unexpected-end-tag-in-select",
2288
 
                                       {"name": "option"})
2289
 
 
2290
 
        def endTagOptgroup(self, token):
2291
 
            # </optgroup> implicitly closes <option>
2292
 
            if (self.tree.openElements[-1].name == "option" and
2293
 
                    self.tree.openElements[-2].name == "optgroup"):
2294
 
                self.tree.openElements.pop()
2295
 
            # It also closes </optgroup>
2296
 
            if self.tree.openElements[-1].name == "optgroup":
2297
 
                self.tree.openElements.pop()
2298
 
            # But nothing else
2299
 
            else:
2300
 
                self.parser.parseError("unexpected-end-tag-in-select",
2301
 
                                       {"name": "optgroup"})
2302
 
 
2303
 
        def endTagSelect(self, token):
2304
 
            if self.tree.elementInScope("select", variant="select"):
2305
 
                node = self.tree.openElements.pop()
2306
 
                while node.name != "select":
2307
 
                    node = self.tree.openElements.pop()
2308
 
                self.parser.resetInsertionMode()
2309
 
            else:
2310
 
                # innerHTML case
2311
 
                assert self.parser.innerHTML
2312
 
                self.parser.parseError()
2313
 
 
2314
 
        def endTagOther(self, token):
2315
 
            self.parser.parseError("unexpected-end-tag-in-select",
2316
 
                                   {"name": token["name"]})
2317
 
 
2318
 
    class InSelectInTablePhase(Phase):
2319
 
        def __init__(self, parser, tree):
2320
 
            Phase.__init__(self, parser, tree)
2321
 
 
2322
 
            self.startTagHandler = utils.MethodDispatcher([
2323
 
                (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
2324
 
                 self.startTagTable)
2325
 
            ])
2326
 
            self.startTagHandler.default = self.startTagOther
2327
 
 
2328
 
            self.endTagHandler = utils.MethodDispatcher([
2329
 
                (("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
2330
 
                 self.endTagTable)
2331
 
            ])
2332
 
            self.endTagHandler.default = self.endTagOther
2333
 
 
2334
 
        def processEOF(self):
2335
 
            self.parser.phases["inSelect"].processEOF()
2336
 
 
2337
 
        def processCharacters(self, token):
2338
 
            return self.parser.phases["inSelect"].processCharacters(token)
2339
 
 
2340
 
        def startTagTable(self, token):
2341
 
            self.parser.parseError("unexpected-table-element-start-tag-in-select-in-table", {"name": token["name"]})
2342
 
            self.endTagOther(impliedTagToken("select"))
2343
 
            return token
2344
 
 
2345
 
        def startTagOther(self, token):
2346
 
            return self.parser.phases["inSelect"].processStartTag(token)
2347
 
 
2348
 
        def endTagTable(self, token):
2349
 
            self.parser.parseError("unexpected-table-element-end-tag-in-select-in-table", {"name": token["name"]})
2350
 
            if self.tree.elementInScope(token["name"], variant="table"):
2351
 
                self.endTagOther(impliedTagToken("select"))
2352
 
                return token
2353
 
 
2354
 
        def endTagOther(self, token):
2355
 
            return self.parser.phases["inSelect"].processEndTag(token)
2356
 
 
2357
 
    class InForeignContentPhase(Phase):
2358
 
        breakoutElements = frozenset(["b", "big", "blockquote", "body", "br",
2359
 
                                      "center", "code", "dd", "div", "dl", "dt",
2360
 
                                      "em", "embed", "h1", "h2", "h3",
2361
 
                                      "h4", "h5", "h6", "head", "hr", "i", "img",
2362
 
                                      "li", "listing", "menu", "meta", "nobr",
2363
 
                                      "ol", "p", "pre", "ruby", "s", "small",
2364
 
                                      "span", "strong", "strike", "sub", "sup",
2365
 
                                      "table", "tt", "u", "ul", "var"])
2366
 
 
2367
 
        def __init__(self, parser, tree):
2368
 
            Phase.__init__(self, parser, tree)
2369
 
 
2370
 
        def adjustSVGTagNames(self, token):
2371
 
            replacements = {"altglyph": "altGlyph",
2372
 
                            "altglyphdef": "altGlyphDef",
2373
 
                            "altglyphitem": "altGlyphItem",
2374
 
                            "animatecolor": "animateColor",
2375
 
                            "animatemotion": "animateMotion",
2376
 
                            "animatetransform": "animateTransform",
2377
 
                            "clippath": "clipPath",
2378
 
                            "feblend": "feBlend",
2379
 
                            "fecolormatrix": "feColorMatrix",
2380
 
                            "fecomponenttransfer": "feComponentTransfer",
2381
 
                            "fecomposite": "feComposite",
2382
 
                            "feconvolvematrix": "feConvolveMatrix",
2383
 
                            "fediffuselighting": "feDiffuseLighting",
2384
 
                            "fedisplacementmap": "feDisplacementMap",
2385
 
                            "fedistantlight": "feDistantLight",
2386
 
                            "feflood": "feFlood",
2387
 
                            "fefunca": "feFuncA",
2388
 
                            "fefuncb": "feFuncB",
2389
 
                            "fefuncg": "feFuncG",
2390
 
                            "fefuncr": "feFuncR",
2391
 
                            "fegaussianblur": "feGaussianBlur",
2392
 
                            "feimage": "feImage",
2393
 
                            "femerge": "feMerge",
2394
 
                            "femergenode": "feMergeNode",
2395
 
                            "femorphology": "feMorphology",
2396
 
                            "feoffset": "feOffset",
2397
 
                            "fepointlight": "fePointLight",
2398
 
                            "fespecularlighting": "feSpecularLighting",
2399
 
                            "fespotlight": "feSpotLight",
2400
 
                            "fetile": "feTile",
2401
 
                            "feturbulence": "feTurbulence",
2402
 
                            "foreignobject": "foreignObject",
2403
 
                            "glyphref": "glyphRef",
2404
 
                            "lineargradient": "linearGradient",
2405
 
                            "radialgradient": "radialGradient",
2406
 
                            "textpath": "textPath"}
2407
 
 
2408
 
            if token["name"] in replacements:
2409
 
                token["name"] = replacements[token["name"]]
2410
 
 
2411
 
        def processCharacters(self, token):
2412
 
            if token["data"] == "\u0000":
2413
 
                token["data"] = "\uFFFD"
2414
 
            elif (self.parser.framesetOK and
2415
 
                  any(char not in spaceCharacters for char in token["data"])):
2416
 
                self.parser.framesetOK = False
2417
 
            Phase.processCharacters(self, token)
2418
 
 
2419
 
        def processStartTag(self, token):
2420
 
            currentNode = self.tree.openElements[-1]
2421
 
            if (token["name"] in self.breakoutElements or
2422
 
                (token["name"] == "font" and
2423
 
                 set(token["data"].keys()) & set(["color", "face", "size"]))):
2424
 
                self.parser.parseError("unexpected-html-element-in-foreign-content",
2425
 
                                       {"name": token["name"]})
2426
 
                while (self.tree.openElements[-1].namespace !=
2427
 
                       self.tree.defaultNamespace and
2428
 
                       not self.parser.isHTMLIntegrationPoint(self.tree.openElements[-1]) and
2429
 
                       not self.parser.isMathMLTextIntegrationPoint(self.tree.openElements[-1])):
2430
 
                    self.tree.openElements.pop()
2431
 
                return token
2432
 
 
2433
 
            else:
2434
 
                if currentNode.namespace == namespaces["mathml"]:
2435
 
                    self.parser.adjustMathMLAttributes(token)
2436
 
                elif currentNode.namespace == namespaces["svg"]:
2437
 
                    self.adjustSVGTagNames(token)
2438
 
                    self.parser.adjustSVGAttributes(token)
2439
 
                self.parser.adjustForeignAttributes(token)
2440
 
                token["namespace"] = currentNode.namespace
2441
 
                self.tree.insertElement(token)
2442
 
                if token["selfClosing"]:
2443
 
                    self.tree.openElements.pop()
2444
 
                    token["selfClosingAcknowledged"] = True
2445
 
 
2446
 
        def processEndTag(self, token):
2447
 
            nodeIndex = len(self.tree.openElements) - 1
2448
 
            node = self.tree.openElements[-1]
2449
 
            if node.name != token["name"]:
2450
 
                self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
2451
 
 
2452
 
            while True:
2453
 
                if node.name.translate(asciiUpper2Lower) == token["name"]:
2454
 
                    # XXX this isn't in the spec but it seems necessary
2455
 
                    if self.parser.phase == self.parser.phases["inTableText"]:
2456
 
                        self.parser.phase.flushCharacters()
2457
 
                        self.parser.phase = self.parser.phase.originalPhase
2458
 
                    while self.tree.openElements.pop() != node:
2459
 
                        assert self.tree.openElements
2460
 
                    new_token = None
2461
 
                    break
2462
 
                nodeIndex -= 1
2463
 
 
2464
 
                node = self.tree.openElements[nodeIndex]
2465
 
                if node.namespace != self.tree.defaultNamespace:
2466
 
                    continue
2467
 
                else:
2468
 
                    new_token = self.parser.phase.processEndTag(token)
2469
 
                    break
2470
 
            return new_token
2471
 
 
2472
 
    class AfterBodyPhase(Phase):
2473
 
        def __init__(self, parser, tree):
2474
 
            Phase.__init__(self, parser, tree)
2475
 
 
2476
 
            self.startTagHandler = utils.MethodDispatcher([
2477
 
                ("html", self.startTagHtml)
2478
 
            ])
2479
 
            self.startTagHandler.default = self.startTagOther
2480
 
 
2481
 
            self.endTagHandler = utils.MethodDispatcher([("html", self.endTagHtml)])
2482
 
            self.endTagHandler.default = self.endTagOther
2483
 
 
2484
 
        def processEOF(self):
2485
 
            # Stop parsing
2486
 
            pass
2487
 
 
2488
 
        def processComment(self, token):
2489
 
            # This is needed because data is to be appended to the <html> element
2490
 
            # here and not to whatever is currently open.
2491
 
            self.tree.insertComment(token, self.tree.openElements[0])
2492
 
 
2493
 
        def processCharacters(self, token):
2494
 
            self.parser.parseError("unexpected-char-after-body")
2495
 
            self.parser.phase = self.parser.phases["inBody"]
2496
 
            return token
2497
 
 
2498
 
        def startTagHtml(self, token):
2499
 
            return self.parser.phases["inBody"].processStartTag(token)
2500
 
 
2501
 
        def startTagOther(self, token):
2502
 
            self.parser.parseError("unexpected-start-tag-after-body",
2503
 
                                   {"name": token["name"]})
2504
 
            self.parser.phase = self.parser.phases["inBody"]
2505
 
            return token
2506
 
 
2507
 
        def endTagHtml(self, name):
2508
 
            if self.parser.innerHTML:
2509
 
                self.parser.parseError("unexpected-end-tag-after-body-innerhtml")
2510
 
            else:
2511
 
                self.parser.phase = self.parser.phases["afterAfterBody"]
2512
 
 
2513
 
        def endTagOther(self, token):
2514
 
            self.parser.parseError("unexpected-end-tag-after-body",
2515
 
                                   {"name": token["name"]})
2516
 
            self.parser.phase = self.parser.phases["inBody"]
2517
 
            return token
2518
 
 
2519
 
    class InFramesetPhase(Phase):
2520
 
        # http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
2521
 
        def __init__(self, parser, tree):
2522
 
            Phase.__init__(self, parser, tree)
2523
 
 
2524
 
            self.startTagHandler = utils.MethodDispatcher([
2525
 
                ("html", self.startTagHtml),
2526
 
                ("frameset", self.startTagFrameset),
2527
 
                ("frame", self.startTagFrame),
2528
 
                ("noframes", self.startTagNoframes)
2529
 
            ])
2530
 
            self.startTagHandler.default = self.startTagOther
2531
 
 
2532
 
            self.endTagHandler = utils.MethodDispatcher([
2533
 
                ("frameset", self.endTagFrameset)
2534
 
            ])
2535
 
            self.endTagHandler.default = self.endTagOther
2536
 
 
2537
 
        def processEOF(self):
2538
 
            if self.tree.openElements[-1].name != "html":
2539
 
                self.parser.parseError("eof-in-frameset")
2540
 
            else:
2541
 
                assert self.parser.innerHTML
2542
 
 
2543
 
        def processCharacters(self, token):
2544
 
            self.parser.parseError("unexpected-char-in-frameset")
2545
 
 
2546
 
        def startTagFrameset(self, token):
2547
 
            self.tree.insertElement(token)
2548
 
 
2549
 
        def startTagFrame(self, token):
2550
 
            self.tree.insertElement(token)
2551
 
            self.tree.openElements.pop()
2552
 
 
2553
 
        def startTagNoframes(self, token):
2554
 
            return self.parser.phases["inBody"].processStartTag(token)
2555
 
 
2556
 
        def startTagOther(self, token):
2557
 
            self.parser.parseError("unexpected-start-tag-in-frameset",
2558
 
                                   {"name": token["name"]})
2559
 
 
2560
 
        def endTagFrameset(self, token):
2561
 
            if self.tree.openElements[-1].name == "html":
2562
 
                # innerHTML case
2563
 
                self.parser.parseError("unexpected-frameset-in-frameset-innerhtml")
2564
 
            else:
2565
 
                self.tree.openElements.pop()
2566
 
            if (not self.parser.innerHTML and
2567
 
                    self.tree.openElements[-1].name != "frameset"):
2568
 
                # If we're not in innerHTML mode and the the current node is not a
2569
 
                # "frameset" element (anymore) then switch.
2570
 
                self.parser.phase = self.parser.phases["afterFrameset"]
2571
 
 
2572
 
        def endTagOther(self, token):
2573
 
            self.parser.parseError("unexpected-end-tag-in-frameset",
2574
 
                                   {"name": token["name"]})
2575
 
 
2576
 
    class AfterFramesetPhase(Phase):
2577
 
        # http://www.whatwg.org/specs/web-apps/current-work/#after3
2578
 
        def __init__(self, parser, tree):
2579
 
            Phase.__init__(self, parser, tree)
2580
 
 
2581
 
            self.startTagHandler = utils.MethodDispatcher([
2582
 
                ("html", self.startTagHtml),
2583
 
                ("noframes", self.startTagNoframes)
2584
 
            ])
2585
 
            self.startTagHandler.default = self.startTagOther
2586
 
 
2587
 
            self.endTagHandler = utils.MethodDispatcher([
2588
 
                ("html", self.endTagHtml)
2589
 
            ])
2590
 
            self.endTagHandler.default = self.endTagOther
2591
 
 
2592
 
        def processEOF(self):
2593
 
            # Stop parsing
2594
 
            pass
2595
 
 
2596
 
        def processCharacters(self, token):
2597
 
            self.parser.parseError("unexpected-char-after-frameset")
2598
 
 
2599
 
        def startTagNoframes(self, token):
2600
 
            return self.parser.phases["inHead"].processStartTag(token)
2601
 
 
2602
 
        def startTagOther(self, token):
2603
 
            self.parser.parseError("unexpected-start-tag-after-frameset",
2604
 
                                   {"name": token["name"]})
2605
 
 
2606
 
        def endTagHtml(self, token):
2607
 
            self.parser.phase = self.parser.phases["afterAfterFrameset"]
2608
 
 
2609
 
        def endTagOther(self, token):
2610
 
            self.parser.parseError("unexpected-end-tag-after-frameset",
2611
 
                                   {"name": token["name"]})
2612
 
 
2613
 
    class AfterAfterBodyPhase(Phase):
2614
 
        def __init__(self, parser, tree):
2615
 
            Phase.__init__(self, parser, tree)
2616
 
 
2617
 
            self.startTagHandler = utils.MethodDispatcher([
2618
 
                ("html", self.startTagHtml)
2619
 
            ])
2620
 
            self.startTagHandler.default = self.startTagOther
2621
 
 
2622
 
        def processEOF(self):
2623
 
            pass
2624
 
 
2625
 
        def processComment(self, token):
2626
 
            self.tree.insertComment(token, self.tree.document)
2627
 
 
2628
 
        def processSpaceCharacters(self, token):
2629
 
            return self.parser.phases["inBody"].processSpaceCharacters(token)
2630
 
 
2631
 
        def processCharacters(self, token):
2632
 
            self.parser.parseError("expected-eof-but-got-char")
2633
 
            self.parser.phase = self.parser.phases["inBody"]
2634
 
            return token
2635
 
 
2636
 
        def startTagHtml(self, token):
2637
 
            return self.parser.phases["inBody"].processStartTag(token)
2638
 
 
2639
 
        def startTagOther(self, token):
2640
 
            self.parser.parseError("expected-eof-but-got-start-tag",
2641
 
                                   {"name": token["name"]})
2642
 
            self.parser.phase = self.parser.phases["inBody"]
2643
 
            return token
2644
 
 
2645
 
        def processEndTag(self, token):
2646
 
            self.parser.parseError("expected-eof-but-got-end-tag",
2647
 
                                   {"name": token["name"]})
2648
 
            self.parser.phase = self.parser.phases["inBody"]
2649
 
            return token
2650
 
 
2651
 
    class AfterAfterFramesetPhase(Phase):
2652
 
        def __init__(self, parser, tree):
2653
 
            Phase.__init__(self, parser, tree)
2654
 
 
2655
 
            self.startTagHandler = utils.MethodDispatcher([
2656
 
                ("html", self.startTagHtml),
2657
 
                ("noframes", self.startTagNoFrames)
2658
 
            ])
2659
 
            self.startTagHandler.default = self.startTagOther
2660
 
 
2661
 
        def processEOF(self):
2662
 
            pass
2663
 
 
2664
 
        def processComment(self, token):
2665
 
            self.tree.insertComment(token, self.tree.document)
2666
 
 
2667
 
        def processSpaceCharacters(self, token):
2668
 
            return self.parser.phases["inBody"].processSpaceCharacters(token)
2669
 
 
2670
 
        def processCharacters(self, token):
2671
 
            self.parser.parseError("expected-eof-but-got-char")
2672
 
 
2673
 
        def startTagHtml(self, token):
2674
 
            return self.parser.phases["inBody"].processStartTag(token)
2675
 
 
2676
 
        def startTagNoFrames(self, token):
2677
 
            return self.parser.phases["inHead"].processStartTag(token)
2678
 
 
2679
 
        def startTagOther(self, token):
2680
 
            self.parser.parseError("expected-eof-but-got-start-tag",
2681
 
                                   {"name": token["name"]})
2682
 
 
2683
 
        def processEndTag(self, token):
2684
 
            self.parser.parseError("expected-eof-but-got-end-tag",
2685
 
                                   {"name": token["name"]})
2686
 
 
2687
 
    return {
2688
 
        "initial": InitialPhase,
2689
 
        "beforeHtml": BeforeHtmlPhase,
2690
 
        "beforeHead": BeforeHeadPhase,
2691
 
        "inHead": InHeadPhase,
2692
 
        # XXX "inHeadNoscript": InHeadNoScriptPhase,
2693
 
        "afterHead": AfterHeadPhase,
2694
 
        "inBody": InBodyPhase,
2695
 
        "text": TextPhase,
2696
 
        "inTable": InTablePhase,
2697
 
        "inTableText": InTableTextPhase,
2698
 
        "inCaption": InCaptionPhase,
2699
 
        "inColumnGroup": InColumnGroupPhase,
2700
 
        "inTableBody": InTableBodyPhase,
2701
 
        "inRow": InRowPhase,
2702
 
        "inCell": InCellPhase,
2703
 
        "inSelect": InSelectPhase,
2704
 
        "inSelectInTable": InSelectInTablePhase,
2705
 
        "inForeignContent": InForeignContentPhase,
2706
 
        "afterBody": AfterBodyPhase,
2707
 
        "inFrameset": InFramesetPhase,
2708
 
        "afterFrameset": AfterFramesetPhase,
2709
 
        "afterAfterBody": AfterAfterBodyPhase,
2710
 
        "afterAfterFrameset": AfterAfterFramesetPhase,
2711
 
        # XXX after after frameset
2712
 
    }
2713
 
 
2714
 
 
2715
 
def impliedTagToken(name, type="EndTag", attributes=None,
                    selfClosing=False):
    """Build a tag token dict for a tag that is not taken from the input.

    ``type`` is a key into ``tokenTypes`` (default ``"EndTag"``); the looked-up
    value is stored under ``"type"``. ``attributes`` is stored under ``"data"``
    and defaults to a fresh empty dict when omitted. An unknown ``type`` key
    raises ``KeyError`` from the ``tokenTypes`` lookup.
    """
    token = {
        "type": tokenTypes[type],
        "name": name,
        # A new dict per call, so callers never share a default.
        "data": {} if attributes is None else attributes,
        "selfClosing": selfClosing,
    }
    return token
2721
 
 
2722
 
 
2723
 
class ParseError(Exception):
    """Error in parsed document.

    Plain ``Exception`` subclass: it adds no attributes or behaviour of its
    own, so the docstring alone serves as the class body.
    """