~ibmcharmers/charms/xenial/ibm-cinder-storwize-svc/trunk

« back to all changes in this revision

Viewing changes to .tox/py35/lib/python3.5/site-packages/pip/_vendor/html5lib/treewalkers/__init__.py

  • Committer: Ankammarao
  • Date: 2017-03-06 05:11:42 UTC
  • Revision ID: achittet@in.ibm.com-20170306051142-dpg27z4es1k56hfn
Marked tests folder executable

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
"""A collection of modules for iterating through different kinds of
 
2
tree, generating tokens identical to those produced by the tokenizer
 
3
module.
 
4
 
 
5
To create a tree walker for a new type of tree, you need to
 
6
implement a tree walker object (called TreeWalker by convention) that
 
7
implements a 'serialize' method taking a tree as sole argument and
 
8
returning an iterator generating tokens.
 
9
"""
 
10
 
 
11
from __future__ import absolute_import, division, unicode_literals
 
12
 
 
13
from .. import constants
 
14
from .._utils import default_etree
 
15
 
 
16
__all__ = ["getTreeWalker", "pprint", "dom", "etree", "genshi", "etree_lxml"]
 
17
 
 
18
treeWalkerCache = {}
 
19
 
 
20
 
 
21
def getTreeWalker(treeType, implementation=None, **kwargs):
    """Look up the TreeWalker class for one of the built-in tree types

    Args:
        treeType (str): name of the tree type to walk. It is lower-cased
            before comparison, so matching is case-insensitive.
            Recognised values are:

            - "dom": The xml.dom.minidom DOM implementation
            - "etree": A generic walker for tree implementations exposing an
                       elementtree-like interface (known to work with
                       ElementTree, cElementTree and lxml.etree).
            - "lxml": Optimized walker for lxml.etree
            - "genshi": a Genshi stream

        implementation: A module implementing the tree type e.g.
            xml.etree.ElementTree or cElementTree. Only consulted for the
            "etree" tree type; when None, ``default_etree`` is used.

        **kwargs: forwarded unchanged to ``etree.getETreeModule`` for the
            "etree" tree type; not used by the other branches.

    Returns:
        The TreeWalker class for the requested tree type. For a name that
        is neither recognised nor already present in ``treeWalkerCache``
        the cache lookup yields None.
    """
    treeType = treeType.lower()

    # Fast path: this tree type has been resolved before.
    if treeType in treeWalkerCache:
        return treeWalkerCache.get(treeType)

    if treeType == "etree":
        from . import etree
        if implementation is None:
            implementation = default_etree
        # XXX: NEVER cache here, caching is done in the etree submodule
        return etree.getETreeModule(implementation, **kwargs).TreeWalker

    # The remaining walkers are imported lazily on first use and memoised.
    if treeType == "dom":
        from . import dom
        treeWalkerCache[treeType] = dom.TreeWalker
    elif treeType == "genshi":
        from . import genshi
        treeWalkerCache[treeType] = genshi.TreeWalker
    elif treeType == "lxml":
        from . import etree_lxml
        treeWalkerCache[treeType] = etree_lxml.TreeWalker

    return treeWalkerCache.get(treeType)
 
58
 
 
59
 
 
60
def concatenateCharacterTokens(tokens):
    """Merge each run of adjacent character tokens into one token

    Args:
        tokens: an iterable of token dicts, each with at least a "type"
            key; tokens of type "Characters" or "SpaceCharacters" must
            also carry a "data" string.

    Yields:
        The input tokens in order, except that every consecutive run of
        "Characters"/"SpaceCharacters" tokens is replaced by a single new
        ``{"type": "Characters", "data": ...}`` dict whose data is the
        concatenation of the run. All other tokens are yielded unchanged.
    """
    pendingCharacters = []
    for token in tokens:
        if token["type"] in ("Characters", "SpaceCharacters"):
            # Buffer text until a non-character token (or the end) arrives.
            pendingCharacters.append(token["data"])
            continue
        if pendingCharacters:
            yield {"type": "Characters", "data": "".join(pendingCharacters)}
            pendingCharacters = []
        yield token
    # Flush text that was still buffered when the input ran out.
    if pendingCharacters:
        yield {"type": "Characters", "data": "".join(pendingCharacters)}
 
73
 
 
74
 
 
75
def pprint(walker):
    """Pretty printer for tree walkers

    Args:
        walker: an iterable of token dicts as produced by a tree walker.
            It is first passed through ``concatenateCharacterTokens`` so
            that adjacent text tokens are printed as one string.

    Returns:
        str: one line per start tag, attribute, comment, doctype and text
        node, joined with newlines. Nesting is shown by two spaces of
        indentation per open element; attributes are listed beneath their
        element in sorted order.

    Raises:
        ValueError: if a token has an unrecognised "type".
        AssertionError: if a "SpaceCharacters" token reaches the printer,
            which means the concatenation step did not do its job.
    """
    output = []
    indent = 0
    for token in concatenateCharacterTokens(walker):
        tokenType = token["type"]
        if tokenType in ("StartTag", "EmptyTag"):
            # tag name; the HTML namespace is the default and is not shown
            namespace = token["namespace"]
            if namespace and namespace != constants.namespaces["html"]:
                ns = constants.prefixes.get(namespace, namespace)
                name = "%s %s" % (ns, token["name"])
            else:
                name = token["name"]
            output.append("%s<%s>" % (" " * indent, name))
            indent += 2
            # attributes (sorted for consistent ordering)
            attrs = token["data"]
            for (namespace, localname), value in sorted(attrs.items()):
                if namespace:
                    ns = constants.prefixes.get(namespace, namespace)
                    name = "%s %s" % (ns, localname)
                else:
                    name = localname
                output.append("%s%s=\"%s\"" % (" " * indent, name, value))
            # self-closing: no EndTag will follow, so dedent right away
            if tokenType == "EmptyTag":
                indent -= 2

        elif tokenType == "EndTag":
            indent -= 2

        elif tokenType == "Comment":
            output.append("%s<!-- %s -->" % (" " * indent, token["data"]))

        elif tokenType == "Doctype":
            if token["name"]:
                if token["publicId"]:
                    output.append("""%s<!DOCTYPE %s "%s" "%s">""" %
                                  (" " * indent,
                                   token["name"],
                                   token["publicId"],
                                   token["systemId"] or ""))
                elif token["systemId"]:
                    output.append("""%s<!DOCTYPE %s "" "%s">""" %
                                  (" " * indent,
                                   token["name"],
                                   token["systemId"]))
                else:
                    output.append("%s<!DOCTYPE %s>" % (" " * indent,
                                                       token["name"]))
            else:
                output.append("%s<!DOCTYPE >" % (" " * indent,))

        elif tokenType == "Characters":
            output.append("%s\"%s\"" % (" " * indent, token["data"]))

        elif tokenType == "SpaceCharacters":
            # Raised explicitly rather than via ``assert False`` so the
            # check is not stripped when Python runs with -O.
            raise AssertionError(
                "concatenateCharacterTokens should have got rid of all Space tokens")

        else:
            raise ValueError("Unknown token type, %s" % tokenType)

    return "\n".join(output)