3
# Add html4 serialization to older versions of ElementTree
4
# Taken from ElementTree 1.3 preview with slight modifications
6
# Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved.
8
# fredrik@pythonware.com
9
# http://www.pythonware.com
11
# --------------------------------------------------------------------
12
# The ElementTree toolkit is
14
# Copyright (c) 1999-2007 by Fredrik Lundh
16
# By obtaining, using, and/or copying this software and/or its
17
# associated documentation, you agree that you have read, understood,
18
# and will comply with the following terms and conditions:
20
# Permission to use, copy, modify, and distribute this software and
21
# its associated documentation for any purpose and without fee is
22
# hereby granted, provided that the above copyright notice appears in
23
# all copies, and that both that copyright notice and this permission
24
# notice appear in supporting documentation, and that the name of
25
# Secret Labs AB or the author not be used in advertising or publicity
26
# pertaining to distribution of the software without specific, written
# prior permission.
29
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
30
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
31
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
32
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
33
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
34
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
35
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THIS SOFTWARE.
37
# --------------------------------------------------------------------
41
# Module-level shortcuts for the markdown.etree names used by the
# serializer code in this file.
# ElementTree wraps an element so that its root can be fetched for writing.
ElementTree = markdown.etree.ElementTree
42
# QName marks tags, attribute names and values that need qname handling.
QName = markdown.etree.QName
43
# Comment, PI and ProcessingInstruction are compared against element tags
# by identity ("tag is Comment") to recognise those special node kinds.
Comment = markdown.etree.Comment
44
PI = markdown.etree.PI
45
ProcessingInstruction = markdown.etree.ProcessingInstruction
47
# Tag names for which the HTML serializer writes no closing tag.
# NOTE: every name needs its own comma -- adjacent string literals are
# silently concatenated, so '"meta" "param"' would yield the single bogus
# entry "metaparam" and drop both real tag names from the collection.
HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
              "img", "input", "isindex", "link", "meta", "param")

# Membership is tested once per serialized element, so keep it as a set.
HTML_EMPTY = set(HTML_EMPTY)
56
# "well-known" namespace prefixes
57
"http://www.w3.org/XML/1998/namespace": "xml",
58
"http://www.w3.org/1999/xhtml": "html",
59
"http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
60
"http://schemas.xmlsoap.org/wsdl/": "wsdl",
62
"http://www.w3.org/2001/XMLSchema": "xs",
63
"http://www.w3.org/2001/XMLSchema-instance": "xsi",
65
"http://purl.org/dc/elements/1.1/": "dc",
69
def _raise_serialization_error(text):
71
"cannot serialize %r (type %s)" % (text, type(text).__name__)
74
def _encode(text, encoding):
76
return text.encode(encoding, "xmlcharrefreplace")
77
except (TypeError, AttributeError):
78
_raise_serialization_error(text)
80
def _escape_cdata(text, encoding):
81
# escape character data
83
# it's worth avoiding do-nothing calls for strings that are
84
# shorter than 500 character, or so. assume that's, by far,
85
# the most common case in most applications.
87
text = text.replace("&", "&")
89
text = text.replace("<", "<")
91
text = text.replace(">", ">")
92
return text.encode(encoding, "xmlcharrefreplace")
93
except (TypeError, AttributeError):
94
_raise_serialization_error(text)
97
def _escape_attrib(text, encoding):
98
# escape attribute value
101
text = text.replace("&", "&")
103
text = text.replace("<", "<")
105
text = text.replace(">", ">")
107
text = text.replace("\"", """)
109
text = text.replace("\n", " ")
110
return text.encode(encoding, "xmlcharrefreplace")
111
except (TypeError, AttributeError):
112
_raise_serialization_error(text)
114
def _escape_attrib_html(text, encoding):
115
# escape attribute value
118
text = text.replace("&", "&")
120
text = text.replace(">", ">")
122
text = text.replace("\"", """)
123
return text.encode(encoding, "xmlcharrefreplace")
124
except (TypeError, AttributeError):
125
_raise_serialization_error(text)
128
def _serialize_html(write, elem, encoding, qnames, namespaces):
# Emit the HTML serialization of elem by passing pieces to write().
# qnames supplies the serialized name for attribute keys (qnames[k]);
# namespaces, when given, is written out as xmlns attributes.  The
# function calls itself with namespaces set to None, so the xmlns
# declarations are written only at the level where they were passed in.
# NOTE(review): this copy lacks several statements -- the assignments of
# tag, text, items, k, v and e, and the "if" that the "elif" below
# belongs to -- so it is not runnable as shown; restore from upstream.
132
# comment node: the text is escaped and wrapped in <!-- -->
write("<!--%s-->" % _escape_cdata(text, encoding))
133
elif tag is ProcessingInstruction:
134
# processing instruction: the text is escaped and wrapped in <? ?>
write("<?%s?>" % _escape_cdata(text, encoding))
139
write(_escape_cdata(text, encoding))
141
_serialize_html(write, e, encoding, qnames, None)
145
if items or namespaces:
146
items.sort() # lexical order
148
if isinstance(k, QName):
150
if isinstance(v, QName):
153
# attribute values go through the HTML-specific attribute escaper
v = _escape_attrib_html(v, encoding)
154
# FIXME: handle boolean attributes
155
write(" %s=\"%s\"" % (qnames[k], v))
157
items = namespaces.items()
158
items.sort(key=lambda x: x[1]) # sort on prefix
162
# one xmlns declaration per namespace entry
write(" xmlns%s=\"%s\"" % (
164
_escape_attrib(v, encoding)
169
# script and style text is written through _encode (no entity escaping);
# the _escape_cdata call that follows presumably sits in the missing
# "else" branch for all other tags -- TODO confirm
if tag == "script" or tag == "style":
170
write(_encode(text, encoding))
172
write(_escape_cdata(text, encoding))
174
_serialize_html(write, e, encoding, qnames, None)
175
# tags listed in HTML_EMPTY get no closing tag
if tag not in HTML_EMPTY:
176
write("</" + tag + ">")
178
# the element's tail is escaped as character data
write(_escape_cdata(elem.tail, encoding))
180
def write_html(root, f,
183
default_namespace=None):
# Serialize the tree rooted at root as HTML.
# root must not be None (asserted below).  f is checked for a "write"
# attribute; what is done when it has none is not visible in this copy --
# presumably f is then treated as a file name to open (TODO confirm).
# default_namespace is forwarded unchanged to _namespaces().
# NOTE(review): this copy lacks lines, including what looks like an
# "encoding" parameter in the signature and the name of the function
# that receives the final argument list; restore from upstream.
184
assert root is not None
185
if not hasattr(f, "write"):
189
# "us-ascii" is assigned as the encoding here; the condition guarding
# this assignment is not visible in this copy
encoding = "us-ascii"
190
# collect the qname table and the namespace table for the whole tree
qnames, namespaces = _namespaces(
191
root, encoding, default_namespace
194
# arguments in the order _serialize_html declares its parameters
# (presumably the call target -- TODO confirm)
write, root, encoding, qnames, namespaces
197
# --------------------------------------------------------------------
198
# serialization support
200
def _namespaces(elem, encoding, default_namespace=None):
# Build the two lookup tables the serializer needs and return them as
# the tuple (qnames, namespaces).
# NOTE(review): this copy lacks several statements (among them the
# initialisation of "namespaces", the header of the helper that returns
# text.encode(encoding), the "try:" for the except clause below and the
# bodies of several branches); restore from upstream before relying on it.
201
# identify namespaces used in this tree
203
# maps qnames to *encoded* prefix:local names
204
qnames = {None: None}
206
# maps URIs to prefixes
208
# the default namespace is registered with the empty prefix
if default_namespace:
209
namespaces[default_namespace] = ""
212
return text.encode(encoding)
214
def add_qname(qname):
215
# calculate serialized qname representation
218
# split "{uri}local" into the URI and the local name
uri, tag = qname[1:].split("}", 1)
219
# prefix lookup order visible here: this tree's table, then the
# module-level _namespace_map, then a generated "ns<N>" name
prefix = namespaces.get(uri)
221
prefix = _namespace_map.get(uri)
223
prefix = "ns%d" % len(namespaces)
225
namespaces[uri] = prefix
227
qnames[qname] = encode("%s:%s" % (prefix, tag))
229
qnames[qname] = encode(tag) # default element
231
if default_namespace:
232
# FIXME: can this be handled in XML 1.0?
234
"cannot use non-qualified names with "
235
"default_namespace option"
237
qnames[qname] = encode(qname)
239
_raise_serialization_error(qname)
241
# populate qname and namespaces table
244
except AttributeError:
245
iterate = elem.getiterator # cET compatibility
246
# note: the loop variable rebinds the "elem" parameter
for elem in iterate():
248
if isinstance(tag, QName) and tag.text not in qnames:
250
elif isinstance(tag, basestring):
251
if tag not in qnames:
253
# anything other than None, Comment or PI at this point is rejected
elif tag is not None and tag is not Comment and tag is not PI:
254
_raise_serialization_error(tag)
255
# attribute keys and values may also be (or carry) qnames
for key, value in elem.items():
256
if isinstance(key, QName):
258
if key not in qnames:
260
if isinstance(value, QName) and value.text not in qnames:
261
add_qname(value.text)
263
if isinstance(text, QName) and text.text not in qnames:
265
return qnames, namespaces
267
def to_html_string(element, encoding=None):
# Serialize element through write_html(), capturing the output in memory:
# every write is routed to data.append instead of a real file.
# NOTE(review): the statements that create "data" and "file", and
# whatever is returned afterwards, are not visible in this copy.  The
# local name "file" also shadows the Python 2 builtin of the same name.
272
file.write = data.append
273
# wrap the element in an ElementTree and hand its root to write_html
write_html(ElementTree(element).getroot(),file,encoding)