1
# Copyright (c) 2004-2009 Divmod.
2
# See LICENSE for details.
4
from xml.sax import make_parser, handler
7
import nevow
from nevow.stan import xml, Tag, directive, slot
10
## Require PyXML 0.8.2 or later, or, if PyXML isn't installed
## python2.3 or later, because that includes approximately the
## same code (but doesn't share a version number *!@#$@!@#)

try:
    # Imported under an alias on purpose: the bare name ``xml`` in this
    # module refers to nevow.stan.xml, not to the XML package.
    import xml as pyxml
    ## pyxml package has a version_info attribute
    _pyxml_version = pyxml.version_info
except (ImportError, AttributeError):
    ## we're using core python xml library
    import sys
    bad_version = sys.version_info < (2,3)
    # python < 2.4 has the startDTD bug
    bad_startdtd_args = sys.version_info < (2,4)
else:
    # PyXML is installed: judge by its own version number.
    bad_version = _pyxml_version < (0,8,2)
    ## before 0.8.3, startDTD was passed the args in the wrong order
    bad_startdtd_args = _pyxml_version < (0,8,3)
27
class nscontext(object):
    """One level of namespace-URI -> prefix bindings.

    A context created with a parent starts from a copy of the parent's
    bindings, so entries added to the child never leak back into the
    parent.  The parent is kept on ``self.parent`` so that a caller can
    step back out of the scope again.
    """

    def __init__(self, parent=None):
        """Create a scope nested in ``parent``, or a root scope if None."""
        self.parent = parent
        if parent is not None:
            # Copy rather than share, so the parent stays untouched.
            self.nss = dict(parent.nss)
        else:
            # A root scope only knows the reserved XML namespace.
            self.nss = {'http://www.w3.org/XML/1998/namespace':'xml'}

    def get(self, k, d=None):
        """Return the prefix bound to URI ``k``, or ``d`` if unbound."""
        return self.nss.get(k, d)

    def __setitem__(self, k, v):
        """Bind URI ``k`` to prefix ``v`` in this scope."""
        self.nss[k] = v

    def __getitem__(self, k):
        """Return the prefix bound to URI ``k``; KeyError if unbound."""
        return self.nss[k]
46
class ToStan(handler.ContentHandler, handler.EntityResolver):
    """SAX handler that turns namespace-aware parse events into a stan tree.

    After ``startDocument`` the result accumulates in ``self.document`` (a
    list).  ``self.stack`` holds the elements that are currently open and
    ``self.current`` is the child list that new nodes are appended to.
    The DTD/CDATA/comment methods are lexical-handler callbacks.
    """

    # NOTE(review): startElementNS reads these two tables, but their
    # definitions were missing from the damaged copy of this file.  The
    # values below are restored from upstream Nevow -- confirm before use.
    # Nevow-namespace attributes that become directive() specials:
    directiveMapping = {
        'render': 'render',
        'data': 'data',
        'macro': 'macro',
    }
    # Nevow-namespace attributes copied into specials as plain values:
    attributeList = [
        'pattern', 'key',
    ]

    def __init__(self, ignoreDocType, ignoreComment, sourceFilename):
        """Remember the output options and start with a root namespace scope.

        ignoreDocType -- when true, startDTD emits nothing.
        ignoreComment -- when true, comment emits nothing.
        sourceFilename -- passed as ``filename`` to every slot/Tag created.
        """
        self.ignoreDocType = ignoreDocType
        self.ignoreComment = ignoreComment
        self.sourceFilename = sourceFilename
        self.prefixMap = nscontext()
        self.inCDATA = False

    def setDocumentLocator(self, locator):
        """Keep the locator; it supplies line/column numbers for elements."""
        self.locator = locator

    def resolveEntity(self, publicId, systemId):
        """Always raise: external entities are not supported."""
        ## This doesn't seem to get called, which is good.
        raise Exception("resolveEntity should not be called. We don't use external DTDs.")

    def skippedEntity(self, name):
        """Re-emit an entity the parser skipped as a literal ``&name;``."""
        self.current.append(xml("&%s;"%name))

    def startDocument(self):
        """Reset the per-document state."""
        self.document = []
        self.current = self.document
        self.stack = []
        # (attribute name, uri) pairs waiting for the next element.
        self.xmlnsAttrs = []

    def endDocument(self):
        """Nothing to finish; the tree is already in ``self.document``."""
        pass

    def processingInstruction(self, target, data):
        """Pass a processing instruction through as literal markup."""
        self.current.append(xml("<?%s %s?>\n" % (target, data)))

    def startPrefixMapping(self, prefix, uri):
        """Open a namespace scope and queue the matching xmlns attribute."""
        self.prefixMap = nscontext(self.prefixMap)
        self.prefixMap[uri] = prefix

        # Ignore Nevow's namespace, we'll replace those during parsing.
        if uri == nevow.namespace:
            return

        # Add to a list that will be applied once we have the element.
        if prefix is None:
            self.xmlnsAttrs.append(('xmlns',uri))
        else:
            self.xmlnsAttrs.append(('xmlns:%s'%prefix,uri))

    def endPrefixMapping(self, prefix):
        """Leave the namespace scope opened by startPrefixMapping."""
        self.prefixMap = self.prefixMap.parent

    def startElementNS(self, ns_and_name, qname, attrs):
        """Create the stan node for an opening tag and descend into it.

        Elements in the Nevow namespace get special treatment:
        ``invisible`` becomes a Tag with an empty name, ``slot`` becomes a
        slot (its ``name`` attribute is required, ``default`` is optional),
        and ``attr`` becomes an unnamed Tag stored as an attribute of the
        enclosing element.  Everything else becomes an ordinary Tag.

        Raises AssertionError for an ``attr`` element with no enclosing
        element or no ``name`` attribute, and KeyError for a ``slot``
        without a ``name`` attribute.
        """
        filename = self.sourceFilename
        lineNumber = self.locator.getLineNumber()
        columnNumber = self.locator.getColumnNumber()

        ns, name = ns_and_name
        if ns == nevow.namespace:
            if name == 'invisible':
                name = ''
            elif name == 'slot':
                try:
                    # Try to get the default value for the slot
                    default = attrs[(None, 'default')]
                except KeyError:
                    # If there wasn't one, then use None to indicate no
                    # default.
                    default = None
                el = slot(
                    attrs[(None, 'name')], default=default,
                    filename=filename, lineNumber=lineNumber,
                    columnNumber=columnNumber)
                self.stack.append(el)
                self.current.append(el)
                self.current = el.children
                return

        # Work on a private dict so consumed attributes can be removed.
        attrs = dict(attrs)
        specials = {}
        attributes = self.attributeList
        directives = self.directiveMapping
        # Iterate over a snapshot: entries are removed inside the loop.
        for k, v in list(attrs.items()):
            att_ns, nons = k
            if att_ns != nevow.namespace:
                continue
            if nons in directives:
                ## clean this up by making the names more consistent
                specials[directives[nons]] = directive(v)
                attrs.pop(k, None)
            if nons in attributes:
                specials[nons] = v
                attrs.pop(k, None)

        # Flatten the remaining (namespace, name) keys to 'prefix:name'.
        no_ns_attrs = {}
        for (attrNs, attrName), v in attrs.items():
            nsPrefix = self.prefixMap.get(attrNs)
            if nsPrefix is None:
                no_ns_attrs[attrName] = v
            else:
                no_ns_attrs['%s:%s'%(nsPrefix,attrName)] = v

        if ns == nevow.namespace and name == 'attr':
            if not self.stack:
                # TODO: define a better exception for this?
                raise AssertionError( '<nevow:attr> as top-level element' )
            if 'name' not in no_ns_attrs:
                # TODO: same here
                raise AssertionError( '<nevow:attr> requires a name attribute' )
            el = Tag('', specials=specials, filename=filename,
                     lineNumber=lineNumber, columnNumber=columnNumber)
            # Stored as an attribute of the parent, not as a child of it.
            self.stack[-1].attributes[no_ns_attrs['name']] = el
            self.stack.append(el)
            self.current = el.children
            return

        # Apply any xmlns attributes
        if self.xmlnsAttrs:
            no_ns_attrs.update(dict(self.xmlnsAttrs))
            self.xmlnsAttrs = []

        # Add the prefix that was used in the parsed template for non-Nevow
        # namespaces (which Nevow will consume anyway).
        if ns != nevow.namespace and ns is not None:
            prefix = self.prefixMap[ns]
            if prefix is not None:
                name = '%s:%s' % (prefix,name)
        el = Tag(
            name, attributes=dict(no_ns_attrs), specials=specials,
            filename=filename, lineNumber=lineNumber,
            columnNumber=columnNumber)
        self.stack.append(el)
        self.current.append(el)
        self.current = el.children

    def characters(self, ch):
        """Append character data to the current node."""
        # CDATA characters should be passed through as is.
        if self.inCDATA:
            ch = xml(ch)
        self.current.append(ch)

    def endElementNS(self, name, qname):
        """Close the innermost element and resume appending to its parent."""
        self.stack.pop()
        if self.stack:
            self.current = self.stack[-1].children
        else:
            self.current = self.document

    def startDTD(self, name, publicId, systemId):
        """Emit the DOCTYPE declaration unless doctypes are being ignored."""
        if self.ignoreDocType:
            return
        # Check for broken startDTD
        if bad_startdtd_args:
            systemId, publicId = publicId, systemId
        doctype = '<!DOCTYPE %s\n PUBLIC "%s"\n "%s">\n' % (name, publicId, systemId)
        self.current.append(xml(doctype))

    def endDTD(self, *args):
        """Nothing to emit; startDTD already wrote the whole declaration."""
        pass

    def startCDATA(self):
        """Open a CDATA section; characters() passes text through raw."""
        self.inCDATA = True
        self.current.append(xml('<![CDATA['))

    def endCDATA(self):
        """Close the CDATA section opened by startCDATA."""
        self.inCDATA = False
        self.current.append(xml(']]>'))

    def comment(self, content):
        """Emit an XML comment unless comments are being ignored."""
        if self.ignoreComment:
            return
        self.current.append( (xml('<!--'),xml(content),xml('-->')) )
227
def parse(fl, ignoreDocType=False, ignoreComment=False):
    """Parse the XML source ``fl`` and return the resulting stan document.

    fl -- the source handed to the SAX parser's ``parse``; its ``name``
          attribute, if it has one, is used as the source filename.
    ignoreDocType -- when true, the DOCTYPE declaration is left out.
    ignoreComment -- when true, XML comments are left out.

    Returns the handler's ``document`` list.  Raises Exception if the
    module-level ``bad_version`` flag is set.
    """
    ## Earlier PyXMLs don't handle non-standard entities (e.g. &copy;)
    ## correctly. They will either give an error or simply ignore the
    ## entity producing bad output.

    if bad_version:
        raise Exception("Please use PyXML later than 0.8.2 or python later than 2.3. Earlier ones are too buggy.")

    parser = make_parser()
    # Namespace-aware, non-validating, and no external entities.
    parser.setFeature(handler.feature_validation, 0)
    parser.setFeature(handler.feature_namespaces, 1)
    parser.setFeature(handler.feature_external_ges, 0)
    parser.setFeature(handler.feature_external_pes, 0)

    s = ToStan(ignoreDocType, ignoreComment, getattr(fl, "name", None))
    parser.setContentHandler(s)
    parser.setEntityResolver(s)
    # The same object also receives DTD, CDATA and comment events.
    parser.setProperty(handler.property_lexical_handler, s)

    parser.parse(fl)

    return s.document
250
def parseString(t, ignoreDocType=False, ignoreComment=False):
    """Parse the XML held in the string ``t``.

    The string is wrapped in an in-memory file object and handed to
    ``parse`` together with the two flags; whatever ``parse`` returns is
    returned unchanged.
    """
    import cStringIO
    source = cStringIO.StringIO(t)
    return parse(source, ignoreDocType, ignoreComment)