2
module for generating and serializing xml and html structures
3
by using simple python objects.
5
(c) holger krekel, holger at merlinux eu. 2009
9
if sys.version_info >= (3,0):
12
def unicode(x, errors=None):
13
if hasattr(x, '__unicode__'):
14
return x.__unicode__()
22
class NamespaceMetaclass(type):
23
def __getattr__(self, name):
25
raise AttributeError(name)
27
raise ValueError("Namespace class is abstract")
28
tagspec = self.__tagspec__
29
if tagspec is not None and name not in tagspec:
30
raise AttributeError(name)
32
if self.__stickyname__:
33
classattr['xmlname'] = name
34
cls = type(name, (self.__tagclass__,), classattr)
35
setattr(self, name, cls)
40
def __init__(self, **kwargs):
41
self.__dict__.update(kwargs)
43
def __init__(self, *args, **kwargs):
    """Initialize the tag from child nodes and attributes.

    The positional arguments are handed, as one tuple, to the base class
    initializer; the keyword arguments are wrapped in ``self.Attr`` and
    stored as ``self.attr``.
    """
    # NOTE(review): presumably the base class is a sequence type that
    # accepts an iterable of children -- confirm against the class header.
    super(Tag, self).__init__(args)
    self.attr = self.Attr(**kwargs)
47
def __unicode__(self):
48
return self.unicode(indent=0)
51
def unicode(self, indent=2):
53
SimpleUnicodeVisitor(l.append, indent).visit(self)
57
name = self.__class__.__name__
58
return "<%r tag object %d>" % (name, id(self))
60
Namespace = NamespaceMetaclass('Namespace', (object, ), {
63
'__stickyname__': False,
67
def unicode(self, indent=2):
69
HtmlVisitor(l.append, indent, shortempty=False).visit(self)
72
# exported plain html namespace
73
class html(Namespace):
74
__tagclass__ = HtmlTag
76
__tagspec__ = dict([(x,1) for x in (
77
'a,abbr,acronym,address,applet,area,b,bdo,big,blink,'
78
'blockquote,body,br,button,caption,center,cite,code,col,'
79
'colgroup,comment,dd,del,dfn,dir,div,dl,dt,em,embed,'
80
'fieldset,font,form,frameset,h1,h2,h3,h4,h5,h6,head,html,'
81
'i,iframe,img,input,ins,kbd,label,legend,li,link,listing,'
82
'map,marquee,menu,meta,multicol,nobr,noembed,noframes,'
83
'noscript,object,ol,optgroup,option,p,pre,q,s,script,'
84
'select,small,span,strike,strong,style,sub,sup,table,'
85
'tbody,td,textarea,tfoot,th,thead,title,tr,tt,u,ul,xmp,'
86
'base,basefont,frame,hr,isindex,param,samp,var'
90
def __init__(self, **kw):
91
for x, y in kw.items():
92
x = x.replace('_', '-')
97
"""just a box that can contain a unicode string that will be
98
included directly in the output"""
99
def __init__(self, uniobj):
102
class SimpleUnicodeVisitor(object):
103
""" recursive visitor to write unicode. """
104
def __init__(self, write, indent=0, curindent=0, shortempty=True):
107
self.visited = {} # for detection of recursion
109
self.curindent = curindent
111
self.shortempty = shortempty # short empty tags or not
113
def visit(self, node):
114
""" dispatcher on node's class/bases name. """
117
visitmethod = self.cache[cls]
119
for subclass in cls.__mro__:
120
visitmethod = getattr(self, subclass.__name__, None)
121
if visitmethod is not None:
124
visitmethod = self.__object
125
self.cache[cls] = visitmethod
128
# The default fallback handler is name-mangled (double leading underscore)
# so that it cannot clash with a visitor method named after a tag class,
# e.g. a tag called "object".
130
def __object(self, obj):
    """Fallback handler: write the escaped unicode conversion of ``obj``."""
    self.write(escape(unicode(obj)))
135
self.write(obj.uniobj)
138
assert id(obj) not in self.visited
139
self.visited[id(obj)] = 1
144
assert id(tag) not in self.visited
146
tag.parent = self.parents[-1]
149
self.visited[id(tag)] = 1
150
tagname = getattr(tag, 'xmlname', tag.__class__.__name__)
151
if self.curindent and not self._isinline(tagname):
152
self.write("\n" + u(' ') * self.curindent)
154
self.curindent += self.indent
155
self.write(u('<%s%s>') % (tagname, self.attributes(tag)))
156
self.parents.append(tag)
160
self.write(u('</%s>') % tagname)
161
self.curindent -= self.indent
163
nameattr = tagname+self.attributes(tag)
164
if self._issingleton(tagname):
165
self.write(u('<%s/>') % (nameattr,))
167
self.write(u('<%s></%s>') % (nameattr, tagname))
169
def attributes(self, tag):
170
# serialize attributes
171
attrlist = dir(tag.attr)
174
for name in attrlist:
175
res = self.repr_attribute(tag.attr, name)
178
l.extend(self.getstyle(tag))
181
def repr_attribute(self, attrs, name):
183
value = getattr(attrs, name)
184
if name.endswith('_'):
186
if isinstance(value, raw):
187
insert = value.uniobj
189
insert = escape(unicode(value))
190
return ' %s="%s"' % (name, insert)
192
def getstyle(self, tag):
193
""" return attribute list suitable for styling. """
195
styledict = tag.style.__dict__
196
except AttributeError:
199
stylelist = [x+': ' + y for x,y in styledict.items()]
200
return [u(' style="%s"') % u('; ').join(stylelist)]
202
def _issingleton(self, tagname):
203
"""can (and will) be overridden in subclasses"""
204
return self.shortempty
206
def _isinline(self, tagname):
207
"""can (and will) be overridden in subclasses"""
210
class HtmlVisitor(SimpleUnicodeVisitor):
212
# Tag names looked up by _issingleton: an empty tag listed here is written
# in the short "<name/>" form.
single = dict.fromkeys(
    ('br,img,area,param,col,hr,meta,link,base,'
     'input,frame').split(','), 1)
# Tag names looked up by _isinline: a tag listed here is not preceded by
# a newline and indentation when it is written.
inline = dict.fromkeys(
    ('a abbr acronym b basefont bdo big br cite code dfn em font '
     'i img input kbd label q s samp select small span strike '
     'strong sub sup textarea tt u var').split(' '), 1)
220
def repr_attribute(self, attrs, name):
222
value = getattr(attrs, name)
225
return super(HtmlVisitor, self).repr_attribute(attrs, name)
227
def _issingleton(self, tagname):
228
return tagname in self.single
230
def _isinline(self, tagname):
231
return tagname in self.inline
237
u('"') : u('"'), u('<') : u('<'), u('>') : u('>'),
238
u('&') : u('&'), u("'") : u('''),
240
self.charef_rex = re.compile(u("|").join(self.escape.keys()))
242
def _replacer(self, match):
243
return self.escape[match.group(0)]
245
def __call__(self, ustring):
    """ xml-escape the given unicode string.

    The argument is first converted with ``unicode()``; every match of
    ``self.charef_rex`` in the result is then replaced through
    ``self._replacer``.
    """
    try:
        ustring = unicode(ustring)
    except UnicodeDecodeError:
        # The plain conversion could not decode the input; retry as UTF-8
        # and substitute undecodable bytes instead of failing.
        ustring = unicode(ustring, 'utf-8', errors='replace')
    return self.charef_rex.sub(self._replacer, ustring)