1
# -*- coding: utf-8 -*-
3
MoinMoin - DocBook Formatter
5
@copyright: 2005 by Mikko Virkkilä <mvirkkil@cc.hut.fi>
6
@copyright: 2005 by MoinMoin:AlexanderSchremmer (small modifications)
7
@copyright: 2005 by MoinMoin:Petr Pytelka <pyta@lightcomp.com> (small modifications)
9
@license: GNU GPL, see COPYING for details.
12
import sys, traceback, os
14
from MoinMoin.formatter.base import FormatterBase
15
from MoinMoin import wikiutil, config
16
from MoinMoin.error import CompositeError
17
from MoinMoin.action import AttachFile
19
from xml.sax import saxutils
20
from xml.dom import getDOMImplementation
22
class InternalError(CompositeError):
    """Error raised by this module; adds nothing to CompositeError."""
25
# Ask xml.dom for the DOM implementation registered under the name "4DOM";
# the resulting module-level `dom` is used by Formatter.__init__ to create
# the document.
# NOTE(review): as extracted, the raise below is unconditional. The embedded
# line numbers jump 25 -> 27, so guard lines (presumably a try/except around
# the lookup) appear to be missing -- confirm against the original file.
dom = getDOMImplementation("4DOM")
27
raise InternalError("You need to install PyXML to use the DocBook formatter.")
29
# Serialises pieces of a DOM document to strings: an XML declaration plus the
# heading node, the root's children other than the heading node, closing tags
# for nodes still open below the root, and the closing tag of the root.
# NOTE(review): this listing is a lossy extraction -- the bare integers are
# stray line numbers and several lines (including some `def` lines and
# variable initialisers) are absent. Notes below mark what looks missing.
class DocBookOutputFormatter:
34
# Remember the document's root element as the current node.
# NOTE(review): other methods read self.doc, which is not assigned in the
# visible lines (numbering jumps 34 -> 36) -- confirm.
def __init__(self, dommDoc):
36
self.curNode = dommDoc.documentElement
38
# Store the node that is printed as the heading.
def setHeading(self, headNode):
39
self.domHeadNode = headNode
42
# Print `node` into an in-memory stream with the xml.dom.ext Printer
# (UTF-8) and return the result decoded to a unicode string.
# NOTE(review): StringIO is used but no import of it is visible here.
def _printNode(self, node):
46
from xml.dom.ext import Print
48
from xml.dom.ext import Printer
50
stream = StringIO.StringIO()
52
visitor = Printer.PrintVisitor(stream, 'UTF-8')
53
Printer.PrintWalker(visitor, node).run()
54
# get value from stream
55
ret = stream.getvalue()
58
return unicode(ret, 'utf-8')
61
# NOTE(review): a `def` line appears to be missing before this point; the
# caller in Formatter.startDocument uses the name getHeading -- confirm.
# Returns the XML declaration, the opening root tag and the printed heading.
# return heading from model
62
rootNode = self.doc.documentElement
64
return '<?xml version="1.0"?><%s>%s' % (rootNode.nodeName,
65
self._printNode(self.domHeadNode))
69
# NOTE(review): a `def` line, the initialisation of `body`, the enclosing
# loop and the return appear to be missing; Formatter.endContent calls
# getBody -- confirm.
# print all nodes inside dom behind heading
70
firstNode = self.doc.documentElement.firstChild
72
if firstNode != self.domHeadNode:
73
body.append(self._printNode(firstNode))
74
firstNode = firstNode.nextSibling
77
# Walk from the current node up to the root, collecting a closing tag for
# each node passed on the way.
# NOTE(review): `ret` is not initialised and nothing is returned in the
# visible lines (numbering jumps 78 -> 80 and 82 -> 86) -- confirm.
def getEndContent(self):
78
# close all opened tags
80
while self.curNode != self.doc.documentElement:
81
ret.append("</%s>" % (self.curNode.nodeName, ))
82
self.curNode = self.curNode.parentNode
86
# NOTE(review): a `def` line appears to be missing before this return;
# Formatter.endDocument calls getFooter -- confirm.
# Returns the closing tag of the document's root element.
return "</%s>" % self.doc.documentElement.nodeName
88
class Formatter(FormatterBase):
93
# Node names that heading() climbs out of before it creates a section.
# _handleFormatting() appends further element names to this same list.
section_should_break = ['abstract', 'para', 'emphasis']
95
# Initialise the base formatter, create an "article" DOM document with the
# DocBook V4.4 doctype, and set up the output formatter plus the two
# parallel lists used for macro placeholder substitution.
# NOTE(review): numbering jumps 102 -> 104; self.cur and self.curdepth are
# read by other methods but are not initialised in the visible lines --
# confirm against the original file.
def __init__(self, request, **kw):
96
'''We should use this for creating the doc'''
97
FormatterBase.__init__(self, request, **kw)
99
self.doc = dom.createDocument(None, "article", dom.createDocumentType(
100
"article", "-//OASIS//DTD DocBook V4.4//EN",
101
"http://www.docbook.org/xml/4.4/docbookx.dtd"))
102
self.root = self.doc.documentElement
104
self.outputFormatter = DocBookOutputFormatter(self.doc)
105
# Placeholder keys and their replacement strings, index-aligned.
self.exchangeKeys = []
106
self.exchangeValues = []
108
def startDocument(self, pagename):
    """Create the article header and return the serialised heading.

    Builds ``<articleinfo><title>pagename</title></articleinfo>``, appends
    it to the document root and registers it with the output formatter as
    the heading node.

    pagename -- text placed inside the <title> element.

    Returns the value of self.outputFormatter.getHeading().
    """
    info = self.doc.createElement("articleinfo")
    title = self.doc.createElement("title")
    title.appendChild(self.doc.createTextNode(pagename))
    info.appendChild(title)
    self.root.appendChild(info)

    # The output formatter prints this node separately from the body.
    self.outputFormatter.setHeading(info)

    return self.outputFormatter.getHeading()
119
def startContent(self, content_id="content", **kw):
123
def endContent(self):
    """Return the serialised body followed by the closing content tags.

    Every placeholder key in self.exchangeKeys that occurs in the body is
    replaced by the value at the same index of self.exchangeValues before
    the end content is appended.
    """
    body = self.outputFormatter.getBody()
    # The two lists are index-aligned, so walk them pairwise instead of
    # relying on a manually maintained counter.
    for key, value in zip(self.exchangeKeys, self.exchangeValues):
        body = body.replace(key, value)
    return body + self.outputFormatter.getEndContent()
132
def endDocument(self):
    """Return the document footer produced by the output formatter."""
    return self.outputFormatter.getFooter()
135
# Add text to the current node. Inside a "screen" node the text goes into a
# CDATA section (appended to the last child when that child already is a
# CDATA section); the last visible statement appends a plain text node.
# NOTE(review): `srcText` is never assigned in the visible lines and the
# numbering jumps 135 -> 140, 144 -> 146 -> 148; the assignment, the else
# branches and the return value appear to be missing -- confirm.
def text(self, text, **kw):
140
if self.cur.nodeName == "screen":
141
if self.cur.lastChild != None:
142
from xml.dom.ext import Node
143
if self.cur.lastChild.nodeType == Node.CDATA_SECTION_NODE:
144
self.cur.lastChild.nodeValue = self.cur.lastChild.nodeValue + srcText
146
self.cur.appendChild(self.doc.createCDATASection(srcText))
148
self.cur.appendChild(self.doc.createTextNode(srcText))
151
# Handle a heading of the given depth. Visible steps: leave every node whose
# name is in section_should_break; when depth <= self.curdepth climb
# (curdepth - depth + 1) times to the enclosing "section"/"article" node and
# one further level unless already at "article"; create a <section> and a
# <title> under the current node; record the new depth.
# NOTE(review): numbering gaps (153 -> 156, 174 -> 177, 178 -> 180 -> 182)
# suggest that branching on `on`, the `self.cur = ...` reassignments after
# each appendChild, and the return value are missing -- confirm.
def heading(self, on, depth, **kw):
152
while self.cur.nodeName in self.section_should_break:
153
self.cur = self.cur.parentNode
156
# try to go to higher level if needed
157
if depth <= self.curdepth:
158
# number of levels we want to go higher
159
numberOfLevels = self.curdepth-depth + 1
160
for i in range(numberOfLevels):
161
# climb to the nearest enclosing section or article node
162
while (self.cur.nodeName != "section" and self.cur.nodeName != "article"):
163
self.cur = self.cur.parentNode
165
# I don't understand this code - looks like unnecessary -- maybe it is used to gain some vertical space for large headings?
166
# if len(self.cur.childNodes) < 3:
167
# self._addEmptyNode("para")
169
# check if not top-level
170
if self.cur.nodeName != "article":
171
self.cur = self.cur.parentNode
173
section = self.doc.createElement("section")
174
self.cur.appendChild(section)
177
title = self.doc.createElement("title")
178
self.cur.appendChild(title)
180
self.curdepth = depth
182
self.cur = self.cur.parentNode
186
# Notify the base formatter, then (in the visible lines) append a <para>
# element to the current node and move the current node to its parent.
# NOTE(review): numbering jumps 187 -> 189 and 190 -> 193; the branching on
# `on`, a `self.cur = para` step and the return value appear to be missing
# -- confirm.
def paragraph(self, on, **kw):
187
FormatterBase.paragraph(self, on)
189
para = self.doc.createElement("para")
190
self.cur.appendChild(para)
193
self.cur = self.cur.parentNode
196
def linebreak(self, preformatted=1):
200
#this should not happen
205
# Generic open/close helper used by most element methods: creates an element
# called `name`, appends it to the current node and sets each
# (attribute, value) pair from `attributes`; the last visible statement moves
# the current node to its parent.
# NOTE(review): numbering jumps 205 -> 207 and 211 -> 214; the `on` test,
# the step making the new node current, and the return value appear to be
# missing -- confirm.
def _handleNode(self, name, on, attributes=()):
207
node = self.doc.createElement(name)
208
self.cur.appendChild(node)
209
if len(attributes) > 0:
210
# the loop variable reuses (and overwrites) the `name` parameter
for name, value in attributes:
211
node.setAttribute(name, value)
214
self.cur = self.cur.parentNode
217
def _addEmptyNode(self, name, attributes=()):
218
node = self.doc.createElement(name)
219
self.cur.appendChild(node)
220
if len(attributes) > 0:
221
for name, value in attributes:
222
node.setAttribute(name, value)
224
# Read the 'colspan' entry from `attrs` (when present) and strip double
# quotes from its string form.
# NOTE(review): numbering jumps 224 -> 226 and stops at 228; a default
# count, the conversion of `s1` to a number and the return statement appear
# to be missing. table_cell() uses the result as an integer -- confirm.
def _getTableCellCount(self, attrs=()):
226
if attrs and attrs.has_key('colspan'):
227
s1 = attrs['colspan']
228
s1 = str(s1).replace('"','')
232
# Add a <colspec> column definition for a new table cell: find the enclosing
# 'tgroup' node, count the siblings that precede its 'tbody' child, create a
# colspec named 'xxx<N>', insert it before that child and store the column
# count in the 'cols' attribute of the node three levels above self.cur.
# NOTE(review): `actNode` is never initialised in the visible lines and the
# numbering has several gaps (234 -> 237, 241 -> 243, 246 -> 250,
# 252 -> 255 -> 257); an initial assignment, if/else branches and probably a
# loop over `cols` appear to be missing -- confirm.
def _addTableCellDefinition(self, attrs=()):
233
# Check number of columns
234
cols = self._getTableCellCount(attrs)
237
numberExistingColumns = 0
238
while actNode and actNode.nodeName != 'tgroup':
239
actNode = actNode.parentNode
240
# Number of existing columns
241
nodeBefore = self.cur
243
nodeBefore = actNode.firstChild
244
while nodeBefore and nodeBefore.nodeName != 'tbody':
245
nodeBefore = nodeBefore.nextSibling
246
numberExistingColumns += 1
250
numberExistingColumns += 1
251
nnode = self.doc.createElement("colspec")
252
nnode.setAttribute('colname', 'xxx' + str(numberExistingColumns))
255
actNode.insertBefore(nnode, nodeBefore)
257
self.cur.insertBefore(nnode, nodeBefore)
259
# Set new number of columns for tgroup
260
self.cur.parentNode.parentNode.parentNode.setAttribute('cols', str(numberExistingColumns))
263
### Inline ##########################################################
265
def _handleFormatting(self, name, on, attributes=()):
266
# We add all the elements we create to the list of elements that should not contain a section
267
if name not in self.section_should_break:
268
self.section_should_break.append(name)
270
return self._handleNode(name, on, attributes)
272
def strong(self, on, **kw):
    """Handle strong text as <emphasis role="strong">.

    on -- open/close flag, forwarded to _handleFormatting.
    kw -- accepted and ignored.
    """
    return self._handleFormatting("emphasis", on, (('role', 'strong'), ))
275
def emphasis(self, on, **kw):
    """Handle emphasised text as a plain <emphasis> element.

    on -- open/close flag, forwarded to _handleFormatting.
    kw -- accepted and ignored.
    """
    return self._handleFormatting("emphasis", on)
278
def underline(self, on, **kw):
    """Handle underlined text as <emphasis role="underline">.

    on -- open/close flag, forwarded to _handleFormatting.
    kw -- accepted and ignored.
    """
    return self._handleFormatting("emphasis", on, (('role', 'underline'), ))
281
def highlight(self, on, **kw):
    """Handle highlighted text as <emphasis role="highlight">.

    on -- open/close flag, forwarded to _handleFormatting.
    kw -- accepted and ignored.
    """
    return self._handleFormatting("emphasis", on, (('role', 'highlight'), ))
284
def sup(self, on, **kw):
    """Handle superscript text as a <superscript> element.

    on -- open/close flag, forwarded to _handleFormatting.
    kw -- accepted and ignored.
    """
    return self._handleFormatting("superscript", on)
287
def sub(self, on, **kw):
    """Handle subscript text as a <subscript> element.

    on -- open/close flag, forwarded to _handleFormatting.
    kw -- accepted and ignored.
    """
    return self._handleFormatting("subscript", on)
290
def strike(self, on, **kw):
    """Handle struck-through text as <emphasis role="strikethrough">.

    on -- open/close flag, forwarded to _handleFormatting.
    kw -- accepted and ignored.
    """
    # does not yield <strike> using the HTML XSLT files here ...
    # but seems to be correct
    return self._handleFormatting("emphasis", on,
                                  (('role', 'strikethrough'), ))
296
def code(self, on, **kw):
    """Handle inline code as a <code> element.

    on -- open/close flag, forwarded to _handleFormatting.
    kw -- accepted and ignored.
    """
    return self._handleFormatting("code", on)
299
def preformatted(self, on, **kw):
    """Handle preformatted text as a <screen> element.

    on -- open/close flag, forwarded to _handleFormatting.
    kw -- accepted and ignored.
    """
    return self._handleFormatting("screen", on)
303
### Lists ###########################################################
305
# Handle an ordered list as <orderedlist>; when `type` is a key of the
# numbering map, the mapped value is passed as the "numeration" attribute.
# NOTE(review): the dict literal is cut off after its first entry (numbering
# jumps 306 -> 312) and 313 -> 317 skips lines, so the remaining map
# entries and the default value of `attrs` are not visible -- confirm.
def number_list(self, on, type=None, start=None, **kw):
306
docbook_ol_types = {'1': "arabic",
312
if type and docbook_ol_types.has_key(type):
313
attrs = [("numeration", docbook_ol_types[type])]
317
return self._handleNode('orderedlist', on, attrs)
319
def bullet_list(self, on, **kw):
    """Handle a bullet list as an <itemizedlist> element.

    on -- open/close flag, forwarded to _handleNode.
    kw -- accepted and ignored.
    """
    return self._handleNode("itemizedlist", on)
322
def definition_list(self, on, **kw):
    """Handle a definition list as a <glosslist> element.

    on -- open/close flag, forwarded to _handleNode.
    kw -- accepted and ignored.
    """
    return self._handleNode("glosslist", on)
325
# Handle a definition term: builds <glossentry><glossterm/></glossentry>,
# appends the entry to the current node, and (last visible statement) moves
# the current node to its parent.
# NOTE(review): numbering jumps 328 -> 330 and 333 -> 336; the test on `on`,
# the step making the new term current and the return value appear to be
# missing -- confirm.
def definition_term(self, on, compact=0, **kw):
326
# When on is false, we back out just one level. This is
327
# ok because we know definition_desc gets called, and we
328
# back out two levels there.
330
entry=self.doc.createElement('glossentry')
331
term=self.doc.createElement('glossterm')
332
entry.appendChild(term)
333
self.cur.appendChild(entry)
336
self.cur = self.cur.parentNode
339
# Handle a definition description as <glossdef>; the second visible
# statement moves the current node up two levels.
# NOTE(review): as extracted, the return makes the following assignment
# unreachable; numbering jumps 340 -> 342 -> 344, so an if/else on `on`
# presumably separated the two statements -- confirm.
def definition_desc(self, on, **kw):
340
# We back out two levels when 'on' is false, to leave the glossentry stuff
342
return self._handleNode("glossdef", on)
344
self.cur = self.cur.parentNode.parentNode
347
# Handle a list item: creates a <listitem> element under the current node;
# the last visible statement moves the current node to its parent.
# NOTE(review): numbering jumps 347 -> 349 and 350 -> 353; the test on `on`,
# any further node handling and the return value appear to be missing --
# confirm.
def listitem(self, on, **kw):
349
node = self.doc.createElement("listitem")
350
self.cur.appendChild(node)
353
self.cur = self.cur.parentNode
357
### Links ###########################################################
359
# FIXME: This is quite crappy
360
def pagelink(self, on, pagename='', page=None, **kw):
    """Handle a link to a wiki page.

    Calls the base-class pagelink first, then renders the link as an
    interwiki link to the 'Self' wiki.

    on       -- open/close flag, forwarded unchanged.
    pagename -- name of the target page.
    page     -- passed to the base class only.
    kw       -- passed to the base class only.

    Returns whatever interwikilink returns.
    """
    FormatterBase.pagelink(self, on, pagename, page, **kw)

    return self.interwikilink(on, 'Self', pagename)  # FIXME
365
# FIXME: This is even more crappy
366
# Handle an interwiki link: resolve "<interwiki>:<pagename>" to a wiki URL
# and tail, map the URL, join both into an href and pass it to url().
# NOTE(review): as extracted, the first return makes the rest unreachable;
# numbering jumps 366 -> 368, so a guard (presumably on `on`) appears to be
# missing. That first call also passes the `kw` dict positionally as the
# url argument -- confirm both against the original file.
def interwikilink(self, on, interwiki='', pagename='', **kw):
368
return self.url(on,kw)
370
wikitag, wikiurl, wikitail, wikitag_bad = wikiutil.resolve_wiki(self.request, '%s:%s' % (interwiki, pagename))
371
wikiurl = wikiutil.mapURL(self.request, wikiurl)
372
href = wikiutil.join_wiki(wikiurl, wikitail)
374
return self.url(on, href)
376
def url(self, on, url=None, css=None, **kw):
    """Handle an external link as a <ulink url="..."> element.

    on  -- open/close flag, forwarded to _handleNode.
    url -- value for the element's 'url' attribute.
    css -- accepted and ignored.
    kw  -- accepted and ignored.

    Returns whatever _handleNode returns.
    """
    return self._handleNode("ulink", on, (('url', url), ))
379
# Insert an <anchor id="name"> element: one _handleNode call with on=True,
# immediately followed by one with on=False.
# NOTE(review): the second call names "ulink" rather than "anchor"; whether
# the name matters when on is False cannot be told from the visible
# _handleNode lines. No return statement is visible either (numbering stops
# at 381, next method starts at 384) -- confirm.
def anchordef(self, name):
380
self._handleNode("anchor", True, (('id', name), ))
381
self._handleNode("ulink", False)
384
# Handle a link to an anchor as a <link> element whose attributes are built
# from `name` and the optional 'id' keyword ('endterm' / 'linkend').
# NOTE(review): `attrs` is never initialised in the visible lines and the
# numbering jumps 385 -> 388 -> 390 -> 392 -> 394, so the conditions that
# choose between the three append calls are missing -- confirm.
def anchorlink(self, on, name='', **kw):
385
id = kw.get('id',None)
388
attrs.append(('endterm', name))
390
attrs.append(('linkend', id))
392
attrs.append(('linkend', name))
394
return self._handleNode("link", on, attrs)
396
### Attachments ######################################################
398
# Render a link to an attachment: resolve page and file name, compute the
# file path and the attachment URL; when the file does not exist emit the
# literal text "[attachment:<url>]", otherwise start a url() link.
# NOTE(review): the final return expression is cut off after "+" (numbering
# stops at 407, next method starts at 411) -- the rest of the expression is
# missing. `_` is assigned but unused in the visible lines.
def attachment_link(self, url, text, **kw):
399
_ = self.request.getText
400
pagename, filename = AttachFile.absoluteName(url, self.page.page_name)
401
fname = wikiutil.taintfilename(filename)
402
fpath = AttachFile.getFilename(self.request, pagename, fname)
403
target = AttachFile.getAttachUrl(pagename, filename, self.request)
404
if not os.path.exists(fpath):
405
return self.text("[attachment:%s]" % url)
407
return (self.url(1, target, title="attachment:%s" % url) +
411
# Render an attached image: resolve page and file name and the file path;
# when the file does not exist emit the literal text "[attachment:<url>]".
# The remaining visible lines are keyword arguments (title, src) of a call
# whose opening is not visible.
# NOTE(review): numbering jumps 417 -> 420; the start of the final call
# (presumably self.image(...)) is missing -- confirm. `_` is assigned but
# unused in the visible lines.
def attachment_image(self, url, **kw):
412
_ = self.request.getText
413
pagename, filename = AttachFile.absoluteName(url, self.page.page_name)
414
fname = wikiutil.taintfilename(filename)
415
fpath = AttachFile.getFilename(self.request, pagename, fname)
416
if not os.path.exists(fpath):
417
return self.text("[attachment:%s]" % url)
420
title="attachment:%s" % url,
421
src=AttachFile.getAttachUrl(pagename, filename,
422
self.request, addts=1))
424
# Render an attached drawing: resolve page and file name, append ".png" to
# both names; when the file does not exist emit the literal text
# "[drawing:<url>]". The remaining visible lines are keyword arguments
# (src, html_class) of a call whose opening is not visible.
# NOTE(review): `fpath` is used but never assigned in the visible lines
# (numbering jumps 427 -> 429 and 431 is followed by 432 -> 436 -> 438);
# the path lookup and the start of the final call are missing -- confirm.
def attachment_drawing(self, url, text, **kw):
425
_ = self.request.getText
426
pagename, filename = AttachFile.absoluteName(url, self.page.page_name)
427
fname = wikiutil.taintfilename(filename)
429
fname = fname + ".png"
430
filename = filename + ".png"
431
if not os.path.exists(fpath):
432
return self.text("[drawing:%s]" % url)
436
src=AttachFile.getAttachUrl(pagename, filename, self.request,
438
html_class="drawing")
440
### Images and Smileys ##############################################
442
# Append an <inlinemediaobject> to the current node, containing an
# <imageobject>/<imagedata> (fileref, width, depth attributes taken from
# keyword arguments) and a <textobject>/<phrase> holding a title text.
# NOTE(review): `src` is a named parameter, so a caller's src=... never
# lands in `kw` and the kw.has_key('src') test looks like it cannot succeed
# for such calls -- confirm intent. `title` is not assigned in the visible
# lines (numbering jumps 460 -> 465); the body of the `for` loop and the
# return value appear to be missing.
def image(self, src=None, **kw):
445
media = self.doc.createElement('inlinemediaobject')
447
imagewrap = self.doc.createElement('imageobject')
448
media.appendChild(imagewrap)
450
image = self.doc.createElement('imagedata')
451
if kw.has_key('src'):
452
image.setAttribute('fileref', kw['src'])
453
if kw.has_key('width'):
454
image.setAttribute('width', kw['width'])
455
if kw.has_key('height'):
456
# the height keyword is written to the 'depth' attribute
image.setAttribute('depth', kw['height'])
457
imagewrap.appendChild(image)
460
for a in ('title', 'html_title', 'alt', 'html_alt'):
465
txtcontainer = self.doc.createElement('textobject')
466
media.appendChild(txtcontainer)
467
txtphrase = self.doc.createElement('phrase')
468
txtphrase.appendChild(self.doc.createTextNode(title))
469
txtcontainer.appendChild(txtphrase)
471
self.cur.appendChild(media)
474
# Render a smiley as an image: look up width, height, a third value and the
# image name in config.smileys (key is the stripped text), turn a
# non-absolute href into a theme image URL and call image().
# NOTE(review): `href` is read before any visible assignment (numbering
# jumps 475 -> 477); an initial `href = ...` line appears to be missing --
# confirm.
def smiley(self, text):
475
w, h, b, img = config.smileys[text.strip()]
477
if not href.startswith('/'):
478
href = self.request.theme.img_url(img)
479
return self.image(src=href, alt=text, width=str(w), height=str(h))
481
def icon(self, type):
    """Return an empty string; icons are not rendered by this formatter.

    type -- ignored.
    """
    return ''  # self.request.theme.make_icon(type)
484
### Tables ##########################################################
486
#FIXME: We should copy code from text_html.py for attr handling
488
# Handle a table: passes a sanitised attribute collection to the <table>
# node, adds an empty <caption>, then handles <tgroup> and <tbody>.
# NOTE(review): `sanitized_attrs` is never initialised in the visible lines
# (numbering jumps 488 -> 490), and `sanitized_attrs[id]` indexes with the
# builtin `id` function rather than the string 'id' -- looks unintended.
# The branching on `on` and the return value are not visible -- confirm.
def table(self, on, attrs=None, **kw):
490
if attrs and attrs.has_key('id'):
491
sanitized_attrs[id] = attrs['id']
493
self._handleNode("table", on, sanitized_attrs)
495
self._addEmptyNode("caption") #dtd for table requires caption
496
self._handleNode("tgroup", on)
497
self._handleNode("tbody", on)
500
# Handle a table row as a <row> element and reset the per-row cell counter.
# NOTE(review): `sanitized_attrs` is never initialised in the visible lines
# (numbering jumps 501 -> 503), and `sanitized_attrs[id]` indexes with the
# builtin `id` function rather than the string 'id' -- confirm.
def table_row(self, on, attrs=None, **kw):
501
self.table_current_row_cells = 0
503
if attrs and attrs.has_key('id'):
504
sanitized_attrs[id] = attrs['id']
505
return self._handleNode("row", on, sanitized_attrs)
507
# Handle a table cell as an <entry> element: advance the per-row cell
# counter by the cell's column count, add a column definition when the
# parent of the current node is the first child of its own parent, and set
# namest/nameend on the current node when the cell spans several columns.
# NOTE(review): `sanitized_attrs` is never initialised in the visible lines
# (numbering jumps 508 -> 510), `sanitized_attrs[id]` indexes with the
# builtin `id`, and `ret` is computed but no return is visible (numbering
# stops at 524) -- confirm.
def table_cell(self, on, attrs=None, **kw):
508
# Finish row definition
510
if attrs and attrs.has_key('id'):
511
sanitized_attrs[id] = attrs['id']
512
# Get number of newly added columns
513
startCount = self.table_current_row_cells
514
addedCellsCount = self._getTableCellCount(attrs)
515
self.table_current_row_cells += addedCellsCount
516
ret = self._handleNode("entry", on, sanitized_attrs)
517
if self.cur.parentNode == self.cur.parentNode.parentNode.firstChild:
518
self._addTableCellDefinition(attrs)
519
# Set cell join if any
520
if addedCellsCount > 1:
521
# column names use the same 'xxx<N>' scheme as _addTableCellDefinition
startString = "xxx" + str(startCount)
522
stopString = "xxx" + str(startCount + addedCellsCount - 1)
523
self.cur.setAttribute("namest", startString)
524
self.cur.setAttribute("nameend", stopString)
527
### Code ############################################################
529
# Handle a code area as a <screen> element carrying id, linenumbering
# ('numbered' when `show` is truthy, otherwise 'unnumbered'),
# startinglinenumber, language and format attributes.
# NOTE(review): numbering jumps 530 -> 534 and 538 -> 540; lines between
# the `show` mapping and the tuple, and the tuple's closing parenthesis,
# are not visible -- confirm. `step` is unused in the visible lines.
def code_area(self, on, code_id, code_type='code', show=0, start=-1, step=-1):
530
show = show and 'numbered' or 'unnumbered'
534
attrs = (('id', code_id),
535
('linenumbering', show),
536
('startinglinenumber', str(start)),
537
('language', code_type),
538
('format','linespecific'),
540
return self._handleFormatting("screen", on, attrs)
542
def code_line(self, on):
    """Return an empty string; code lines need no markup here.

    on -- ignored.
    """
    return ''  # No clue why something should be done here
545
# Handle a highlighted code token: map the token type to a DocBook element
# name and, when the mapping exists and is non-empty, handle that element
# through _handleFormatting.
# NOTE(review): the dict literal is incomplete (numbering jumps 546 -> 549
# -> 560), so most map entries and the closing brace are not visible; the
# return value for unmapped token types is not visible either -- confirm.
def code_token(self, on, tok_type):
546
toks_map = {'ID':'methodname',
549
'Comment':'lineannotation',
560
if toks_map.has_key(tok_type) and toks_map[tok_type] != '':
561
return self._handleFormatting(toks_map[tok_type], on)
565
# Handle a macro call: run the base-class macro, store its output in
# self.exchangeValues and a unique placeholder key in self.exchangeKeys at
# the same index; endContent() later replaces each key in the body with its
# value.
# NOTE(review): numbering jumps 567 -> 569 and stops at 579; the statement
# executed for "TableOfContents" and the final return (presumably emitting
# the placeholder key as text) are not visible -- confirm.
def macro(self, macro_obj, name, args):
566
if name == "TableOfContents":
567
# The table of contents can be inserted by the DocBook transformation
569
# The output of all other macros is added as a text node.
570
# At first the text node contains a placeholder string which is later
571
# exchanged for the real value. The problem is that data inserted
572
# as a text node is XML-encoded, e.g. < is encoded in the output as &lt;
573
text = FormatterBase.macro(self, macro_obj, name, args)
575
# prepare the placeholder identifier
576
sKey = "EXCHANGESTRINGMACRO-" + str(len(self.exchangeKeys)) + "-EXCHANGESTRINGMACRO"
577
self.exchangeKeys.append(sKey)
578
self.exchangeValues.append(text)
579
# append data to lists
583
### Not supported ###################################################
585
def rule(self, size = 0, **kw):
588
def small(self, on, **kw):
591
def big(self, on, **kw):