# -*- coding: iso-8859-1 -*-
"""
    MoinMoin - DocBook-XML Parser

    This code was tested with 4Suite 1.0a4 and 1.0b1

    @copyright: 2005 Henry Ho <henryho167 AT hotmail DOT com>,
                2005 MoinMoin:AlexanderSchremmer
    @license: GNU GPL, see COPYING for details.

    Features:
    - image support through Attachment
    - internal Wikilinks if a word is a strict wikiname
    - image alt is preserved
    - works with compiled xslt stylesheet for optimized performance

    Configuration:
    - make sure you have installed the DocBook XSLT files
    - set the path to the html directory of the DocBook XSLT files in your
      wiki or farm configuration:
      docbook_html_dir = r"/usr/share/xml/docbook/stylesheet/nwalsh/html/"
      Note that this directory needs to be writable because a cache file will
      be created there (the compiled stylesheet, db_compiled.dat).

    >How can I use Ft API for DTD validation?
    If you have PyXML installed, you can use ValidatingReader rather than
    NonvalidatingReader. See:
    http://uche.ogbuji.net/tech/akara/nodes/2003-01-01/domlettes
"""
import cPickle
import os
import re

from MoinMoin import Page
from MoinMoin.parser.text_xslt import Parser as XsltParser
from MoinMoin.parser.text_moin_wiki import Parser as WikiParser
43
class Parser(XsltParser):
    """
        Send XML file formatted via XSLT.

        The document is transformed with the DocBook stylesheet and the
        generated HTML is post-processed by parse_result().
    """

    # NOTE(review): binds the class attribute to a module-level name
    # `Dependencies` that is defined outside the visible part of this file.
    Dependencies = Dependencies

    def __init__(self, raw, request, **kw):
        """
            @param raw: raw page text, handed on to XsltParser
            @param request: current request; request.cfg.docbook_html_dir
                            must point to the DocBook XSLT html directory
            @param kw: accepted for interface compatibility, not used here
        """
        XsltParser.__init__(self, raw, request)

        # relative path to docbook.xsl and compiled_xsl
        docbook_html_directory = request.cfg.docbook_html_dir
        self.db_xsl = os.path.join(docbook_html_directory, 'docbook.xsl')
        self.db_compiled_xsl = os.path.join(docbook_html_directory, 'db_compiled.dat')

        # wiki parser used for attachment links and WikiName links
        self.wikiParser = WikiParser(raw=self.raw, request=self.request, pretty_url=1)

    def format(self, formatter):
        """
            Give the formatter to the embedded wiki parser, then let
            XsltParser.format do the actual work.
        """
        self.wikiParser.formatter = formatter
        XsltParser.format(self, formatter)

    def append_stylesheet(self):
        """
            virtual function, for docbook parser

            Append the compiled (pickled) DocBook stylesheet to
            self.processor, compiling it first if the cache file is missing.
        """
        abs_db_xsl = os.path.abspath(self.db_xsl)
        abs_db_compiled_xsl = os.path.abspath(self.db_compiled_xsl)

        # same as path.exists, but also test if it is a file
        if not os.path.isfile(abs_db_compiled_xsl):
            _compile_xsl(abs_db_xsl, abs_db_compiled_xsl)

        assert os.path.isfile(abs_db_compiled_xsl)

        # NOTE(security): unpickling executes whatever the cache file contains,
        # so docbook_html_dir must only be writable by trusted users.
        compiled_file = open(abs_db_compiled_xsl, 'rb')
        try:
            stylesheet = cPickle.load(compiled_file)
        finally:
            # always release the handle, even if unpickling fails
            compiled_file.close()

        self.processor.appendStylesheetInstance(stylesheet)

    def parse_result(self, result):
        """
            additional parsing to the resulting XSLT'ed result (resultString) before saving

            BASIC CLEAN UP   : remove unnecessary HTML tags
            RESOLVE IMG SRC  : fix src to find attachment
            RESOLVE WikiNames: if a word is a valid wikiname & a valid wikipage,
                               replace word with hyperlink

            @param result: HTML text produced by the XSLT transformation
            @return: the post-processed HTML text
        """
        result = self._strip_outside_body(result)
        result = self._resolve_image_attachments(result)
        result = self._link_wiki_names(result)
        return self._strip_non_strict_html(result)

    def _strip_outside_body(self, result):
        """ Keep only what is between the <body ...> tag and the last </body>. """
        # remove from beginning until end of body tag
        found = re.search('<body.*?>', result)
        if found:
            result = result[found.end():]

        # remove everything after & including </body>
        found = result.rfind('</body>')
        if found != -1:
            # without this guard a missing </body> would chop the last char
            result = result[:found]

        return result

    def _resolve_image_attachments(self, result):
        """ Replace every <img> tag by an attachment link, keeping its alt text. """
        pieces = _splitResult(re.finditer('<img.*?>', result), result)
        for index, piece in enumerate(pieces):
            if not piece.startswith('<img'):
                continue

            found = re.search('src="(?P<source>.*?)"', piece)
            if not found:
                # no src attribute: nothing to resolve, leave the tag alone
                continue
            image_src = found.group('source')

            image_alt = None # save alt
            found = re.search('alt="(?P<alt>.*?)"', piece)
            if found:
                image_alt = found.group('alt')

            replacement = self.wikiParser.attachment(('attachment:' + image_src, ""))
            if image_alt: # restore alt
                # a function replacement keeps backslashes in the alt text
                # literal instead of treating them as regex template escapes
                replacement = re.sub('alt=".*?"',
                                     lambda match: 'alt="%s"' % image_alt,
                                     replacement)
            pieces[index] = replacement

        return ''.join(pieces)

    def _link_wiki_names(self, result):
        """ Turn words that are WikiNames of existing pages into links. """
        # if a word is a valid wikiname & a valid wikipage,
        # replace word with hyperlink
        word_rule = self.wikiParser.word_rule
        flags = re.UNICODE|re.VERBOSE

        pieces = _splitResult(re.finditer(word_rule, result, flags), result)
        for index, piece in enumerate(pieces):
            if (re.match(word_rule, piece, flags)
                and Page.Page(self.request, piece).exists()):
                pieces[index] = self.wikiParser._word_repl(piece)

        return ''.join(pieces)

    def _strip_non_strict_html(self, result):
        """ Remove stuff that fails HTML 4.01 Strict verification. """
        # remove unsupported attributes
        result = re.sub(' target=".*?"| type=".*?"', '', result)
        result = re.sub('<hr .*?>', '<hr>', result)

        # remove <p>...</p> inside <a>...</a> or <caption>...</caption>
        found = re.finditer('<a href=".*?</a>|<caption>.*?</caption>', result) # XXX re.DOTALL)

        pieces = _splitResult(found, result)
        for index, piece in enumerate(pieces):
            if (piece.startswith('<a href="')
                or piece.startswith('<caption>')):
                pieces[index] = piece.replace('<p>', '').replace('</p>', '')

        return ''.join(pieces)
156
def _compile_xsl(XSLT_FILE, XSLT_COMPILED_FILE):
    """
        compiling docbook stylesheet

        reference: http://155.210.85.193:8010/ccia/nodes/2005-03-18/compileXslt?xslt=/akara/akara.xslt

        @param XSLT_FILE: OS path of the stylesheet to compile
        @param XSLT_COMPILED_FILE: path the pickled stylesheet is written to
    """
    from Ft.Xml.Xslt.Processor import Processor
    # NOTE(review): Stylesheet is not referenced below; the import is kept
    # in case loading the module is needed for pickling - confirm.
    from Ft.Xml.Xslt import Stylesheet
    from Ft.Xml import InputSource
    from Ft.Lib import Uri

    # New docbook processor
    db_processor = Processor()

    # the processor reads the stylesheet through an input source built from a URI
    my_sheet_uri = Uri.OsPathToUri(XSLT_FILE, 1)
    sty_isrc = InputSource.DefaultFactory.fromUri(my_sheet_uri)

    db_processor.appendStylesheet(sty_isrc)

    # Pickled stylesheet will be self.abs_db_compiled_xsl file
    db_root = db_processor.stylesheet.root
    fw = open(XSLT_COMPILED_FILE, 'wb')
    try:
        cPickle.dump(db_root, fw) # , protocol=2)
    finally:
        # close explicitly so the pickle is flushed before it is read back
        fw.close()
184
def _splitResult(iterator, result):
    """
        Split result into a list of unmatched and matched pieces.

        For every match the text since the previous match is appended,
        then the matched text itself; the text after the last match comes
        last (possibly as an empty string). Joining the list gives back
        result when the matches are in order and do not overlap.

        @param iterator: iterable of match objects (anything with a span()
                         method) found in result, e.g. from re.finditer
        @param result: the text the matches were found in
        @return: list of strings
    """
    startpos = 0
    splitResult = []

    for f in iterator:
        start, end = f.span()
        splitResult.append(result[startpos:start])
        splitResult.append(result[start:end])
        # continue behind this match
        startpos = end

    splitResult.append(result[startpos:])

    return splitResult