1
# Copyright (c) 2001-2009 Twisted Matrix Laboratories.
2
# See LICENSE for details.
5
LaTeX output support for Lore.
8
from xml.dom import minidom as dom
9
import os.path, re, string
10
from cStringIO import StringIO
13
from twisted.web import domhelpers
14
from twisted.python import text, procutils
18
# Matches any single character that is special to LaTeX; latexEscape hands
# every match to _escapeMatch to obtain the replacement text.
escapingRE = re.compile(r'([\[\]#$%&_{}^~\\])')

# Matches a lower-case letter directly followed by an upper-case letter.  The
# two groups let a substitution insert text at the camelCase boundary.
lowerUpperRE = re.compile(r'([a-z])([A-Z])')
21
def _escapeMatch(match):
24
return '$\\backslash$'
35
def latexEscape(txt):
    """
    Escape C{txt} so that it can be placed in running LaTeX text.

    Every character matched by C{escapingRE} is replaced with the string
    C{_escapeMatch} returns for it, and each newline becomes a single space.

    @param txt: the string to escape.
    @return: the escaped string.
    """
    escaped = escapingRE.sub(_escapeMatch, txt)
    return escaped.replace('\n', ' ')
38
# Replacement text for HTML entity references, keyed by entity name.
# getLatexText uses this mapping as its default ``entities`` argument and
# writes an empty string for any name that is not listed.
entities = {
    'amp': '\\&',
    'gt': '>',
    'lt': '<',
    'quot': '"',
    'copy': '\\copyright',
    'mdash': '---',
    # In LaTeX source `` opens and '' closes a double quotation.
    'ldquo': '``',
    'rdquo': "''",
}
46
def realpath(path):
    """
    Return C{path} as a normalised path, joined onto the current working
    directory and written with forward slashes only.

    @param path: a file system path; a relative path is resolved against the
        current working directory.
    @return: the normalised path as a string containing no backslashes.
    """
    # NOTE(review): the binding of `cwd` was not part of the visible lines;
    # the process working directory is assumed -- confirm.
    cwd = os.getcwd()
    path = os.path.normpath(os.path.join(cwd, path))
    return path.replace('\\', '/') # windows slashes make LaTeX blow up
50
def getLatexText(node, writer, filter=lambda x:x, entities=entities):
    """
    Pass the text found in C{node} and its descendants to C{writer}.

    @param node: a DOM-like node.  A node with an C{eref} attribute is treated
        as an entity reference, a node with a C{data} attribute as character
        data, and anything else is walked through its C{childNodes}.
    @param writer: a callable taking one string.
    @param filter: a callable applied to character data before it is written;
        it is not applied to entity replacements.
    @param entities: mapping from entity name to replacement text; a name
        that is not in the mapping is written as the empty string.
    @return: the result of C{writer} for an entity or character-data node,
        C{None} for a node that is walked through its children.
    """
    if hasattr(node, 'eref'):
        return writer(entities.get(node.eref, ''))
    if hasattr(node, 'data'):
        data = node.data
        if isinstance(data, unicode):
            # The writer receives byte strings, so unicode is encoded first.
            data = data.encode('utf-8')
        return writer(filter(data))
    for child in node.childNodes:
        getLatexText(child, writer, filter, entities)
62
class BaseLatexSpitter:
    """
    Base visitor that walks a DOM tree and emits output through a writer.

    An element with tag name C{T} is handled by the method C{visitNode_T} when
    one exists and by L{visitNodeDefault} otherwise.  The default handler
    writes the optional attribute C{start_T}, visits the children, then writes
    the optional attribute C{end_T}.

    Nodes without a C{tagName} are passed to C{self.writeNodeData}, and
    L{visitNode_a} delegates to C{visitNode_a_listing}, C{visitNode_a_href}
    and C{visitNode_a_name}; none of these are defined here, so subclasses
    must supply them.
    """

    def __init__(self, writer, currDir='.', filename=''):
        """
        @param writer: a callable taking one string; all output goes to it.
        @param currDir: directory stored as C{self.currDir}.
        @param filename: name stored as C{self.filename}.
        """
        self.writer = writer
        self.currDir = currDir
        self.filename = filename

    def visitNode(self, node):
        """
        Dispatch C{node} to the handler that matches its kind.

        Comments produce no output.
        """
        if isinstance(node, dom.Comment):
            return
        if not hasattr(node, 'tagName'):
            self.writeNodeData(node)
            return
        getattr(self, 'visitNode_'+node.tagName, self.visitNodeDefault)(node)

    def visitNodeDefault(self, node):
        """
        Write C{start_<tag>}, visit every child, then write C{end_<tag>}.

        A missing start or end attribute is written as the empty string.
        """
        self.writer(getattr(self, 'start_'+node.tagName, ''))
        for child in node.childNodes:
            self.visitNode(child)
        self.writer(getattr(self, 'end_'+node.tagName, ''))

    def visitNode_a(self, node):
        """
        Route an C{a} element by its attributes.

        A C{class} ending in C{listing} wins, then C{href}, then C{name};
        an element with none of these gets the default treatment.
        """
        if node.hasAttribute('class'):
            if node.getAttribute('class').endswith('listing'):
                return self.visitNode_a_listing(node)
        if node.hasAttribute('href'):
            return self.visitNode_a_href(node)
        if node.hasAttribute('name'):
            return self.visitNode_a_name(node)
        self.visitNodeDefault(node)

    def visitNode_span(self, node):
        """
        Handle an element according to its C{class} attribute.

        With a class the node's C{tagName} is changed in place to
        C{<tag>_<class>} and the node is dispatched again, so a method such as
        C{visitNode_span_footnote} or attributes such as C{start_div_note}
        apply.  Without a class the default handler is used.
        """
        if not node.hasAttribute('class'):
            return self.visitNodeDefault(node)
        node.tagName += '_'+node.getAttribute('class')
        self.visitNode(node)

    visitNode_div = visitNode_span

    def visitNode_h1(self, node):
        """
        Produce no output for C{h1} elements.
        """
        # NOTE(review): no body was visible for this method; an empty
        # handler is assumed -- confirm.
        pass

    def visitNode_style(self, node):
        """
        Produce no output for C{style} elements.
        """
        # NOTE(review): no body was visible for this method; an empty
        # handler is assumed -- confirm.
        pass
108
# Spitter that emits LaTeX; it extends BaseLatexSpitter with LaTeX-specific
# element handlers and start_*/end_* markup strings.
class LatexSpitter(BaseLatexSpitter):
111
# True when procutils.which("dia") returns a non-empty result (presumably a
# `dia` executable was found on the search path -- confirm).  visitNode_img
# checks this flag before looking for a .dia file next to an image.
diaHack = bool(procutils.which("dia"))
113
def writeNodeData(self, node):
    """
    Write the text of C{node}, escaped for LaTeX.

    @param node: the node whose text is collected with C{getLatexText}.
    """
    buf = StringIO()
    getLatexText(node, buf.write, latexEscape)
    # escapingRE does not cover angle brackets, so they are still raw here;
    # they are emitted in math mode.
    self.writer(buf.getvalue().replace('<', '$<$').replace('>', '$>$'))
118
def visitNode_head(self, node):
    """
    Write an C{\\author{...}} command built from the C{link} elements whose
    C{rel} attribute is C{author}, then handle C{head} in the default way.

    Each author is the link's C{title}, followed by its C{href} (without a
    leading C{mailto:}) between math-mode angle brackets.  Authors are joined
    with C{\\and}.  Nothing is written when there are no author links.
    """
    authorNodes = domhelpers.findElementsWithAttribute(node, 'rel', 'author')
    authorNodes = [n for n in authorNodes if n.tagName == 'link']

    if authorNodes:
        authors = []
        for aNode in authorNodes:
            name = aNode.getAttribute('title')
            href = aNode.getAttribute('href')
            if href.startswith('mailto:'):
                # Only the address itself is printed.
                href = href[len('mailto:'):]
            if href:
                if name:
                    name += ' '
                name += '$<$' + href + '$>$'
            if name:
                authors.append(name)

        self.writer('\\author{')
        self.writer(' \\and '.join(authors))
        self.writer('}')

    self.visitNodeDefault(node)
142
def visitNode_pre(self, node):
    """
    Write the text of a C{pre} element inside a C{verbatim} environment.

    The text is taken unescaped and passed through
    C{text.removeLeadingTrailingBlanks} before it is written.
    """
    self.writer('\\begin{verbatim}\n')
    buf = StringIO()
    # No filter is passed: inside verbatim the text must not be escaped.
    getLatexText(node, buf.write)
    self.writer(text.removeLeadingTrailingBlanks(buf.getvalue()))
    self.writer('\\end{verbatim}\n')
149
def visitNode_code(self, node):
    """
    Write the escaped text of a C{code} element inside C{\\texttt{...}}.

    A C{\\linebreak[1]} hint is inserted between a lower-case letter and a
    following upper-case letter, and after every dot except one in the first
    position.
    """
    fout = StringIO()
    getLatexText(node, fout.write, latexEscape)
    data = lowerUpperRE.sub(r'\1\\linebreak[1]\2', fout.getvalue())
    # The first character is left alone, so a leading dot gets no hint.
    data = data[:1] + data[1:].replace('.', '.\\linebreak[1]')
    self.writer('\\texttt{'+data+'}')
156
def visitNode_img(self, node):
    """
    Convert the image named by the C{src} attribute to EPS and include it.

    The converter is the method C{convert_<extension>}; it is called with the
    source path and the path of the C{.eps} file to create in
    C{self.currDir}.  When C{self.diaHack} is set and a readable C{.dia} file
    with the same base name exists, that file is converted instead.  Images
    with no matching converter produce no output.
    """
    fileName = os.path.join(self.currDir, node.getAttribute('src'))
    target, ext = os.path.splitext(fileName)
    if self.diaHack and os.access(target + '.dia', os.R_OK):
        ext = '.dia'
        fileName = target + ext
    f = getattr(self, 'convert_'+ext[1:], None)
    if not f:
        return
    target = os.path.join(self.currDir, os.path.basename(target)+'.eps')
    f(fileName, target)
    # Only the base name is handed on to _write_img.
    target = os.path.basename(target)
    self._write_img(target)
170
def _write_img(self, target):
171
"""Write LaTeX for image."""
172
self.writer('\\begin{center}\\includegraphics[%%\n'
174
'\\textwidth,height=1.0\\textheight,\nkeepaspectratio]'
175
'{%s}\\end{center}\n' % target)
177
def convert_png(self, src, target):
    """
    Convert the PNG file C{src} to PostScript, written to C{target}.

    Runs C{pngtopnm src | pnmtops -noturn} without going through a shell, so
    file names containing quotes or other shell characters are safe.

    @param src: path of the PNG file to read.
    @param target: path of the file to create.
    @raise OSError: if either program cannot be started or exits with a
        non-zero status.
    """
    import subprocess

    out = open(target, 'wb')
    try:
        toPnm = subprocess.Popen(['pngtopnm', src], stdout=subprocess.PIPE)
        toPs = subprocess.Popen(['pnmtops', '-noturn'],
                                stdin=toPnm.stdout, stdout=out)
        # Drop our copy of the pipe so that pngtopnm is signalled if pnmtops
        # exits early.
        toPnm.stdout.close()
        statuses = [toPs.wait(), toPnm.wait()]
    finally:
        out.close()
    for status in statuses:
        if status != 0:
            raise OSError('converting %r failed with status %r'
                          % (src, status))
184
def convert_dia(self, src, target):
    """
    Export the gzip-compressed dia diagram C{src} to C{target} at half scale.

    A copy of the diagram with its C{scaling} attribute changed from 1 to
    0.5 is written to C{<src>_hacked.dia} (gzip-compressed, like the input)
    and C{dia} is then run on that copy with C{-n -e target}.

    @param src: path of the compressed diagram to read.
    @param target: path of the file C{dia} is asked to export.
    @raise OSError: if C{dia} cannot be started.  Its exit status is not
        checked.
    """
    import gzip
    import subprocess

    # EVIL DISGUSTING HACK
    pre = '<dia:attribute name="scaling">\n <dia:real val="1"/>'
    post = '<dia:attribute name="scaling">\n <dia:real val="0.5"/>'
    hacked = '%s_hacked.dia' % (src,)

    compressed = gzip.open(src, 'rb')
    try:
        data = compressed.read()
    finally:
        compressed.close()
    if not isinstance(data, str):
        # gzip returns bytes where str is a text type; match like with like.
        pre, post = pre.encode('ascii'), post.encode('ascii')

    # Closing the file before dia runs guarantees the data is on disk.
    rescaled = gzip.open(hacked, 'wb')
    try:
        rescaled.write(data.replace(pre, post))
    finally:
        rescaled.close()
    # Let's pretend we never saw that.

    # Silly dia needs an X server, even though it doesn't display anything.
    # If this is a problem for you, try using Xvfb.
    subprocess.call(['dia', hacked, '-n', '-e', target])
200
def visitNodeHeader(self, node):
    """
    Write a sectioning command for a heading element.

    The digit in the tag name selects the depth: C{h2} plus a base level of
    0 gives C{\\section}, and each further level adds one C{sub} prefix.  The
    heading's content is rendered by a L{HeadingLatexSpitter} sharing this
    spitter's writer, directory and file name.
    """
    # The base level is read from self.baseLevel when that attribute exists
    # and counts as 0 otherwise.
    level = (int(node.tagName[1])-2)+getattr(self, 'baseLevel', 0)
    self.writer('\n\n\\'+level*'sub'+'section{')
    spitter = HeadingLatexSpitter(self.writer, self.currDir, self.filename)
    spitter.visitNodeDefault(node)
    self.writer('}\n')
207
def visitNode_a_listing(self, node):
    """
    Include the file named by C{href} verbatim, followed by a caption.

    The path is taken relative to C{self.currDir}.  Trailing whitespace is
    removed from each line and the first C{skipLines} lines (default 0) are
    dropped.  The caption is the link text, or C{Source listing} when the
    link text is just the file's base name, followed by the base name.
    """
    fileName = os.path.join(self.currDir, node.getAttribute('href'))
    self.writer('\\begin{verbatim}\n')
    source = open(fileName)
    try:
        lines = [line.rstrip() for line in source]
    finally:
        source.close()
    skipLines = int(node.getAttribute('skipLines') or 0)
    lines = lines[skipLines:]
    self.writer(text.removeLeadingTrailingBlanks('\n'.join(lines)))
    self.writer('\\end{verbatim}')

    # Write a caption for this source listing
    fileName = os.path.basename(fileName)
    caption = domhelpers.getNodeText(node)
    if caption == fileName:
        caption = 'Source listing'
    self.writer('\\parbox[b]{\\linewidth}{\\begin{center}%s --- '
                '\\begin{em}%s\\end{em}\\end{center}}'
                % (latexEscape(caption), latexEscape(fileName)))
225
def visitNode_a_href(self, node):
    """
    Render a hyperlink.

    For C{http}, C{https}, C{ftp} and C{mailto} links the link text is
    written normally and the target is added as a footnote, unless the text
    already is the target.  Any other link is treated as a reference to
    another Lore document: the text is set in italics and followed by
    C{\\loreref{...}} naming the label C{<path>HASH<fragment>} (or just
    C{<path>} without a fragment), the same form C{visitNode_a_name} uses
    for its labels.
    """
    # NOTE(review): the branch structure below was rebuilt from the visible
    # statements; confirm it against the complete function.
    supported_schemes=['http', 'https', 'ftp', 'mailto']
    href = node.getAttribute('href')
    if urlparse.urlparse(href)[0] in supported_schemes:
        linkText = domhelpers.getNodeText(node)
        self.visitNodeDefault(node)
        if linkText != href:
            self.writer('\\footnote{%s}' % latexEscape(href))
    else:
        # Padding with None makes the unpacking work without a '#'.
        path, fragid = (href.split('#', 1) + [None])[:2]
        if path == '':
            # A bare "#fragment" points into the current document.
            path = self.filename
        else:
            path = os.path.join(os.path.dirname(self.filename), path)
        path = realpath(path)

        if fragid:
            ref = path + 'HASH' + fragid
        else:
            ref = path
        self.writer('\\textit{')
        self.visitNodeDefault(node)
        self.writer('}')
        self.writer('\\loreref{%s}' % ref)
257
def visitNode_a_name(self, node):
    """
    Write a C{\\label} for a named anchor, then its content.

    The label is the normalised path of the current file, the literal text
    C{HASH}, and the anchor's C{name} attribute.
    """
    document = realpath(self.filename)
    anchor = node.getAttribute('name')
    self.writer('\\label{%sHASH%s}' % (document, anchor))
    self.visitNodeDefault(node)
262
def visitNode_table(self, node):
    """
    Render a table as a floating C{tabular} with left-aligned columns.

    Only C{tr} elements that are direct children of the table, and the
    C{th}/C{td} elements directly inside them, are used.  A row containing a
    C{th} cell is followed by a horizontal rule.  The table's C{title}
    attribute, when present, becomes the caption.  A table without rows
    yields an empty one-column C{tabular}.
    """
    # NOTE(review): the row loop was rebuilt from the visible statements;
    # confirm it against the complete function.
    rows = [[col for col in row.childNodes
                 if getattr(col, 'tagName', None) in ('th', 'td')]
            for row in node.childNodes if getattr(row, 'tagName', None)=='tr']
    # One extra column: every cell, including the last, is followed by '&'.
    numCols = 1+max([len(row) for row in rows] or [0])
    self.writer('\\begin{table}[ht]\\begin{center}')
    self.writer('\\begin{tabular}{@{}'+'l'*numCols+'@{}}')
    for row in rows:
        isHeader = False
        for col in row:
            self.visitNode(col)
            self.writer('&')
            if col.tagName == 'th':
                isHeader = True
        self.writer('\\\\\n') #\\ ends lines
        if isHeader:
            self.writer('\\hline\n')
    self.writer('\\end{tabular}\n')
    if node.hasAttribute('title'):
        self.writer('\\caption{%s}'
                    % latexEscape(node.getAttribute('title')))
    self.writer('\\end{center}\\end{table}\n')
285
def visitNode_span_footnote(self, node):
    """
    Write the content of a footnote span inside C{\\footnote{...}}.

    The content is rendered by a L{FootnoteLatexSpitter} that shares this
    spitter's writer, directory and file name.
    """
    self.writer('\\footnote{')
    spitter = FootnoteLatexSpitter(self.writer, self.currDir, self.filename)
    spitter.visitNodeDefault(node)
    self.writer('}')
291
def visitNode_span_index(self, node):
    """
    Write an C{\\index} entry for the span's C{value} attribute, then render
    the span's content in the default way.
    """
    entry = node.getAttribute('value')
    self.writer('\\index{%s}\n' % entry)
    self.visitNodeDefault(node)
295
# h2, h3 and h4 elements all share visitNodeHeader, which derives the
# sectioning depth from the digit in the tag name.
visitNode_h2 = visitNode_h3 = visitNode_h4 = visitNodeHeader
297
# Written by visitNodeDefault before the children of a <title> element.
start_title = '\\title{'
306
# Text written when the <html> element is opened: the document class and the
# definition of the \loreref command that visitNode_a_href emits.
# NOTE(review): this triple-quoted literal is not terminated within the lines
# shown here, and \ifthenelse is given only one of its two result branches --
# confirm against the complete definition.
start_html = '''\\documentclass{article}
307
\\newcommand{\\loreref}[1]{%
308
\\ifthenelse{\\value{page}=\\pageref{#1}}%
310
{ (page \\pageref{#1})}%
313
# Markup that visitNodeDefault writes around elements: start_<tag> before the
# children and end_<tag> after them.

# Document body.
start_body = '\\begin{document}\n\\maketitle\n'
end_body = '\\end{document}'

# Lists.
start_dl = '\\begin{description}\n'
end_dl = '\\end{description}\n'
start_ul = '\\begin{itemize}\n'
end_ul = '\\end{itemize}\n'
start_ol = '\\begin{enumerate}\n'
end_ol = '\\end{enumerate}\n'

# <strong> is rendered exactly like <em>.
start_em = '\\begin{em}'
start_strong = start_em
end_em = '\\end{em}'
end_strong = end_em

# <div class="note">.
start_div_note = '\\begin{quotation}\\textbf{Note:}'
end_div_note = '\\end{quotation}'

# Table header cells.
start_th = '\\textbf{'
346
class SectionLatexSpitter(LatexSpitter):
    """
    Spitter that renders a document as a C{\\section} instead of a complete
    LaTeX document: the title opens the section and the html/body wrappers
    are empty.
    """

    start_title = '\\section{'

    # The document wrappers are suppressed.  end_title is empty because
    # visitNode_title writes the closing brace itself, after the label.
    start_html = ''
    start_body = ''
    end_body = ''
    end_title = ''

    def visitNode_title(self, node):
        """
        Write the title, then a label made of the normalised path of the
        current file, and close the sectioning command.
        """
        self.visitNodeDefault(node)
        label = realpath(self.filename)
        self.writer('\\label{%s}}\n' % label)
360
class ChapterLatexSpitter(SectionLatexSpitter):
    """
    Variant of L{SectionLatexSpitter} whose title opens a C{\\chapter}.
    """

    start_title = '\\chapter{'
365
class HeadingLatexSpitter(BaseLatexSpitter):
    """
    Spitter used for the content of headings.  It derives from the base class
    only, but writes text nodes the same way L{LatexSpitter} does.
    """

    # The plain function is taken from the class dictionary so that it can be
    # reused here although this class does not inherit from LatexSpitter.
    writeNodeData = LatexSpitter.__dict__['writeNodeData']
372
class FootnoteLatexSpitter(LatexSpitter):
    """For multi-paragraph footnotes, this avoids having an empty leading
    paragraph."""

    # Nothing is written before the first paragraph; visitNode_p puts the
    # regular paragraph start back once a paragraph has been rendered.
    # NOTE(review): this override was not among the visible lines; it is
    # implied by the docstring and by visitNode_p -- confirm.
    start_p = ''

    def visitNode_span_footnote(self, node):
        """
        Render a footnote nested inside a footnote as plain content instead
        of opening another C{\\footnote}.
        """
        self.visitNodeDefault(node)

    def visitNode_p(self, node):
        """
        Render a paragraph, then restore the regular paragraph start on this
        instance so that later paragraphs are separated as usual.
        """
        self.visitNodeDefault(node)
        self.start_p = LatexSpitter.start_p
385
# Spitter for a book index document: its preamble uses the LaTeX `book`
# class, and its links can open chapters or pull in section files.
class BookLatexSpitter(LatexSpitter):
386
def visitNode_body(self, node):
    """
    Reduce the body to its table of contents and render that.

    The first node whose C{class} is C{toc} becomes the only child of the
    body; when there is no such node the body is rendered empty.
    """
    tocs=domhelpers.locateNodes([node], 'class', 'toc')
    domhelpers.clearNode(node)
    if tocs:
        node.appendChild(tocs[0])
    self.visitNodeDefault(node)
394
def visitNode_link(self, node):
    """
    Handle a C{link} element according to its C{rel} attribute.

    With a C{rel} the node's C{tagName} is changed in place to
    C{link_<rel>} and the node is dispatched again, which reaches methods
    such as C{visitNode_link_author}.  Without one the default handler is
    used.
    """
    if not node.hasAttribute('rel'):
        return self.visitNodeDefault(node)
    node.tagName += '_'+node.getAttribute('rel')
    self.visitNode(node)
400
def visitNode_link_author(self, node):
    """
    Write an C{\\author} command holding the link's C{text} attribute.
    """
    author = node.getAttribute('text')
    self.writer('\\author{%s}\n' % author)
403
def visitNode_link_stylesheet(self, node):
    """
    Turn a LaTeX stylesheet link into a C{\\usepackage} command.

    Only links that have both a C{type} and an C{href} attribute and whose
    type is C{application/x-latex} produce output; the package name is the
    C{href} without its extension.
    """
    if not (node.hasAttribute('type') and node.hasAttribute('href')):
        return
    if node.getAttribute('type') != 'application/x-latex':
        return
    packagebase = os.path.splitext(node.getAttribute('href'))[0]
    self.writer('\\usepackage{%s}\n' % packagebase)
410
# Preamble and body opening for the book: a one-sided `book` document that
# loads graphicx and the times/mathptmx fonts.
# NOTE(review): neither raw triple-quoted literal below is terminated within
# the lines shown here -- confirm against the complete definitions.
start_html = r'''\documentclass[oneside]{book}
411
\usepackage{graphicx}
412
\usepackage{times,mathptmx}
415
start_body = r'''\begin{document}
426
def visitNode_a(self, node):
    """
    Route an C{a} element by its attributes.

    A C{class} ending in C{listing} is rendered as a source listing.  Any
    other class C{X} is handled by the method C{visitNode_a_X} when one
    exists (for example C{visitNode_a_chapter} or C{visitNode_a_sect}).  A
    class with no such method is ignored and the element is routed by
    C{href}, then C{name}, then the default handler.
    """
    if node.hasAttribute('class'):
        a_class=node.getAttribute('class')
        if a_class.endswith('listing'):
            return self.visitNode_a_listing(node)
        handler = getattr(self, 'visitNode_a_%s' % a_class, None)
        if handler is not None:
            return handler(node)
    if node.hasAttribute('href'):
        return self.visitNode_a_href(node)
    if node.hasAttribute('name'):
        return self.visitNode_a_name(node)
    self.visitNodeDefault(node)
439
def visitNode_a_chapter(self, node):
    """
    Write the link's content as the title of a C{\\chapter}.
    """
    self.writer('\\chapter{')
    self.visitNodeDefault(node)
    self.writer('}\n')
444
def visitNode_a_sect(self, node):
    """
    Write an C{\\input} command for the file named by C{href}, with its
    extension removed.
    """
    base = os.path.splitext(node.getAttribute('href'))[0]
    self.writer('\\input{%s}\n' % base)
450
def processFile(spitter, fin):
    """
    Parse the open file C{fin} and feed its document element to C{spitter}.

    @param spitter: an object with a C{visitNode} method.
    @param fin: an open file; its C{name} is passed to the parser, which is
        also given a callable that returns C{fin} itself.
    """
    # XXX Use Inversion Of Control Pattern to orthogonalize the parsing API
    # from the Visitor Pattern application. (EnterPrise)
    document = tree.parseFileAndReport(fin.name, lambda x: fin)
    spitter.visitNode(document.documentElement)
457
# Convert the document `filename` to LaTeX using a spitter of the given class.
def convertFile(filename, spitterClass):
458
# The output file sits next to the input, with the extension replaced by .tex.
fout = open(os.path.splitext(filename)[0]+".tex", 'w')
459
# The spitter writes through fout.write and is given the input's directory
# and name.
spitter = spitterClass(fout.write, os.path.dirname(filename), filename)
461
# NOTE(review): `fin` is not assigned in the lines shown here and no close()
# of `fout` is visible -- confirm against the complete function.
processFile(spitter, fin)