1
# Copyright (c) 2001-2004 Twisted Matrix Laboratories.
2
# See LICENSE for details.
6
from twisted.web import microdom, domhelpers
7
from twisted.python import text, procutils
8
import os, os.path, re, string
9
from cStringIO import StringIO
15
# Matches any single one of the characters [ ] # $ % & _ { } ^ ~ and
# backslash (captured as group 1).  latexEscape() runs every match through
# _escapeMatch().
escapingRE = re.compile(r'([\[\]#$%&_{}^~\\])')
16
# Matches a lowercase ASCII letter (group 1) immediately followed by an
# uppercase ASCII letter (group 2).  visitNode_code() uses it to insert an
# optional \linebreak[1] between the two letters.
lowerUpperRE = re.compile(r'([a-z])([A-Z])')
18
def _escapeMatch(match):
21
return '$\\backslash$'
31
def latexEscape(text):
    """
    Return C{text} prepared for inclusion in LaTeX output.

    Every character matched by C{escapingRE} is replaced with whatever
    C{_escapeMatch} returns for it; afterwards each newline is turned into a
    single space.
    """
    escaped = escapingRE.sub(_escapeMatch, text)
    return ' '.join(escaped.split('\n'))
35
entities = {'amp': '\&', 'gt': '>', 'lt': '<', 'quot': '"',
36
'copy': '\\copyright', 'mdash': '---', 'rdquo': '``',
43
path = os.path.normpath(os.path.join(cwd, path))
44
if path.startswith(cwd + '/'):
45
path = path[len(cwd)+1:]
46
return path.replace('\\', '/') # windows slashes make LaTeX blow up
49
def getLatexText(node, writer, filter=lambda x:x, entities=entities):
    """
    Feed the text found in C{node} to C{writer}.

    A node with an C{eref} attribute is written as its replacement from
    C{entities} (an empty string when the reference is not in the mapping).
    A node with a C{data} attribute is written after its data has been passed
    through C{filter}.  Any other node is handled by recursing over its
    C{childNodes} with the same C{writer}, C{filter} and C{entities}.

    @return: the result of C{writer} for an entity or data node, C{None} for
        a node that is only traversed.
    """
    if hasattr(node, 'eref'):
        replacement = entities.get(node.eref, '')
        return writer(replacement)

    if hasattr(node, 'data'):
        filtered = filter(node.data)
        return writer(filtered)

    # Neither an entity reference nor character data: descend.
    for childNode in node.childNodes:
        getLatexText(childNode, writer, filter, entities)
57
class BaseLatexSpitter:
59
def __init__(self, writer, currDir='.', filename=''):
61
self.currDir = currDir
62
self.filename = filename
64
def visitNode(self, node):
65
if isinstance(node, microdom.Comment):
67
if not hasattr(node, 'tagName'):
68
self.writeNodeData(node)
70
getattr(self, 'visitNode_'+node.tagName, self.visitNodeDefault)(node)
72
def visitNodeDefault(self, node):
73
self.writer(getattr(self, 'start_'+node.tagName, ''))
74
for child in node.childNodes:
76
self.writer(getattr(self, 'end_'+node.tagName, ''))
78
def visitNode_a(self, node):
    """
    Dispatch an C{<a>} element to the handler matching its attributes.

    The checks are made in this order: a C{class} ending in C{listing}, then
    an C{href}, then a C{name}.  The matching handler's result is returned.
    An anchor with none of these is rendered by C{visitNodeDefault} and
    C{None} is returned.
    """
    isListing = (node.hasAttribute('class')
                 and node.getAttribute('class').endswith('listing'))
    if isListing:
        handler = self.visitNode_a_listing
    elif node.hasAttribute('href'):
        handler = self.visitNode_a_href
    elif node.hasAttribute('name'):
        handler = self.visitNode_a_name
    else:
        self.visitNodeDefault(node)
        return
    return handler(node)
88
def visitNode_span(self, node):
89
if not node.hasAttribute('class'):
90
return self.visitNodeDefault(node)
91
node.tagName += '_'+node.getAttribute('class')
94
visitNode_div = visitNode_span
96
def visitNode_h1(self, node):
99
def visitNode_style(self, node):
103
class LatexSpitter(BaseLatexSpitter):
106
# True when procutils.which("dia") returns a truthy result.  visitNode_img()
# only checks for a readable .dia file next to an image when this is set.
diaHack = bool(procutils.which("dia"))
108
def writeNodeData(self, node):
110
getLatexText(node, buf.write, latexEscape)
111
self.writer(buf.getvalue().replace('<', '$<$').replace('>', '$>$'))
113
def visitNode_head(self, node):
114
authorNodes = domhelpers.findElementsWithAttribute(node, 'rel', 'author')
115
authorNodes = [n for n in authorNodes if n.tagName == 'link']
118
self.writer('\\author{')
120
for aNode in authorNodes:
121
name = aNode.getAttribute('title', '')
122
href = aNode.getAttribute('href', '')
123
if href.startswith('mailto:'):
128
name += '$<$' + href + '$>$'
132
self.writer(' \\and '.join(authors))
135
self.visitNodeDefault(node)
137
def visitNode_pre(self, node):
138
self.writer('\\begin{verbatim}\n')
140
getLatexText(node, buf.write)
141
self.writer(text.removeLeadingTrailingBlanks(buf.getvalue()))
142
self.writer('\\end{verbatim}\n')
144
def visitNode_code(self, node):
146
getLatexText(node, fout.write, latexEscape)
147
data = lowerUpperRE.sub(r'\1\\linebreak[1]\2', fout.getvalue())
148
data = data[:1] + data[1:].replace('.', '.\\linebreak[1]')
149
self.writer('\\texttt{'+data+'}')
151
def visitNode_img(self, node):
152
fileName = os.path.join(self.currDir, node.getAttribute('src'))
153
target, ext = os.path.splitext(fileName)
154
if self.diaHack and os.access(target + '.dia', os.R_OK):
156
fileName = target + ext
157
f = getattr(self, 'convert_'+ext[1:], None)
160
target = os.path.join(self.currDir, os.path.basename(target)+'.eps')
162
target = os.path.basename(target)
163
self._write_img(target)
165
def _write_img(self, target):
166
"""Write LaTeX for image."""
167
self.writer('\\begin{center}\\includegraphics[%%\n'
169
'\\textwidth,height=1.0\\textheight,\nkeepaspectratio]'
170
'{%s}\\end{center}\n' % target)
172
def convert_png(self, src, target):
173
# XXX there's a *reason* Python comes with the pipes module -
174
# someone fix this to use it.
175
r = os.system('pngtopnm "%s" | pnmtops -noturn > "%s"' % (src, target))
179
def convert_dia(self, src, target):
180
# EVIL DISGUSTING HACK
181
data = os.popen("gunzip -dc %s" % (src)).read()
182
pre = '<dia:attribute name="scaling">\n <dia:real val="1"/>'
183
post = '<dia:attribute name="scaling">\n <dia:real val="0.5"/>'
184
open('%s_hacked.dia' % (src), 'wb').write(data.replace(pre, post))
185
os.system('gzip %s_hacked.dia' % (src,))
186
os.system('mv %s_hacked.dia.gz %s_hacked.dia' % (src,src))
187
# Let's pretend we never saw that.
189
# Silly dia needs an X server, even though it doesn't display anything.
190
# If this is a problem for you, try using Xvfb.
191
os.system("dia %s_hacked.dia -n -e %s" % (src, target))
193
def visitNodeHeader(self, node):
194
level = (int(node.tagName[1])-2)+self.baseLevel
195
self.writer('\n\n\\'+level*'sub'+'section{')
196
spitter = HeadingLatexSpitter(self.writer, self.currDir, self.filename)
197
spitter.visitNodeDefault(node)
200
def visitNode_a_listing(self, node):
    """
    Write the file referenced by the anchor as a verbatim listing with a
    caption underneath.

    The file named by the node's C{href} attribute is looked up relative to
    C{self.currDir}.  Trailing whitespace is stripped from every line, the
    first C{skipLines} lines (0 when the attribute is absent) are dropped,
    and the remainder is written inside a C{verbatim} environment.  The
    caption is the anchor's text, or C{'Source listing'} when that text is
    just the file's base name, followed by the base name itself.

    @raise IOError: if the listing file cannot be opened; this happens after
        the opening C{verbatim} line has already been written.
    """
    fileName = os.path.join(self.currDir, node.getAttribute('href'))
    self.writer('\\begin{verbatim}\n')

    # Close the listing explicitly instead of leaving the handle open until
    # the file object happens to be garbage collected.
    listing = open(fileName)
    try:
        lines = [line.rstrip() for line in listing]
    finally:
        listing.close()

    lines = lines[int(node.getAttribute('skipLines', 0)):]
    self.writer(text.removeLeadingTrailingBlanks('\n'.join(lines)))
    self.writer('\\end{verbatim}')

    # Write a caption for this source listing
    fileName = os.path.basename(fileName)
    caption = domhelpers.getNodeText(node)
    if caption == fileName:
        caption = 'Source listing'
    # The backslashes are spelled out: '\p' and '\l' are not valid escape
    # sequences and only produced a literal backslash by accident.
    self.writer('\\parbox[b]{\\linewidth}{\\begin{center}%s --- '
                '\\begin{em}%s\\end{em}\\end{center}}'
                % (latexEscape(caption), latexEscape(fileName)))
217
def visitNode_a_href(self, node):
218
supported_schemes=['http', 'https', 'ftp', 'mailto']
219
href = node.getAttribute('href')
220
if urlparse.urlparse(href)[0] in supported_schemes:
221
text = domhelpers.getNodeText(node)
222
self.visitNodeDefault(node)
224
self.writer('\\footnote{%s}' % latexEscape(href))
226
path, fragid = (href.split('#', 1) + [None])[:2]
230
path = os.path.join(os.path.dirname(self.filename), path)
232
#path = os.path.basename(self.filename)
234
# # Hack for linking to man pages from howtos, i.e.
235
# # ../doc/foo-man.html -> foo-man.html
236
# path = os.path.basename(path)
238
path = realpath(path)
241
ref = path + 'HASH' + fragid
244
self.writer('\\textit{')
245
self.visitNodeDefault(node)
247
self.writer('\\loreref{%s}' % ref)
249
def visitNode_a_name(self, node):
    """
    Emit a C{\\label} for a named anchor, then render the anchor's content.

    The label text is C{realpath(self.filename)}, the literal C{HASH} and
    the anchor's C{name} attribute, in that order.
    """
    # An earlier revision keyed the label on os.path.basename(self.filename)
    # instead of realpath(self.filename).
    labelPath = realpath(self.filename)
    anchorName = node.getAttribute('name')
    self.writer('\\label{%sHASH%s}' % (labelPath, anchorName))
    self.visitNodeDefault(node)
256
def visitNode_table(self, node):
257
rows = [[col for col in row.childNodes
258
if getattr(col, 'tagName', None) in ('th', 'td')]
259
for row in node.childNodes if getattr(row, 'tagName', None)=='tr']
260
numCols = 1+max([len(row) for row in rows])
261
self.writer('\\begin{table}[ht]\\begin{center}')
262
self.writer('\\begin{tabular}{@{}'+'l'*numCols+'@{}}')
268
if col.tagName == 'th':
270
self.writer('\\\\\n') #\\ ends lines
272
self.writer('\\hline\n')
273
self.writer('\\end{tabular}\n')
274
if node.hasAttribute('title'):
275
self.writer('\\caption{%s}'
276
% latexEscape(node.getAttribute('title')))
277
self.writer('\\end{center}\\end{table}\n')
279
def visitNode_span_footnote(self, node):
280
self.writer('\\footnote{')
281
spitter = FootnoteLatexSpitter(self.writer, self.currDir, self.filename)
282
spitter.visitNodeDefault(node)
285
def visitNode_span_index(self, node):
    """
    Emit an C{\\index} entry for the node's C{value} attribute, then render
    the node through C{visitNodeDefault}.
    """
    entry = node.getAttribute('value')
    self.writer('\\index{%s}\n' % entry)
    self.visitNodeDefault(node)
289
visitNode_h2 = visitNode_h3 = visitNode_h4 = visitNodeHeader
291
start_title = '\\title{'
300
start_html = '''\\documentclass{article}
301
\\newcommand{\\loreref}[1]{%
302
\\ifthenelse{\\value{page}=\\pageref{#1}}%
304
{ (page \\pageref{#1})}%
307
start_body = '\\begin{document}\n\\maketitle\n'
308
end_body = '\\end{document}'
310
start_dl = '\\begin{description}\n'
311
end_dl = '\\end{description}\n'
312
start_ul = '\\begin{itemize}\n'
313
end_ul = '\\end{itemize}\n'
315
start_ol = '\\begin{enumerate}\n'
316
end_ol = '\\end{enumerate}\n'
327
start_strong = start_em = '\\begin{em}'
328
end_strong = end_em = '\\end{em}'
333
start_div_note = '\\begin{quotation}\\textbf{Note:}'
334
end_div_note = '\\end{quotation}'
336
start_th = '\\textbf{'
340
class SectionLatexSpitter(LatexSpitter):
344
start_title = '\\section{'
346
def visitNode_title(self, node):
    """
    Render the title, then write a C{\\label} built from
    C{realpath(self.filename)} followed by one extra closing brace and a
    newline.
    """
    self.visitNodeDefault(node)
    # An earlier revision labelled with os.path.basename(self.filename).
    labelPath = realpath(self.filename)
    self.writer('\\label{%s}}\n' % labelPath)
351
end_title = end_body = start_body = start_html = ''
354
class ChapterLatexSpitter(SectionLatexSpitter):
356
start_title = '\\chapter{'
359
class HeadingLatexSpitter(BaseLatexSpitter):
363
writeNodeData = LatexSpitter.writeNodeData.im_func
366
class FootnoteLatexSpitter(LatexSpitter):
367
"""For multi-paragraph footnotes, this avoids having an empty leading
372
def visitNode_span_footnote(self, node):
    """
    Render a footnote span met while already inside a footnote.

    The node is simply passed to C{visitNodeDefault}; this method itself
    writes nothing around it.
    """
    self.visitNodeDefault(node)
375
def visitNode_p(self, node):
    """
    Render a paragraph, then set C{start_p} on this instance to
    C{LatexSpitter.start_p}.

    NOTE(review): presumably the class-level C{start_p} differs from
    C{LatexSpitter.start_p}, so that only the first paragraph of a footnote
    is treated specially -- confirm against the class body.
    """
    self.visitNodeDefault(node)
    # Instance attribute: takes effect for every paragraph visited after
    # this one on the same spitter.
    self.start_p = LatexSpitter.start_p
379
class BookLatexSpitter(LatexSpitter):
380
def visitNode_body(self, node):
381
tocs=domhelpers.locateNodes([node], 'class', 'toc')
382
domhelpers.clearNode(node)
385
node.appendChild(toc)
386
self.visitNodeDefault(node)
388
def visitNode_link(self, node):
389
if not node.hasAttribute('rel'):
390
return self.visitNodeDefault(node)
391
node.tagName += '_'+node.getAttribute('rel')
394
def visitNode_link_author(self, node):
    """
    Write an C{\\author} line containing the node's C{text} attribute.
    """
    authorText = node.getAttribute('text')
    self.writer('\\author{%s}\n' % authorText)
397
def visitNode_link_stylesheet(self, node):
    """
    Turn a LaTeX stylesheet link into a C{\\usepackage} line.

    Nothing is written unless the node has both a C{type} and an C{href}
    attribute and the type is C{application/x-latex}.  The package name is
    the C{href} with its file extension removed.
    """
    if not (node.hasAttribute('type') and node.hasAttribute('href')):
        return
    if node.getAttribute('type') != 'application/x-latex':
        return
    packageBase = os.path.splitext(node.getAttribute('href'))[0]
    self.writer('\\usepackage{%s}\n' % packageBase)
404
start_html = r'''\documentclass[oneside]{book}
405
\usepackage{graphicx}
406
\usepackage{times,mathptmx}
409
start_body = r'''\begin{document}
420
def visitNode_a(self, node):
421
if node.hasAttribute('class'):
422
a_class=node.getAttribute('class')
423
if a_class.endswith('listing'):
424
return self.visitNode_a_listing(node)
426
return getattr(self, 'visitNode_a_%s' % a_class)(node)
427
if node.hasAttribute('href'):
428
return self.visitNode_a_href(node)
429
if node.hasAttribute('name'):
430
return self.visitNode_a_name(node)
431
self.visitNodeDefault(node)
433
def visitNode_a_chapter(self, node):
434
self.writer('\\chapter{')
435
self.visitNodeDefault(node)
438
def visitNode_a_sect(self, node):
    """
    Write an C{\\input} line for the anchor's C{href}, with the file
    extension removed.
    """
    href = node.getAttribute('href')
    stem = os.path.splitext(href)[0]
    self.writer('\\input{%s}\n' % stem)
444
def processFile(spitter, fin):
    """
    Parse C{fin} with C{microdom.parse} and hand the resulting document
    element to C{spitter.visitNode}.
    """
    document = microdom.parse(fin)
    spitter.visitNode(document.documentElement)
449
def convertFile(filename, spitterClass):
450
fout = open(os.path.splitext(filename)[0]+".tex", 'w')
451
spitter = spitterClass(fout.write, os.path.dirname(filename), filename)
453
processFile(spitter, fin)