1
# Copyright (c) 2001-2009 Twisted Matrix Laboratories.
2
# See LICENSE for details.
5
from itertools import count
6
import re, os, cStringIO, time, cgi, string, urlparse
7
from xml.dom import minidom as dom
8
from xml.sax.handler import ErrorHandler, feature_validation
9
from xml.dom.pulldom import SAX2DOM
10
from xml.sax import make_parser
11
from xml.sax.xmlreader import InputSource
13
from twisted.python import htmlizer, text
14
from twisted.python.filepath import FilePath
15
from twisted.python.deprecate import deprecated
16
from twisted.python.versions import Version
17
from twisted.web import domhelpers
18
import process, latex, indexer, numberer, htmlbook
20
# relative links to html files
21
def fixLinks(document, ext):
    """
    Rewrite links to XHTML lore input documents so they point to lore XHTML
    output documents.

    Any node with an C{href} attribute which does not contain a value starting
    with C{http}, C{https}, C{ftp}, or C{mailto} and which does not have a
    C{class} attribute of C{absolute} or which contains C{listing} and which
    does point to an URL ending with C{html} will have that attribute value
    rewritten so that the filename extension is C{ext} instead of C{html}.
    A trailing C{#fragment} is carried over to the rewritten link unchanged.

    @type document: A DOM Node or Document
    @param document: The input document which contains all of the content to be
        presented.

    @type ext: C{str}
    @param ext: The extension to use when selecting an output file name.  This
        replaces the extension of the input file name.

    @return: C{None}
    """
    supported_schemes = ['http', 'https', 'ftp', 'mailto']
    for node in domhelpers.findElementsWithAttribute(document, 'href'):
        href = node.getAttribute("href")
        if urlparse.urlparse(href)[0] in supported_schemes:
            continue
        nodeClass = node.getAttribute("class")
        if nodeClass == "absolute":
            continue
        if nodeClass.find('listing') != -1:
            continue

        # This is a relative link, so it should be munged.  Separate the
        # fragment first so that neither the "ends with html" test nor
        # os.path.splitext ever looks at it.
        parts = href.split('#', 1)
        path = parts[0]
        if path.endswith('html'):
            newHref = os.path.splitext(path)[0] + ext
            if len(parts) == 2:
                newHref += '#' + parts[1]
            node.setAttribute("href", newHref)
63
def addMtime(document, fullpath):
    """
    Set the last modified time of the given document.

    Every node with a C{class} attribute set to C{mtime} gets a text node
    appended which holds the modification time of C{fullpath}, formatted by
    L{time.ctime}.

    @type document: A DOM Node or Document
    @param document: The output template which defines the presentation of the
        last modified time.

    @type fullpath: C{str}
    @param fullpath: The file name from which to take the last modified time.

    @return: C{None}
    """
    for node in domhelpers.findElementsWithAttribute(document, "class", "mtime"):
        txt = dom.Text()
        txt.data = time.ctime(os.path.getmtime(fullpath))
        node.appendChild(txt)
85
def _getAPI(node):
    """
    Retrieve the fully qualified Python name represented by the given node.

    The name is represented by one or two aspects of the node: the value of the
    node's first child forms the end of the name.  If the node has a C{base}
    attribute, that attribute's value is prepended to the node's value, with
    C{.} separating the two parts.

    @type node: A DOM Element
    @param node: The API reference node; it must have at least one child.

    @rtype: C{str}
    @return: The fully qualified Python name.
    """
    base = ""
    if node.hasAttribute("base"):
        base = node.getAttribute("base") + "."
    return base + node.childNodes[0].nodeValue
102
def fixAPI(document, url):
    """
    Replace API references with links to API documentation.

    Each node with a C{class} attribute set to C{API} has all of its children
    moved into a new C{a} element, which becomes the node's only child.  The
    node's C{base} attribute, if any, is removed afterwards.

    @type document: A DOM Node or Document
    @param document: The input document which contains all of the content to be
        presented.

    @type url: C{str}
    @param url: A string which will be interpolated with the fully qualified
        Python name of any API reference encountered in the input document, the
        result of which will be used as a link to API documentation for that
        name in the output document.

    @return: C{None}
    """
    for node in domhelpers.findElementsWithAttribute(document, "class", "API"):
        # The name must be computed before the children are moved and before
        # the base attribute is dropped.
        fullname = _getAPI(node)
        anchor = dom.Element('a')
        anchor.setAttribute('href', url % (fullname,))
        anchor.setAttribute('title', fullname)
        while node.childNodes:
            child = node.childNodes[0]
            node.removeChild(child)
            anchor.appendChild(child)
        node.appendChild(anchor)
        if node.hasAttribute('base'):
            node.removeAttribute('base')
134
def fontifyPython(document):
    """
    Syntax color any node in the given document which contains a Python source
    listing.

    A node qualifies when it is a C{pre} element whose C{class} attribute is
    exactly C{python}; each such node is handed to L{fontifyPythonNode}.

    @type document: A DOM Node or Document
    @param document: The input document which contains all of the content to be
        presented.

    @return: C{None}
    """
    def matcher(node):
        return (node.nodeName == 'pre' and node.hasAttribute('class') and
                node.getAttribute('class') == 'python')
    for node in domhelpers.findElements(document, matcher):
        fontifyPythonNode(node)
153
def fontifyPythonNode(node):
    """
    Syntax color the given node containing Python source code.

    The node must have a parent.  The node is replaced in its parent by the
    element produced by L{htmlizer.filter}, which is given a C{class} of
    C{python} and a leading line-number element.

    @type node: A DOM Element
    @param node: The node holding the Python source text.

    @return: C{None}
    """
    # Extract the plain source text, turning the three listed entities back
    # into the characters they stand for.
    oldio = cStringIO.StringIO()
    latex.getLatexText(node, oldio.write,
                       entities={'lt': '<', 'gt': '>', 'amp': '&'})
    oldio = cStringIO.StringIO(oldio.getvalue().strip() + '\n')
    howManyLines = len(oldio.getvalue().splitlines())

    newio = cStringIO.StringIO()
    htmlizer.filter(oldio, newio, writer=htmlizer.SmallerHTMLWriter)
    lineLabels = _makeLineNumbers(howManyLines)
    newel = dom.parseString(newio.getvalue()).documentElement
    newel.setAttribute("class", "python")
    node.parentNode.replaceChild(newel, node)
    newel.insertBefore(lineLabels, newel.firstChild)
176
def addPyListings(document, dir):
    """
    Insert Python source listings into the given document from files in the
    given directory based on C{py-listing} nodes.

    Any node in C{document} with a C{class} attribute set to C{py-listing} will
    have source lines taken from the file named in that node's C{href}
    attribute (searched for in C{dir}) inserted in place of that node.

    If a node has a C{skipLines} attribute, its value will be parsed as an
    integer and that many lines will be skipped at the beginning of the source
    file.

    @type document: A DOM Node or Document
    @param document: The document within which to make listing replacements.

    @type dir: C{str}
    @param dir: The directory in which to find source files containing the
        referenced Python listings.

    @return: C{None}
    """
    for node in domhelpers.findElementsWithAttribute(document, "class",
                                                     "py-listing"):
        filename = node.getAttribute("href")

        # Close the source file promptly instead of leaking the handle.
        sourceFile = open(os.path.join(dir, filename))
        try:
            lines = [line.rstrip() for line in sourceFile.readlines()]
        finally:
            sourceFile.close()

        skip = node.getAttribute('skipLines') or 0
        lines = lines[int(skip):]
        howManyLines = len(lines)
        data = '\n'.join(lines)

        data = cStringIO.StringIO(text.removeLeadingTrailingBlanks(data))
        outfile = cStringIO.StringIO()
        htmlizer.filter(data, outfile, writer=htmlizer.SmallerHTMLWriter)
        sourceNode = dom.parseString(outfile.getvalue()).documentElement
        sourceNode.insertBefore(_makeLineNumbers(howManyLines),
                                sourceNode.firstChild)
        _replaceWithListing(node, sourceNode.toxml(), filename, "py-listing")
217
def _makeLineNumbers(howMany):
219
Return an element which will render line numbers for a source listing.
221
@param howMany: The number of lines in the source listing.
222
@type howMany: C{int}
224
@return: An L{dom.Element} which can be added to the document before
225
the source listing to add line numbers to it.
227
# Figure out how many digits wide the widest line number label will be.
228
width = len(str(howMany))
230
# Render all the line labels with appropriate padding
231
labels = ['%*d' % (width, i) for i in range(1, howMany + 1)]
233
# Create a p element with the right style containing the labels
235
p.setAttribute('class', 'py-linenumber')
237
t.data = '\n'.join(labels) + '\n'
242
def _replaceWithListing(node, val, filename, class_):
    """
    Replace C{node} in its parent with a captioned listing.

    The replacement is a C{div} containing C{val} followed by a caption which
    links to C{filename}.  The caption title is the text of C{node}, or
    C{'Source listing'} when that text is just the base name of C{filename}.

    @type node: A DOM Element
    @param node: The listing placeholder; it must have a parent.

    @type val: C{str}
    @param val: Well-formed XML markup for the listing body.  It is inserted
        verbatim, so it must already be escaped.

    @type filename: C{str}
    @param filename: The listing's file name, used for the caption link.

    @type class_: C{str}
    @param class_: The value for the C{class} attribute of the outer C{div}.

    @return: C{None}
    """
    captionTitle = domhelpers.getNodeText(node)
    if captionTitle == os.path.basename(filename):
        captionTitle = 'Source listing'
    # The caption and file name are plain text, so they must be escaped
    # before being spliced into markup which is about to be parsed.
    markup = ('<div class="%s">%s<div class="caption">%s - '
              '<a href="%s"><span class="filename">%s</span></a></div></div>' %
              (class_, val, cgi.escape(captionTitle),
               cgi.escape(filename, True), cgi.escape(filename)))
    newnode = dom.parseString(markup).documentElement
    node.parentNode.replaceChild(newnode, node)
254
def addHTMLListings(document, dir):
    """
    Insert HTML source listings into the given document from files in the given
    directory based on C{html-listing} nodes.

    Any node in C{document} with a C{class} attribute set to C{html-listing}
    will have source lines taken from the file named in that node's C{href}
    attribute (searched for in C{dir}) inserted in place of that node.

    @type document: A DOM Node or Document
    @param document: The document within which to make listing replacements.

    @type dir: C{str}
    @param dir: The directory in which to find source files containing the
        referenced HTML listings.

    @return: C{None}
    """
    for node in domhelpers.findElementsWithAttribute(document, "class",
                                                     "html-listing"):
        filename = node.getAttribute("href")
        # Close the listing file promptly instead of leaking the handle.
        listingFile = open(os.path.join(dir, filename))
        try:
            contents = listingFile.read()
        finally:
            listingFile.close()
        val = '<pre class="htmlsource">\n%s</pre>' % cgi.escape(contents)
        _replaceWithListing(node, val, filename, "html-listing")
281
def addPlainListings(document, dir):
    """
    Insert text listings into the given document from files in the given
    directory based on C{listing} nodes.

    Any node in C{document} with a C{class} attribute set to C{listing} will
    have source lines taken from the file named in that node's C{href}
    attribute (searched for in C{dir}) inserted in place of that node.

    @type document: A DOM Node or Document
    @param document: The document within which to make listing replacements.

    @type dir: C{str}
    @param dir: The directory in which to find source files containing the
        referenced text listings.

    @return: C{None}
    """
    for node in domhelpers.findElementsWithAttribute(document, "class",
                                                     "listing"):
        filename = node.getAttribute("href")
        # Close the listing file promptly instead of leaking the handle.
        listingFile = open(os.path.join(dir, filename))
        try:
            contents = listingFile.read()
        finally:
            listingFile.close()
        val = '<pre>\n%s</pre>' % cgi.escape(contents)
        _replaceWithListing(node, val, filename, "listing")
308
def getHeaders(document):
    """
    Return all H2 and H3 nodes in the given document.

    @type document: A DOM Node or Document
    @param document: The document to search.

    @return: The nodes whose name matches C{h2} or C{h3}, as found by
        L{domhelpers.findElements}.
    """
    return domhelpers.findElements(
        document,
        lambda n, m=re.compile('h[23]$').match: m(n.nodeName))
322
def generateToC(document):
    """
    Create a table of contents for the given document.

    As a side effect, an empty named C{a} anchor is appended to every header
    found so that the table of contents can link to it.

    @type document: A DOM Node or Document
    @param document: The document whose C{h2} and C{h3} headers are listed.

    @raise ValueError: If an C{h3} is encountered before any C{h2}.

    @return: a Node containing a table of contents based on the headers of the
        given document.
    """
    # Group each h2 with the h3 elements which follow it.
    subHeaders = None
    headers = []
    for element in getHeaders(document):
        if element.tagName == 'h2':
            subHeaders = []
            headers.append((element, subHeaders))
        elif subHeaders is None:
            raise ValueError(
                "No H3 element is allowed until after an H2 element")
        else:
            subHeaders.append(element)

    # Source of unique anchor numbers shared by all ToC entries.
    auto = count().next

    def addItem(headerElement, parent):
        # Add a linked <li> for headerElement to parent and drop the matching
        # named anchor into the header itself.
        anchor = dom.Element('a')
        name = 'auto%d' % (auto(),)
        anchor.setAttribute('href', '#' + name)
        text = dom.Text()
        text.data = domhelpers.getNodeText(headerElement)
        anchor.appendChild(text)
        headerNameItem = dom.Element('li')
        headerNameItem.appendChild(anchor)
        parent.appendChild(headerNameItem)
        anchor = dom.Element('a')
        anchor.setAttribute('name', name)
        headerElement.appendChild(anchor)

    toc = dom.Element('ol')
    for headerElement, subHeaders in headers:
        addItem(headerElement, toc)
        if subHeaders:
            subtoc = dom.Element('ul')
            toc.appendChild(subtoc)
            for subHeaderElement in subHeaders:
                addItem(subHeaderElement, subtoc)

    return toc
373
def putInToC(document, toc):
    """
    Insert the given table of contents into the given document.

    The node with C{class} attribute set to C{toc} has its children replaced
    with C{toc}.  Only the first such node is changed; if there is none, the
    document is left alone.

    @type document: A DOM Node or Document
    @type toc: A DOM Node

    @return: C{None}
    """
    tocOrig = domhelpers.findElementsWithAttribute(document, 'class', 'toc')
    if tocOrig:
        tocOrig = tocOrig[0]
        tocOrig.childNodes = [toc]
390
def removeH1(document):
    """
    Replace all C{h1} nodes in the given document with empty C{span} nodes.

    C{h1} nodes mark up document sections and the output template is given an
    opportunity to present this information in a different way.

    @type document: A DOM Node or Document
    @param document: The input document which contains all of the content to be
        presented.

    @return: C{None}
    """
    for node in domhelpers.findNodesNamed(document, 'h1'):
        # A DOM node can only live in one place, so every h1 needs its own
        # replacement span; sharing one would leave only the last in place.
        node.parentNode.replaceChild(dom.Element('span'), node)
410
def footnotes(document):
    """
    Find footnotes in the given document, move them to the end of the body, and
    generate links to them.

    A footnote is any node with a C{class} attribute set to C{footnote}.
    Footnote links are generated as superscript.  Footnotes are collected in a
    C{ol} node at the end of the document.

    @type document: A DOM Node or Document
    @param document: The input document which contains all of the content to be
        presented.

    @return: C{None}
    """
    footnotes = domhelpers.findElementsWithAttribute(document, "class",
                                                     "footnote")
    if not footnotes:
        return
    footnoteElement = dom.Element('ol')
    number = 1
    for footnote in footnotes:
        href = dom.parseString('<a href="#footnote-%(id)d">'
                               '<super>%(id)d</super></a>'
                               % {'id': number}).documentElement
        title = ' '.join(domhelpers.getNodeText(footnote).split())
        href.setAttribute('title', title)
        target = dom.Element('a')
        target.setAttribute('name', 'footnote-%d' % (number,))
        # The footnote is listed under its new anchor first and only then
        # swapped out of its original parent for the link.
        target.childNodes = [footnote]
        footnoteContent = dom.Element('li')
        footnoteContent.childNodes = [target]
        footnoteElement.childNodes.append(footnoteContent)
        footnote.parentNode.replaceChild(href, footnote)
        number += 1
    body = domhelpers.findNodesNamed(document, "body")[0]
    header = dom.parseString('<h2>Footnotes</h2>').documentElement
    body.childNodes.append(header)
    body.childNodes.append(footnoteElement)
454
def notes(document):
    """
    Find notes in the given document and mark them up as such.

    A note is any node with a C{class} attribute set to C{note}.

    (I think this is a very stupid feature.  When I found it I actually
    exclaimed out loud. -exarkun)

    @type document: A DOM Node or Document
    @param document: The input document which contains all of the content to be
        presented.

    @return: C{None}
    """
    notes = domhelpers.findElementsWithAttribute(document, "class", "note")
    notePrefix = dom.parseString('<strong>Note: </strong>').documentElement
    for note in notes:
        # The same prefix element is placed at the front of every note's
        # child list.
        note.childNodes.insert(0, notePrefix)
474
def compareMarkPos(a, b):
    """
    Perform in every way identically to L{cmp} for valid inputs.

    @param a: A two-item sequence; the first items are compared first, the
        second items only break ties.
    @param b: A two-item sequence, compared against C{a}.

    @return: The result of the deciding L{cmp} call.
    """
    linecmp = cmp(a[0], b[0])
    if linecmp:
        return linecmp
    return cmp(a[1], b[1])
compareMarkPos = deprecated(Version('Twisted', 9, 0, 0))(compareMarkPos)
486
def comparePosition(firstElement, secondElement):
    """
    Compare the two elements given by their position in the document or
    documents they were parsed from.

    The comparison is made on each element's C{_markpos} attribute.

    @type firstElement: C{dom.Element}
    @type secondElement: C{dom.Element}

    @return: C{-1}, C{0}, or C{1}, with the same meanings as the return value
        of L{cmp}.
    """
    return cmp(firstElement._markpos, secondElement._markpos)
comparePosition = deprecated(Version('Twisted', 9, 0, 0))(comparePosition)
502
def findNodeJustBefore(target, nodes):
    """
    Find the last Element which is a sibling of C{target} and is in C{nodes}.

    The preceding siblings of C{target} are searched nearest first.  If none
    of them is in C{nodes}, the search continues with the preceding siblings
    of C{target}'s parent, and so on up the tree.

    @param target: A node the previous sibling of which to return.
    @param nodes: A list of nodes which might be the right node.

    @raise RuntimeError: If the top of the tree is reached without finding a
        node from C{nodes}.

    @return: The previous sibling of C{target}.
    """
    while target is not None:
        node = target.previousSibling
        while node is not None:
            if node in nodes:
                return node
            node = node.previousSibling
        target = target.parentNode
    raise RuntimeError("Oops")
522
def getFirstAncestorWithSectionHeader(entry):
    """
    Visit the ancestors of C{entry} until one with at least one C{h2} child
    node is found, then return all of that node's C{h2} child nodes.

    @type entry: A DOM Node
    @param entry: The node from which to begin traversal.  This node itself is
        excluded from consideration.

    @rtype: C{list} of DOM Nodes
    @return: All C{h2} nodes of the ultimately selected parent node, or an
        empty list if no ancestor has any.
    """
    for a in domhelpers.getParents(entry)[1:]:
        headers = domhelpers.findNodesNamed(a, "h2")
        if len(headers) > 0:
            return headers
    return []
542
def getSectionNumber(header):
    """
    Retrieve the section number of the given node.

    This is probably intended to interact in a rather specific way with
    L{numberDocument}.

    @type header: A DOM Node or L{None}
    @param header: The section from which to extract a number.  The section
        number is the value of this node's first child.

    @return: C{None} or a C{str} giving the section number.
    """
    if not header:
        return None
    return domhelpers.gatherTextNodes(header.childNodes[0])
561
def getSectionReference(entry):
    """
    Find the section number which contains the given node.

    This function looks at the given node's ancestry until it finds a node
    which defines a section, then returns that section's number.

    @type entry: A DOM Node
    @param entry: The node for which to determine the section.

    @return: The section number, as returned by C{getSectionNumber} of the
        first ancestor of C{entry} which defines a section, as determined by
        L{getFirstAncestorWithSectionHeader}.
    """
    headers = getFirstAncestorWithSectionHeader(entry)
    myHeader = findNodeJustBefore(entry, headers)
    return getSectionNumber(myHeader)
582
def index(document, filename, chapterReference):
    """
    Extract index entries from the given document and store them for later use
    and insert named anchors so that the index can link back to those entries.

    Any node with a C{class} attribute set to C{index} is considered an index
    entry.  Each entry is registered with L{indexer.addEntry} using its
    C{value} attribute, then turned into an C{a} element whose only attribute
    is a C{name} of the form C{indexNN}.

    @type document: A DOM Node or Document
    @param document: The input document which contains all of the content to be
        presented.

    @type filename: C{str}
    @param filename: A link to the output for the given document which will be
        included in the index to link to any index entry found here.

    @param chapterReference: Used as the entry's reference when the entry has
        no section number of its own.  When false, a fixed placeholder
        reference is recorded instead.

    @return: C{None}
    """
    entries = domhelpers.findElementsWithAttribute(document, "class", "index")
    if not entries:
        return
    i = 0
    for entry in entries:
        i += 1
        anchor = 'index%02d' % i
        if chapterReference:
            ref = getSectionReference(entry) or chapterReference
        else:
            ref = 'link'
        indexer.addEntry(filename, anchor, entry.getAttribute('value'), ref)
        # does nodeName even affect anything?
        entry.nodeName = entry.tagName = entry.endTagName = 'a'
        # Iterate over a snapshot, since attributes are removed on the way.
        for attrName in list(entry.attributes.keys()):
            entry.removeAttribute(attrName)
        entry.setAttribute('name', anchor)
623
def setIndexLink(template, indexFilename):
    """
    Insert a link to an index document.

    Any node with a C{class} attribute set to C{index-link} will have its tag
    name changed to C{a} and its C{href} attribute set to C{indexFilename}.
    All of the node's other attributes are removed.

    @type template: A DOM Node or Document
    @param template: The output template which defines the presentation of the
        index link.

    @type indexFilename: C{str}
    @param indexFilename: The address of the index document to which to link.
        If any C{False} value, this function will remove all index-link nodes.

    @return: C{None}
    """
    indexLinks = domhelpers.findElementsWithAttribute(template,
                                                      "class",
                                                      "index-link")
    for link in indexLinks:
        # Treat every false value (not only None) as "no index", as the
        # documented contract promises.
        if not indexFilename:
            link.parentNode.removeChild(link)
        else:
            link.nodeName = link.tagName = link.endTagName = 'a'
            # Iterate over a snapshot, since attributes are removed on the way.
            for attrName in list(link.attributes.keys()):
                link.removeAttribute(attrName)
            link.setAttribute('href', indexFilename)
654
def numberDocument(document, chapterNumber):
    """
    Number the sections of the given document.

    A dot-separated chapter, section number is added to the beginning of each
    section, as defined by C{h2} nodes.  Sections are counted from 1 in the
    order the C{h2} nodes are found.

    This is probably intended to interact in a rather specific way with
    L{getSectionNumber}.

    @type document: A DOM Node or Document
    @param document: The input document which contains all of the content to be
        presented.

    @type chapterNumber: C{int}
    @param chapterNumber: The chapter number of this content in an overall
        document.

    @return: C{None}
    """
    i = 1
    for node in domhelpers.findNodesNamed(document, "h2"):
        label = dom.Text()
        label.data = "%s.%d " % (chapterNumber, i)
        node.insertBefore(label, node.firstChild)
        i += 1
683
def fixRelativeLinks(document, linkrel):
    """
    Replace relative links in C{src} and C{href} attributes with links relative
    to C{linkrel}.

    A link is left alone when it starts with C{/} or when its URL scheme is
    C{http}, C{https}, C{ftp}, or C{mailto}.

    @type document: A DOM Node or Document
    @param document: The output template.

    @type linkrel: C{str}
    @param linkrel: An prefix to apply to all relative links in C{src} or
        C{href} attributes in the input document when generating the output
        document.

    @return: C{None}
    """
    # Decide on the parsed scheme rather than a bare 'http' prefix, so that a
    # relative name such as 'httpd.html' is still recognised as relative.
    absoluteSchemes = ['http', 'https', 'ftp', 'mailto']
    for attr in 'src', 'href':
        for node in domhelpers.findElementsWithAttribute(document, attr):
            href = node.getAttribute(attr)
            if href.startswith('/'):
                continue
            if urlparse.urlparse(href)[0] in absoluteSchemes:
                continue
            node.setAttribute(attr, linkrel + href)
704
def setTitle(template, title, chapterNumber):
    """
    Add title and chapter number information to the template document.

    The title is added to the end of the first C{title} tag and the end of the
    first tag with a C{class} attribute set to C{title}.  If specified, the
    chapter is inserted before the title.

    Note that the chapter number is inserted into the C{title} list itself, so
    the caller's list is modified.

    @type template: A DOM Node or Document
    @param template: The output template which defines the presentation of the
        title.

    @type title: C{list} of DOM Nodes
    @param title: Nodes from the input document defining its title.

    @type chapterNumber: C{int}
    @param chapterNumber: The chapter number of this content in an overall
        document.  If not applicable, any C{False} value will result in this
        information being omitted.

    @return: C{None}
    """
    if numberer.getNumberSections() and chapterNumber:
        titleNode = dom.Text()
        # This is necessary in order for cloning below to work.
        titleNode.ownerDocument = template.ownerDocument
        titleNode.data = '%s. ' % (chapterNumber,)
        title.insert(0, titleNode)

    for nodeList in (domhelpers.findNodesNamed(template, "title"),
                     domhelpers.findElementsWithAttribute(template, "class",
                                                          'title')):
        if nodeList:
            for titleNode in title:
                nodeList[0].appendChild(titleNode.cloneNode(True))
743
def setAuthors(template, authors):
    """
    Add author information to the template document.

    Names and contact information for authors are added to each node with a
    C{class} attribute set to C{authors} and to the template head as C{link}
    nodes.

    @type template: A DOM Node or Document
    @param template: The output template which defines the presentation of the
        author information.

    @type authors: C{list} of two-tuples of C{str}
    @param authors: List of names and contact information for the authors of
        the input document, as C{(name, href)} pairs.

    @return: C{None}
    """
    lastIndex = len(authors) - 1
    for node in domhelpers.findElementsWithAttribute(template,
                                                     "class", 'authors'):

        # First, similarly to setTitle, insert text into an <div
        # class="authors">
        container = dom.Element('span')
        # The last author is found by position rather than by value, so that
        # an author listed twice is still rendered correctly.
        for position, (name, href) in enumerate(authors):
            anchor = dom.Element('a')
            anchor.setAttribute('href', href)
            anchorText = dom.Text()
            anchorText.data = name
            anchor.appendChild(anchorText)
            if position == lastIndex:
                if len(authors) > 1:
                    andText = dom.Text()
                    andText.data = 'and '
                    container.appendChild(andText)
                container.appendChild(anchor)
            else:
                container.appendChild(anchor)
                commaText = dom.Text()
                commaText.data = ', '
                container.appendChild(commaText)

        node.appendChild(container)

    # Second, add appropriate <link rel="author" ...> tags to the <head>.
    # The elements are built through the DOM so that names and addresses
    # containing markup characters are escaped on output.
    head = domhelpers.findNodesNamed(template, 'head')[0]
    for name, href in authors:
        link = dom.Element('link')
        link.setAttribute('rel', 'author')
        link.setAttribute('href', href)
        link.setAttribute('title', name)
        head.appendChild(link)
799
def setVersion(template, version):
    """
    Add a version indicator to the given template.

    The version is appended as text to every node with a C{class} attribute
    set to C{version}.

    @type template: A DOM Node or Document
    @param template: The output template which defines the presentation of the
        version information.

    @type version: C{str}
    @param version: The version string to add to the template.

    @return: C{None}
    """
    for node in domhelpers.findElementsWithAttribute(template, "class",
                                                     "version"):
        text = dom.Text()
        text.data = version
        node.appendChild(text)
820
def getOutputFileName(originalFileName, outputExtension, index=None):
    """
    Return a filename which is the same as C{originalFileName} except for the
    extension, which is replaced with C{outputExtension}.

    The old extension is removed together with its leading dot, so
    C{outputExtension} has to supply its own dot.  For example, if
    C{originalFileName} is C{'/foo/bar.baz'} and C{outputExtension} is
    C{'.quux'}, the return value will be C{'/foo/bar.quux'}.

    @type originalFileName: C{str}
    @type outputExtension: C{str}
    @param index: ignored, never passed.

    @rtype: C{str}
    """
    return os.path.splitext(originalFileName)[0] + outputExtension
838
def munge(document, template, linkrel, dir, fullpath, ext, url, config, outfileGenerator=getOutputFileName):
    """
    Mutate C{template} until it resembles C{document}.

    @type document: A DOM Node or Document
    @param document: The input document which contains all of the content to be
        presented.

    @type template: A DOM Node or Document
    @param template: The template document which defines the desired
        presentation format of the content.

    @type linkrel: C{str}
    @param linkrel: An prefix to apply to all relative links in C{src} or
        C{href} attributes in the input document when generating the output
        document.

    @type dir: C{str}
    @param dir: The directory in which to search for source listing files.

    @type fullpath: C{str}
    @param fullpath: The file name which contained the input document.

    @type ext: C{str}
    @param ext: The extension to use when selecting an output file name.  This
        replaces the extension of the input file name.

    @type url: C{str}
    @param url: A string which will be interpolated with the fully qualified
        Python name of any API reference encountered in the input document, the
        result of which will be used as a link to API documentation for that
        name in the output document.

    @type config: C{dict}
    @param config: Further specification of the desired form of the output.
        Valid keys in this dictionary::

            noapi: If present and set to a True value, links to API
                   documentation will not be generated.

            version: A string which will be included in the output to indicate
                     the version of this documentation.

    @type outfileGenerator: Callable of C{str}, C{str} returning C{str}
    @param outfileGenerator: Output filename factory.  This is invoked with the
        intput filename and C{ext} and the output document is serialized to the
        file with the name returned.

    @return: C{None}
    """
    fixRelativeLinks(template, linkrel)
    addMtime(template, fullpath)
    removeH1(document)
    if not config.get('noapi', False):
        fixAPI(document, url)
    fontifyPython(document)
    fixLinks(document, ext)
    addPyListings(document, dir)
    addHTMLListings(document, dir)
    addPlainListings(document, dir)
    putInToC(template, generateToC(document))
    footnotes(document)
    notes(document)

    setIndexLink(template, indexer.getIndexFilename())
    setVersion(template, config.get('version', ''))

    # Insert the document into the template
    chapterNumber = htmlbook.getNumber(fullpath)
    title = domhelpers.findNodesNamed(document, 'title')[0].childNodes
    setTitle(template, title, chapterNumber)
    if numberer.getNumberSections() and chapterNumber:
        numberDocument(document, chapterNumber)
    index(document, outfileGenerator(os.path.split(fullpath)[1], ext),
          htmlbook.getReference(fullpath))

    authors = domhelpers.findNodesNamed(document, 'link')
    authors = [(node.getAttribute('title') or '',
                node.getAttribute('href') or '')
               for node in authors
               if node.getAttribute('rel') == 'author']
    setAuthors(template, authors)

    # Move the input body's children into the template's body placeholder.
    body = domhelpers.findNodesNamed(document, "body")[0]
    tmplbody = domhelpers.findElementsWithAttribute(template, "class",
                                                    "body")[0]
    tmplbody.childNodes = body.childNodes
    tmplbody.setAttribute("class", "content")
928
class _LocationReportingErrorHandler(ErrorHandler):
    """
    Define a SAX error handler which can report the location of fatal
    errors.

    Unlike the errors reported during parsing by other APIs in the xml
    package, this one tries to mismatched tag errors by including the
    location of both the relevant opening and closing tags.

    @ivar contentHandler: The content handler whose C{_locationStack} is
        consulted to find where the unclosed tag was opened.
    """
    def __init__(self, contentHandler):
        self.contentHandler = contentHandler

    def fatalError(self, err):
        """
        Turn the given SAX error into a L{process.ProcessingFailure} which
        names the line and column of the problem.

        @raise process.ProcessingFailure: Always.
        """
        # Unfortunately, the underlying expat error code is only exposed as
        # a string.  I surely do hope no one ever goes and localizes expat.
        if err.getMessage() == 'mismatched tag':
            expect, begLine, begCol = self.contentHandler._locationStack[-1]
            endLine, endCol = err.getLineNumber(), err.getColumnNumber()
            raise process.ProcessingFailure(
                "mismatched close tag at line %d, column %d; expected </%s> "
                "(from line %d, column %d)" % (
                    endLine, endCol, expect, begLine, begCol))
        raise process.ProcessingFailure(
            '%s at line %d, column %d' % (err.getMessage(),
                                          err.getLineNumber(),
                                          err.getColumnNumber()))
956
class _TagTrackingContentHandler(SAX2DOM):
    """
    Define a SAX content handler which keeps track of the start location of
    all open tags.  This information is used by the above defined error
    handler to report useful locations when a fatal error is encountered.

    @ivar _locationStack: A C{list} of C{(name, line, column)} tuples, one for
        each element which has been opened but not yet closed, innermost last.
    """
    def __init__(self):
        SAX2DOM.__init__(self)
        self._locationStack = []

    def setDocumentLocator(self, locator):
        # Keep our own reference so start locations can be queried later.
        self._docLocator = locator
        SAX2DOM.setDocumentLocator(self, locator)

    def startElement(self, name, attrs):
        self._locationStack.append((name,
                                    self._docLocator.getLineNumber(),
                                    self._docLocator.getColumnNumber()))
        SAX2DOM.startElement(self, name, attrs)

    def endElement(self, name):
        self._locationStack.pop()
        SAX2DOM.endElement(self, name)
979
class _LocalEntityResolver(object):
    """
    Implement DTD loading (from a local source) for the limited number of
    DTDs which are allowed for Lore input documents.

    @ivar filename: The name of the file containing the lore input
        document.

    @ivar knownDTDs: A mapping from DTD system identifiers to L{FilePath}
        instances pointing to the corresponding DTD.
    """
    # Shorthand for "a file next to this module"; only needed while the
    # mapping below is being built.
    s = FilePath(__file__).sibling

    knownDTDs = {
        None: s("xhtml1-strict.dtd"),
        "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd": s("xhtml1-strict.dtd"),
        "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd": s("xhtml1-transitional.dtd"),
        "xhtml-lat1.ent": s("xhtml-lat1.ent"),
        "xhtml-symbol.ent": s("xhtml-symbol.ent"),
        "xhtml-special.ent": s("xhtml-special.ent"),
        }
    del s

    def __init__(self, filename):
        self.filename = filename


    def resolveEntity(self, publicId, systemId):
        """
        Return an L{InputSource} reading the local copy of the DTD or entity
        set identified by C{systemId}.

        @raise process.ProcessingFailure: If C{systemId} is not one of the
            keys of C{knownDTDs}.
        """
        source = InputSource()
        source.setSystemId(systemId)
        try:
            dtdPath = self.knownDTDs[systemId]
        except KeyError:
            raise process.ProcessingFailure(
                "Invalid DTD system identifier (%r) in %s. Only "
                "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd "
                "is allowed." % (systemId, self.filename))
        source.setByteStream(dtdPath.open())
        return source
1021
def parseFileAndReport(filename, _open=file):
    """
    Parse and return the contents of the given lore XHTML document.

    @type filename: C{str}
    @param filename: The name of a file containing a lore XHTML document to
        load.

    @param _open: The callable used to open C{filename}.

    @raise process.ProcessingFailure: When the contents of the specified file
        cannot be parsed, or when reading it fails with an C{IOError}.

    @rtype: A DOM Document
    @return: The document contained in C{filename}.
    """
    # Imported here because this module does not import sys at the top.
    import sys

    content = _TagTrackingContentHandler()
    error = _LocationReportingErrorHandler(content)
    parser = make_parser()
    parser.setContentHandler(content)
    parser.setErrorHandler(error)

    # In order to call a method on the expat parser which will be used by this
    # parser, we need the expat parser to be created.  This doesn't happen
    # until reset is called, normally by the parser's parse method.  That's too
    # late for us, since it will then go on to parse the document without
    # letting us do any extra set up.  So, force the expat parser to be created
    # here, and then disable reset so that the parser created is the one
    # actually used to parse our document.  Resetting is only needed if more
    # than one document is going to be parsed, and that isn't the case here.
    parser.reset()
    parser.reset = lambda: None

    # This is necessary to make the xhtml1 transitional declaration optional.
    # It causes LocalEntityResolver.resolveEntity(None, None) to be called.
    # LocalEntityResolver handles that case by giving out the xhtml1
    # transitional dtd.  Unfortunately, there is no public API for manipulating
    # the expat parser when using xml.sax.  Using the private _parser attribute
    # may break.  It's also possible that make_parser will return a parser
    # which doesn't use expat, but uses some other parser.  Oh well. :(
    parser._parser.UseForeignDTD(True)
    parser.setEntityResolver(_LocalEntityResolver(filename))

    # This is probably no-op because expat is not a validating parser.  Who
    # knows though, maybe you figured out a way to not use expat.
    parser.setFeature(feature_validation, False)

    fObj = _open(filename)
    try:
        try:
            parser.parse(fObj)
        except IOError:
            # sys.exc_info is used to get at the exception so that this
            # spelling is accepted by every Python version.
            e = sys.exc_info()[1]
            raise process.ProcessingFailure(
                e.strerror + ", filename was '" + filename + "'")
    finally:
        fObj.close()
    return content.document
1079
def makeSureDirectoryExists(filename):
    """
    Create the directory which would contain C{filename}, along with any
    missing parent directories.

    @type filename: C{str}
    @param filename: The path of a file; only its directory part is created.

    @raise OSError: If the directory cannot be created and does not already
        exist.

    @return: C{None}
    """
    dirname = os.path.dirname(os.path.abspath(filename))
    # Attempt the creation and tolerate "already there", rather than checking
    # first: the directory could appear between a check and the makedirs call.
    try:
        os.makedirs(dirname)
    except OSError:
        if not os.path.isdir(dirname):
            raise
1085
def doFile(filename, linkrel, ext, url, templ, options={}, outfileGenerator=getOutputFileName):
    """
    Process the input document at C{filename} and write an output document.

    @type filename: C{str}
    @param filename: The path to the input file which will be processed.

    @type linkrel: C{str}
    @param linkrel: An prefix to apply to all relative links in C{src} or
        C{href} attributes in the input document when generating the output
        document.

    @type ext: C{str}
    @param ext: The extension to use when selecting an output file name.  This
        replaces the extension of the input file name.

    @type url: C{str}
    @param url: A string which will be interpolated with the fully qualified
        Python name of any API reference encountered in the input document, the
        result of which will be used as a link to API documentation for that
        name in the output document.

    @type templ: A DOM Node or Document
    @param templ: The template on which the output document will be based.
        A deep copy of it is mutated and then serialized to the output file.

    @type options: C{dict}
    @param options: Further specification of the desired form of the output.
        Valid keys in this dictionary::

            noapi: If present and set to a True value, links to API
                   documentation will not be generated.

            version: A string which will be included in the output to indicate
                     the version of this documentation.

    @type outfileGenerator: Callable of C{str}, C{str} returning C{str}
    @param outfileGenerator: Output filename factory.  This is invoked with the
        intput filename and C{ext} and the output document is serialized to the
        file with the name returned.

    @return: C{None}
    """
    # NOTE: the shared default dict for options is kept for interface
    # compatibility; it must only ever be read, never modified.
    doc = parseFileAndReport(filename)
    clonedNode = templ.cloneNode(1)
    munge(doc, clonedNode, linkrel, os.path.dirname(filename), filename, ext,
          url, options, outfileGenerator)
    newFilename = outfileGenerator(filename, ext)
    _writeDocument(newFilename, clonedNode)
1137
def _writeDocument(newFilename, clonedNode):
    """
    Serialize the given node to XML into the named file.

    @param newFilename: The name of the file to which the XML will be
        written.  If this is in a directory which does not exist, the
        directory will be created.

    @param clonedNode: The root DOM node which will be serialized.

    @return: C{None}
    """
    makeSureDirectoryExists(newFilename)
    f = open(newFilename, 'w')
    # Make sure the file is flushed and closed even if serialization fails.
    try:
        f.write(clonedNode.toxml('utf-8'))
    finally:
        f.close()