1
# Copyright (c) 2001-2009 Twisted Matrix Laboratories.
2
# See LICENSE for details.
"""
Checker for common errors in Lore documents.
"""
from xml.dom import minidom as dom
9
import parser, urlparse, os.path
11
from twisted.lore import tree, process
12
from twisted.web import domhelpers
13
from twisted.python import reflect
16
# parser.suite signals unparseable source with SyntaxError on Python 2.3 and
# with parser.ParserError on earlier versions, so both are caught together.
parserErrors = (SyntaxError, parser.ParserError)
21
class TagChecker:
    """
    Base class that runs every C{check_*} method over a parsed document.

    Subclasses add methods whose names start with C{check_}; each one takes
    the document and the file name and reports problems through
    L{_reportError}.

    NOTE(review): the class header and the error-flag lines were missing
    from the text this was restored from.  The attribute name C{hadErrors}
    is a reconstruction -- confirm against the canonical file.
    """

    # Set to 1 by _reportError when a problem is reported; reset by check().
    hadErrors = 0

    def check(self, dom, filename):
        """
        Run all C{check_*} methods and fail if any of them reported an error.

        dom is the parsed document handed to every check method; filename
        is the name used in the printed reports.

        Raises process.ProcessingFailure("invalid format") when at least
        one error was reported during this run.
        """
        self.hadErrors = 0
        for method in reflect.prefixedMethods(self, 'check_'):
            method(dom, filename)
        # Raise only after every check has run, so all problems are printed.
        if self.hadErrors:
            raise process.ProcessingFailure("invalid format")

    def _reportError(self, filename, element, error):
        """
        Print one error for element unless it carries hlint="off".

        The report has the form "filename:a:b: error", where (a, b) is the
        element's C{_markpos} attribute, or (0, 0) when it has none.
        presumably _markpos is a (line, column) pair set by the parser --
        verify against the code that sets it.
        """
        hlint = element.hasAttribute('hlint') and element.getAttribute('hlint')
        if hlint == 'off':
            # The document explicitly silenced checks on this element.
            return
        self.hadErrors = 1
        pos = getattr(element, '_markpos', None) or (0, 0)
        # Parenthesised single argument prints identically on Python 2.
        print("%s:%s:%s: %s" % ((filename,) + pos + (error,)))
36
class DefaultTagChecker(TagChecker):
38
def __init__(self, allowedTags, allowedClasses):
39
self.allowedTags = allowedTags
40
self.allowedClasses = allowedClasses
42
def check_disallowedElements(self, dom, filename):
43
def m(node, self=self):
44
return not self.allowedTags(node.tagName)
45
for element in domhelpers.findElements(dom, m):
46
self._reportError(filename, element,
47
'unrecommended tag %s' % element.tagName)
49
def check_disallowedClasses(self, dom, filename):
50
def matcher(element, self=self):
51
if not element.hasAttribute('class'):
53
checker = self.allowedClasses.get(element.tagName, lambda x:0)
54
return not checker(element.getAttribute('class'))
55
for element in domhelpers.findElements(dom, matcher):
56
self._reportError(filename, element,
57
'unknown class %s' %element.getAttribute('class'))
59
def check_quote(self, doc, filename):
61
return ('"' in getattr(node, 'data', '') and
62
not isinstance(node, dom.Comment) and
63
not [1 for n in domhelpers.getParents(node)[1:-1]
64
if n.tagName in ('pre', 'code')])
65
for node in domhelpers.findNodes(doc, matcher):
66
self._reportError(filename, node.parentNode, 'contains quote')
68
def check_styleattr(self, dom, filename):
69
for node in domhelpers.findElementsWithAttribute(dom, 'style'):
70
self._reportError(filename, node, 'explicit style')
72
def check_align(self, dom, filename):
73
for node in domhelpers.findElementsWithAttribute(dom, 'align'):
74
self._reportError(filename, node, 'explicit alignment')
76
def check_style(self, dom, filename):
77
for node in domhelpers.findNodesNamed(dom, 'style'):
78
if domhelpers.getNodeText(node) != '':
79
self._reportError(filename, node, 'hand hacked style')
81
def check_title(self, dom, filename):
82
doc = dom.documentElement
83
title = domhelpers.findNodesNamed(dom, 'title')
85
return self._reportError(filename, doc, 'not exactly one title')
86
h1 = domhelpers.findNodesNamed(dom, 'h1')
88
return self._reportError(filename, doc, 'not exactly one h1')
89
if domhelpers.getNodeText(h1[0]) != domhelpers.getNodeText(title[0]):
90
self._reportError(filename, h1[0], 'title and h1 text differ')
92
def check_80_columns(self, dom, filename):
93
for node in domhelpers.findNodesNamed(dom, 'pre'):
94
# the ps/pdf output is in a font that cuts off at 80 characters,
95
# so this is enforced to make sure the interesting parts (which
96
# are likely to be on the right-hand edge) stay on the printed
98
for line in domhelpers.gatherTextNodes(node, 1).split('\n'):
99
if len(line.rstrip()) > 80:
100
self._reportError(filename, node,
101
'text wider than 80 columns in pre')
102
for node in domhelpers.findNodesNamed(dom, 'a'):
103
if node.getAttribute('class').endswith('listing'):
105
fn = os.path.dirname(filename)
106
fn = os.path.join(fn, node.getAttribute('href'))
107
lines = open(fn,'r').readlines()
109
self._reportError(filename, node,
110
'bad listing href: %r' %
111
node.getAttribute('href'))
115
if len(line.rstrip()) > 80:
116
self._reportError(filename, node,
117
'listing wider than 80 columns')
119
def check_pre_py_listing(self, dom, filename):
120
for node in domhelpers.findNodesNamed(dom, 'pre'):
121
if node.getAttribute('class') == 'python':
123
text = domhelpers.getNodeText(node)
125
text = text.replace('>', '>').replace('<', '<')
127
lines = filter(None,[l.rstrip() for l in text.split('\n')])
128
# Strip leading space
129
while not [1 for line in lines if line[:1] not in ('',' ')]:
130
lines = [line[1:] for line in lines]
131
text = '\n'.join(lines) + '\n'
134
except parserErrors, e:
135
# Pretend the "..." idiom is syntactically valid
136
text = text.replace("...","'...'")
138
except parserErrors, e:
139
self._reportError(filename, node,
140
'invalid python code:' + str(e))
142
def check_anchor_in_heading(self, dom, filename):
143
headingNames = ['h%d' % n for n in range(1,7)]
144
for hname in headingNames:
145
for node in domhelpers.findNodesNamed(dom, hname):
146
if domhelpers.findNodesNamed(node, 'a'):
147
self._reportError(filename, node, 'anchor in heading')
149
def check_texturl_matches_href(self, dom, filename):
150
for node in domhelpers.findNodesNamed(dom, 'a'):
151
if not node.hasAttribute('href'):
153
text = domhelpers.getNodeText(node)
154
proto = urlparse.urlparse(text)[0]
155
if proto and ' ' not in text:
156
if text != node.getAttribute('href'):
157
self._reportError(filename, node,
158
'link text does not match href')
160
def check_lists(self, dom, filename):
161
for node in (domhelpers.findNodesNamed(dom, 'ul')+
162
domhelpers.findNodesNamed(dom, 'ol')):
163
if not node.childNodes:
164
self._reportError(filename, node, 'empty list')
165
for child in node.childNodes:
166
if child.nodeName != 'li':
167
self._reportError(filename, node,
168
'only list items allowed in lists')
177
# Lookup tables for the default checker.  list2dict is defined outside this
# block; presumably it builds a dict keyed by the list items, since only
# has_key is used on the results -- verify against its definition.

# Class names accepted on 'code' elements.
classes = list2dict([
    'shell', 'API', 'python', 'py-prototype', 'py-filename',
    'py-src-string', 'py-signature', 'py-src-parameter',
    'py-src-identifier', 'py-src-keyword',
])

# Tag names accepted anywhere in a document.
tags = list2dict([
    "html", "title", "head", "body", "h1", "h2", "h3", "ol", "ul",
    "dl", "li", "dt", "dd", "p", "code", "img", "blockquote", "a",
    "cite", "div", "span", "strong", "em", "pre", "q", "table",
    "tr", "td", "th", "style", "sub", "sup", "link",
])

# Class names accepted on 'span', 'div', 'a' and 'pre' elements.
span = list2dict(['footnote', 'manhole-output', 'index'])

div = list2dict(['note', 'boxed', 'doit'])

a = list2dict(['listing', 'py-listing', 'html-listing', 'absolute'])

pre = list2dict(['python', 'shell', 'python-interpreter', 'elisp'])

# Maps a tag name to a predicate over that element's class attribute.
allowed = {
    'code': classes.has_key,
    'span': span.has_key,
    'div': div.has_key,
    'a': a.has_key,
    'pre': pre.has_key,
    'ul': lambda x: x=='toc',
    'ol': lambda x: x=='toc',
    'li': lambda x: x=='ignoretoc',
}
198
def getDefaultChecker():
    """
    Build a DefaultTagChecker from the module-level lookup tables.

    Tag names are accepted when they are keys of C{tags}; class names are
    judged by the per-tag predicates in C{allowed}.
    """
    tagAllowed = tags.has_key
    return DefaultTagChecker(tagAllowed, allowed)
201
def doFile(file, checker):
    """
    Parse one Lore document and run checker over it.

    file is passed to tree.parseFileAndReport and is also the name the
    checker uses in its reports.  Whatever checker.check raises propagates
    to the caller.
    """
    doc = tree.parseFileAndReport(file)
    # NOTE(review): a line was missing between the parse and the check in
    # the text this was restored from.  It is rebuilt as a guard so that a
    # falsy parse result is not handed to the checker; presumably
    # parseFileAndReport has already reported the failure -- confirm.
    if doc:
        checker.check(doc, file)