1
# Copyright (C) 2014 Ivan Melnikov <iv at altlinux dot org>
3
# Author: Joshua Harlow <harlowja@yahoo-inc.com>
5
# Licensed under the Apache License, Version 2.0 (the "License"); you may
6
# not use this file except in compliance with the License. You may obtain
7
# a copy of the License at
9
# http://www.apache.org/licenses/LICENSE-2.0
11
# Unless required by applicable law or agreed to in writing, software
12
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14
# License for the specific language governing permissions and limitations
21
from docutils import nodes as docutils_nodes
24
from doc8 import utils
27
@six.add_metaclass(abc.ABCMeta)
28
class ContentCheck(object):
29
def __init__(self, cfg):
33
def report_iter(self, parsed_file):
37
@six.add_metaclass(abc.ABCMeta)
38
class LineCheck(object):
39
def __init__(self, cfg):
43
def report_iter(self, line):
47
class CheckTrailingWhitespace(LineCheck):
    """Line check that reports ``D002`` when a line ends with whitespace."""

    # Raw string literal: the backslash-s sequence must reach the regex
    # engine verbatim. In a plain string literal it is an unrecognized
    # escape that only works because Python passes it through unchanged,
    # and newer interpreters emit a warning for it.
    _TRAILING_WHITESPACE_REGEX = re.compile(r'\s$')
    REPORTS = frozenset(["D002"])

    def report_iter(self, line):
        """Yield a report tuple if ``line`` has trailing whitespace.

        :param line: text of the single line to examine
        :returns: generator producing at most one ``(code, message)``
                  tuple, ``('D002', 'Trailing whitespace')``, when the
                  trailing-whitespace pattern is found in ``line``
        """
        if self._TRAILING_WHITESPACE_REGEX.search(line):
            yield ('D002', 'Trailing whitespace')
56
class CheckIndentationNoTab(LineCheck):
57
_STARTING_WHITESPACE_REGEX = re.compile('^(\s+)')
58
REPORTS = frozenset(["D003"])
60
def report_iter(self, line):
61
match = self._STARTING_WHITESPACE_REGEX.search(line)
63
spaces = match.group(1)
65
yield ('D003', 'Tabulation used for indentation')
68
class CheckCarriageReturn(LineCheck):
69
REPORTS = frozenset(["D004"])
71
def report_iter(self, line):
73
yield ('D004', 'Found literal carriage return')
76
class CheckNewlineEndOfFile(ContentCheck):
    """Content check that reports ``D005`` for a missing final newline."""

    REPORTS = frozenset(["D005"])

    def __init__(self, cfg):
        super(CheckNewlineEndOfFile, self).__init__(cfg)

    def report_iter(self, parsed_file):
        """Yield a report tuple if the last line lacks a newline.

        :param parsed_file: object exposing a ``lines`` sequence; the last
                            entry is tested against a bytes newline
        :returns: generator producing at most one
                  ``(line_number, code, message)`` tuple, where the line
                  number is the count of entries in ``lines``
        """
        file_lines = parsed_file.lines
        # Nothing to report for a file without any lines.
        if not file_lines:
            return
        # A properly terminated last line is fine as well.
        if file_lines[-1].endswith(b'\n'):
            return
        yield (len(file_lines), 'D005', 'No newline at end of file')
87
class CheckValidity(ContentCheck):
88
REPORTS = frozenset(["D000"])
89
EXT_MATCHER = re.compile(r"(.*)[.]rst", re.I)
93
# Report system messages at or higher than <level>: "info" or "1",
94
# "warning"/"2" (default), "error"/"3", "severe"/"4", "none"/"5"
96
# See: http://docutils.sourceforge.net/docs/user/config.html#report-level
97
WARN_LEVELS = frozenset([2, 3, 4])
99
# Only used when running in sphinx mode.
100
SPHINX_IGNORES_REGEX = [
101
re.compile(r'^Unknown interpreted text'),
102
re.compile(r'^Unknown directive type'),
103
re.compile(r'^Undefined substitution'),
104
re.compile(r'^Substitution definition contains illegal element'),
107
def __init__(self, cfg):
108
super(CheckValidity, self).__init__(cfg)
109
self._sphinx_mode = cfg.get('sphinx')
111
def report_iter(self, parsed_file):
112
for error in parsed_file.errors:
113
if error.level not in self.WARN_LEVELS:
116
if self._sphinx_mode:
117
for m in self.SPHINX_IGNORES_REGEX:
118
if m.match(error.message):
122
yield (error.line, 'D000', error.message)
125
class CheckMaxLineLength(ContentCheck):
126
REPORTS = frozenset(["D001"])
128
def __init__(self, cfg):
129
super(CheckMaxLineLength, self).__init__(cfg)
130
self._max_line_length = self._cfg['max_line_length']
131
self._allow_long_titles = self._cfg['allow_long_titles']
133
def _extract_node_lines(self, doc):
135
def extract_lines(node, start_line):
137
if isinstance(node, (docutils_nodes.title)):
138
start = start_line - len(node.rawsource.splitlines())
141
if isinstance(node, (docutils_nodes.literal_block)):
142
end = start_line + len(node.rawsource.splitlines()) - 1
146
def gather_lines(node):
148
for n in node.traverse(include_self=True):
149
lines.extend(extract_lines(n, find_line(n)))
155
if n.line is not None:
160
def filter_systems(node):
161
if utils.has_any_node_type(node, (docutils_nodes.system_message,)):
167
for n in utils.filtered_traverse(doc, filter_systems):
173
contained_lines = set(gather_lines(n))
174
nodes_lines.append((n, (min(contained_lines),
175
max(contained_lines))))
176
return (nodes_lines, first_line)
178
def _extract_directives(self, lines):
180
def starting_whitespace(line):
181
m = re.match(r"^(\s+)(.*)$", line)
184
return len(m.group(1))
186
def all_whitespace(line):
187
return bool(re.match(r"^(\s*)$", line))
189
def find_directive_end(start, lines):
190
after_lines = collections.deque(lines[start + 1:])
193
line = after_lines.popleft()
194
if all_whitespace(line) or starting_whitespace(line) >= 1:
200
# Find where directives start & end so that we can exclude content in
201
# these directive regions (the rst parser may not handle this correctly
202
# for unknown directives, so we have to do it manually).
204
for i, line in enumerate(lines):
205
if re.match(r"^..\s(.*?)::\s*", line):
206
directives.append((i, find_directive_end(i, lines)))
207
elif re.match(r"^::\s*$", line):
208
directives.append((i, find_directive_end(i, lines)))
211
def _txt_checker(self, parsed_file):
212
for i, line in enumerate(parsed_file.lines_iter()):
213
if len(line) > self._max_line_length:
214
if not utils.contains_url(line):
215
yield (i + 1, 'D001', 'Line too long')
217
def _rst_checker(self, parsed_file):
218
lines = list(parsed_file.lines_iter())
219
doc = parsed_file.document
220
nodes_lines, first_line = self._extract_node_lines(doc)
221
directives = self._extract_directives(lines)
223
def find_containing_nodes(num):
224
if num < first_line and len(nodes_lines):
225
return [nodes_lines[0][0]]
227
for (n, (line_min, line_max)) in nodes_lines:
228
if num >= line_min and num <= line_max:
229
contained_in.append((n, (line_min, line_max)))
232
for (n, (line_min, line_max)) in contained_in:
233
span = line_max - line_min
234
if smallest_span is None:
237
elif span < smallest_span:
240
elif span == smallest_span:
244
def any_types(nodes, types):
245
return any([isinstance(n, types) for n in nodes])
248
docutils_nodes.target,
249
docutils_nodes.literal_block,
252
docutils_nodes.title,
253
docutils_nodes.subtitle,
254
docutils_nodes.section,
256
for i, line in enumerate(lines):
257
if len(line) > self._max_line_length:
259
for (start, end) in directives:
260
if i >= start and i <= end:
265
stripped = line.lstrip()
266
if ' ' not in stripped:
267
# No room to split even if we could.
269
if utils.contains_url(stripped):
271
nodes = find_containing_nodes(i + 1)
272
if any_types(nodes, skip_types):
274
if self._allow_long_titles and any_types(nodes, title_types):
276
yield (i + 1, 'D001', 'Line too long')
278
def report_iter(self, parsed_file):
279
if parsed_file.extension.lower() != '.rst':
280
checker_func = self._txt_checker
282
checker_func = self._rst_checker
283
for issue in checker_func(parsed_file):