2
# pep8.py - Check Python source code formatting, according to PEP 8
3
# Copyright (C) 2006 Johann C. Rocholl <johann@browsershots.org>
5
# Permission is hereby granted, free of charge, to any person
6
# obtaining a copy of this software and associated documentation files
7
# (the "Software"), to deal in the Software without restriction,
8
# including without limitation the rights to use, copy, modify, merge,
9
# publish, distribute, sublicense, and/or sell copies of the Software,
10
# and to permit persons to whom the Software is furnished to do so,
11
# subject to the following conditions:
13
# The above copyright notice and this permission notice shall be
14
# included in all copies or substantial portions of the Software.
16
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
Check Python source code formatting, according to PEP 8:
27
http://www.python.org/dev/peps/pep-0008/
29
For usage and a list of options, try this:
32
This program and its regression test suite live here:
33
http://svn.browsershots.org/trunk/devtools/pep8/
34
http://trac.browsershots.org/browser/trunk/devtools/pep8/
36
Groups of errors and warnings:
47
You can add checks to this program by writing plugins. Each plugin is
48
a simple function that is called for each line of source code, either
52
- Raw line of text from the input file.
55
- Multi-line statements converted to a single line.
56
- Stripped left and right.
57
- Contents of strings replaced with 'xxx' of same length.
60
The check function requests physical or logical lines by the name of
63
def maximum_line_length(physical_line)
64
def extraneous_whitespace(logical_line)
65
def blank_lines(logical_line, blank_lines, indent_level, line_number)
67
The last example above demonstrates how check plugins can request
68
additional information with extra arguments. All attributes of the
69
Checker object are available. Some examples:
71
lines: a list of the raw lines from the input file
72
tokens: the tokens that contribute to this logical line
73
line_number: line number in the input file
74
blank_lines: blank lines before this one
75
indent_char: first indentation character in this file (' ' or '\t')
76
indent_level: indentation (with tabs expanded to multiples of 8)
77
previous_indent_level: indentation on previous line
78
previous_logical: previous logical line
80
The docstring of each check function shall be the relevant part of
81
text from PEP 8. It is printed if the user enables --show-pep8.
91
import inspect
import os
import re
import sys
import time
import tokenize
from fnmatch import fnmatch
from keyword import iskeyword
from optparse import OptionParser
96
__revision__ = '$Rev: 2208 $'
98
default_exclude = '.svn,CVS,*.pyc,*.pyo'
100
indent_match = re.compile(r'([ \t]*)').match
101
raise_comma_match = re.compile(r'raise\s+\w+\s*(,)').match
104
# Operator tokens examined by whitespace_around_operator().
# NOTE(review): only these two rows survived in this copy of the file; the
# assignment wrapper was lost and further rows may be missing -- confirm
# against the upstream source before relying on the list being complete.
operators = """
+  -  *  /  %  ^  &  |  =  <  >  >>  <<
+= -= *= /= %= ^= &= |= == <= >= >>= <<=
""".split()
114
##############################################################################
115
# Plugins (check functions) for physical lines
116
##############################################################################
119
def tabs_or_spaces(physical_line, indent_char):
    """
    Never mix tabs and spaces.

    The most popular way of indenting Python is with spaces only.  The
    second-most popular way is with tabs only.  Code indented with a mixture
    of tabs and spaces should be converted to using spaces exclusively.  When
    invoking the Python command line interpreter with the -t option, it issues
    warnings about code that illegally mixes tabs and spaces.  When using -tt
    these warnings become errors.  These options are highly recommended!
    """
    # Physical-line check.  `indent_char` is the first indentation character
    # seen in the file; the first character of this line's leading
    # whitespace that differs from it is reported.
    # Returns (offset, message) on a violation, otherwise None.
    indent_width = len(physical_line) - len(physical_line.lstrip(' \t'))
    for offset, char in enumerate(physical_line[:indent_width]):
        if char != indent_char:
            return offset, "E101 indentation contains mixed spaces and tabs"
136
def tabs_obsolete(physical_line):
    """
    For new projects, spaces-only are strongly recommended over tabs.  Most
    editors have features that make this easy to do.
    """
    # Physical-line check: only tabs inside the leading whitespace count.
    # Returns (offset of the first tab, message) or None.
    indent_width = len(physical_line) - len(physical_line.lstrip(' \t'))
    tab_offset = physical_line.find('\t', 0, indent_width)
    if tab_offset > -1:
        return tab_offset, "W191 indentation contains tabs"
146
def trailing_whitespace(physical_line):
    """
    JCR: Trailing whitespace is superfluous.
    """
    # Line terminators and a trailing form feed are not counted as trailing
    # whitespace, so they are removed (in this order) before comparing.
    physical_line = physical_line.rstrip('\n')    # chr(10), newline
    physical_line = physical_line.rstrip('\r')    # chr(13), carriage return
    physical_line = physical_line.rstrip('\x0c')  # chr(12), form feed, ^L
    stripped = physical_line.rstrip()
    # Returns (offset where the trailing whitespace starts, message) or None.
    if physical_line != stripped:
        return len(stripped), "W291 trailing whitespace"
158
def trailing_blank_lines(physical_line, lines, line_number):
    """
    JCR: Trailing blank lines are superfluous.
    """
    # `lines` is the list of raw lines of the file and `line_number` the
    # 1-based number of `physical_line`; only the very last line is reported.
    is_last_line = line_number == len(lines)
    if is_last_line and physical_line.strip() == '':
        return 0, "W391 blank line at end of file"
166
def missing_newline(physical_line):
    """
    JCR: The last line should have a newline.
    """
    # A line that is unchanged by rstrip() has no terminating newline (and
    # no other trailing whitespace), which can only happen on the last line.
    if physical_line.rstrip() == physical_line:
        return len(physical_line), "W292 no newline at end of file"
174
def maximum_line_length(physical_line):
    """
    Limit all lines to a maximum of 79 characters.

    There are still many devices around that are limited to 80 character
    lines; plus, limiting windows to 80 characters makes it possible to have
    several windows side-by-side.  The default wrapping on such devices looks
    ugly.  Therefore, please limit all lines to a maximum of 79 characters.
    For flowing long blocks of text (docstrings or comments), limiting the
    length to 72 characters is recommended.
    """
    # Trailing whitespace (including the newline) does not count.
    length = len(physical_line.rstrip())
    # Without this guard every line would be reported as too long.
    if length > 79:
        return 79, "E501 line too long (%d characters)" % length
190
##############################################################################
191
# Plugins (check functions) for logical lines
192
##############################################################################
195
def blank_lines(logical_line, blank_lines, indent_level, line_number,
                previous_logical):
    """
    Separate top-level function and class definitions with two blank lines.

    Method definitions inside a class are separated by a single blank line.

    Extra blank lines may be used (sparingly) to separate groups of related
    functions.  Blank lines may be omitted between a bunch of related
    one-liners (e.g. a set of dummy implementations).

    Use blank lines in functions, sparingly, to indicate logical sections.
    """
    # `blank_lines` is the number of blank lines before this logical line.
    # Returns (0, message) on a violation, otherwise None.
    if line_number == 1:
        return  # Don't expect blank lines before the first line
    if previous_logical.startswith('@'):
        return  # Don't expect blank lines after function decorator
    if logical_line.startswith(('def ', 'class ', '@')):
        if indent_level > 0 and blank_lines != 1:
            return 0, "E301 expected 1 blank line, found %d" % blank_lines
        if indent_level == 0 and blank_lines != 2:
            return 0, "E302 expected 2 blank lines, found %d" % blank_lines
    # Two blank lines is the most PEP 8 asks for anywhere.
    if blank_lines > 2:
        return 0, "E303 too many blank lines (%d)" % blank_lines
223
def extraneous_whitespace(logical_line):
    """
    Avoid extraneous whitespace in the following situations:

    - Immediately inside parentheses, brackets or braces.

    - Immediately before a comma, semicolon, or colon.
    """
    # Returns (offset of the offending space, message) or None.
    line = logical_line
    for char in '([{':
        found = line.find(char + ' ')
        if found > -1:
            return found + 1, "E201 whitespace after '%s'" % char
    for char in '}])':
        found = line.find(' ' + char)
        # A space after a comma is fine even when a closing bracket follows
        # (e.g. a one-element tuple written as "(1, )").
        if found > -1 and line[found - 1] != ',':
            return found, "E202 whitespace before '%s'" % char
    for char in ',;:':
        found = line.find(' ' + char)
        if found > -1:
            return found, "E203 whitespace before '%s'" % char
246
def missing_whitespace(logical_line):
    """
    JCR: Each comma, semicolon or colon should be followed by whitespace.
    """
    # The last character is never examined: a separator at the very end of
    # the line has nothing after it that could need a space.
    line = logical_line
    for index, char in enumerate(line[:-1]):
        if char in ',;:' and line[index + 1] != ' ':
            before = line[:index]
            if char == ':' and before.count('[') > before.count(']'):
                continue  # Slice syntax, no space required
            return index, "E231 missing whitespace after '%s'" % char
260
def indentation(logical_line, previous_logical, indent_char,
                indent_level, previous_indent_level):
    """
    Use 4 spaces per indentation level.

    For really old code that you don't want to mess up, you can continue to
    use 8-space tabs.
    """
    # Returns (0, message) on a violation, otherwise None.
    if indent_char == ' ' and indent_level % 4:
        return 0, "E111 indentation is not a multiple of four"
    # A previous logical line ending in ':' opens a block.
    indent_expect = previous_logical.endswith(':')
    if indent_expect and indent_level <= previous_indent_level:
        return 0, "E112 expected an indented block"
    if indent_level > previous_indent_level and not indent_expect:
        return 0, "E113 unexpected indentation"
277
def whitespace_before_parameters(logical_line, tokens):
    """
    Avoid extraneous whitespace in the following situations:

    - Immediately before the open parenthesis that starts the argument
      list of a function call.

    - Immediately before the open parenthesis that starts an indexing or
      slicing.
    """
    # `tokens` are the tokenize 5-tuples of this logical line.  The offset
    # returned is a (row, column) tuple: the end of the preceding name.
    prev_type, prev_text, _, prev_end = tokens[0][:4]
    for index in range(1, len(tokens)):
        token_type, text, start, end, line = tokens[index]
        if (token_type == tokenize.OP and
            text in '([' and
            start != prev_end and
            prev_type == tokenize.NAME and
            # "class Foo (Base)" is left alone
            (index < 2 or tokens[index - 2][1] != 'class') and
            # so are keywords such as "if (x)" or "return (x)"
            (not iskeyword(prev_text))):
            return prev_end, "E211 whitespace before '%s'" % text
        prev_type = token_type
        prev_text = text
        prev_end = end
304
def whitespace_around_operator(logical_line):
    """
    Avoid extraneous whitespace in the following situations:

    - More than one space around an assignment (or other) operator to
      align it with another.
    """
    # Uses the module-level `operators` list.  E221/E222 look for *two*
    # spaces: matching a single space would flag every correctly spaced
    # operator while still reporting "multiple spaces".
    # Returns (offset, message) or None.
    line = logical_line
    for operator in operators:
        found = line.find('  ' + operator)
        if found > -1:
            return found, "E221 multiple spaces before operator"
        found = line.find(operator + '  ')
        if found > -1:
            return found, "E222 multiple spaces after operator"
        found = line.find('\t' + operator)
        if found > -1:
            return found, "E223 tab before operator"
        found = line.find(operator + '\t')
        if found > -1:
            return found, "E224 tab after operator"
327
def whitespace_around_comma(logical_line):
    """
    Avoid extraneous whitespace in the following situations:

    - More than one space around an assignment (or other) operator to
      align it with another.

    JCR: This should also be applied around comma etc.
    """
    # E241 must look for *two* spaces after the separator; a single space
    # is the required form.  The offset points just after the separator.
    line = logical_line
    for separator in ',;:':
        found = line.find(separator + '  ')
        if found > -1:
            return found + 1, "E241 multiple spaces after '%s'" % separator
        found = line.find(separator + '\t')
        if found > -1:
            return found + 1, "E242 tab after '%s'" % separator
346
def imports_on_separate_lines(logical_line):
    """
    Imports should usually be on separate lines.
    """
    # Only plain "import a, b" is reported; "from x import a, b" is fine.
    line = logical_line
    if line.startswith('import '):
        found = line.find(',')
        if found > -1:
            return found, "E401 multiple imports on one line"
357
def compound_statements(logical_line):
    """
    Compound statements (multiple statements on the same line) are
    generally discouraged.
    """
    # Returns (offset of the ':' or ';', message) or None.
    line = logical_line
    found = line.find(':')
    # A colon that ends the line just opens a block; only a colon followed
    # by more code is a candidate.
    if -1 < found < len(line) - 1:
        before = line[:found]
        if (before.count('{') <= before.count('}') and  # {'a': 1} (dict)
            before.count('[') <= before.count(']') and  # [1:2] (slice)
            not re.search(r'\blambda\b', before)):      # lambda x: x
            return found, "E701 multiple statements on one line (colon)"
    found = line.find(';')
    if found > -1:
        return found, "E702 multiple statements on one line (semicolon)"
375
def python_3000_has_key(logical_line):
    """
    The {}.has_key() method will be removed in the future version of
    Python.  Use the 'in' operation instead, like:

        if "keyword" in dictionary:
    """
    # Returns (offset of ".has_key(", message) or None.
    pos = logical_line.find('.has_key(')
    if pos > -1:
        return pos, "W601 .has_key() is deprecated, use 'in'"
388
def python_3000_raise_comma(logical_line):
    """
    When raising an exception, use "raise ValueError('message')"
    instead of the older form "raise ValueError, 'message'".

    The paren-using form is preferred because when the exception arguments
    are long or include string formatting, you don't need to use line
    continuation characters thanks to the containing parentheses.  The older
    form will be removed in Python 3000.
    """
    # `raise_comma_match` is the module-level compiled matcher; it returns
    # None for lines that do not use the comma form, so guard before use.
    match = raise_comma_match(logical_line)
    if match:
        return match.start(1), "W602 deprecated form of raising exception"
403
##############################################################################
405
##############################################################################
408
def expand_indent(line):
    """
    Return the amount of indentation.
    Tabs are expanded to the next multiple of 8.

    >>> expand_indent('    ')
    4
    >>> expand_indent('\\t')
    8
    >>> expand_indent('    \\t')
    8
    >>> expand_indent('       \\t')
    8
    >>> expand_indent('        \\t')
    16
    """
    result = 0
    for char in line:
        if char == '\t':
            # Floor division keeps the result an int on Python 3 as well.
            result = result // 8 * 8 + 8
        elif char == ' ':
            result += 1
        else:
            break  # first non-indentation character
    return result
435
##############################################################################
436
# Framework to run all checks
437
##############################################################################
def message(text):
    """Print a message."""
    # NOTE(review): the def line and the active print statement were lost in
    # this copy; callers pass a single string, and the commented-out
    # variants below suggest stdout is the intended stream -- confirm.
    # print >> sys.stderr, options.prog + ': ' + text
    # print >> sys.stderr, text
    print(text)
447
def find_checks(argument_name):
    """
    Find all globally visible functions where the first argument name
    starts with argument_name.

    Returns a list of (name, function, argument_names) tuples ordered by
    function name, so checks always run in the same order.
    """
    # inspect.getargspec() was removed in Python 3.11; prefer the
    # replacement when it exists and fall back on old interpreters.
    getargspec = getattr(inspect, 'getfullargspec', None) or inspect.getargspec
    function_type = type(find_checks)
    checks = []
    # list(...) so the module namespace may change while we iterate.
    for name, function in list(globals().items()):
        if type(function) is function_type:
            args = getargspec(function)[0]
            if len(args) >= 1 and args[0].startswith(argument_name):
                checks.append((name, function, args))
    # Sort on the name only: function objects are not orderable on Python 3.
    checks.sort(key=lambda check: check[0])
    return checks
463
def mute_string(text):
    """
    Replace contents with 'xxx' to prevent syntax matching.

    >>> mute_string('"abc"')
    '"xxx"'
    >>> mute_string("'''abc'''")
    "'''xxx'''"
    >>> mute_string("r'abc'")
    "r'xxx'"
    """
    # `start`/`end` delimit the string's contents inside the literal.
    start = 1
    end = len(text) - 1
    # String modifiers (e.g. u or r)
    if text.endswith('"'):
        start += text.index('"')
    elif text.endswith("'"):
        start += text.index("'")
    # Triple quotes: two more quote characters on each side
    if text.endswith('"""') or text.endswith("'''"):
        start += 2
        end -= 2
    return text[:start] + 'x' * (end - start) + text[end:]
490
class Checker(object):
    """
    Load a Python source file, tokenize it, check coding style.

    NOTE(review): the class header and a number of short statements were
    missing from this copy of the file.  They have been restored from the
    way the surviving code uses them; lines marked "restored" below should
    be confirmed against the upstream source.
    """

    def __init__(self, filename):
        """Read `filename` and collect the check plugins to run on it."""
        self.filename = filename
        # Close the file deterministically instead of leaking the handle.
        with open(filename) as source:
            self.lines = source.readlines()
        self.physical_checks = find_checks('physical_line')
        self.logical_checks = find_checks('logical_line')
        options.counters['physical lines'] = \
            options.counters.get('physical lines', 0) + len(self.lines)

    def readline(self):
        """
        Get the next line from the input buffer.

        Returns '' once all lines have been consumed, which is how
        tokenize recognises the end of input.
        """
        self.line_number += 1
        if self.line_number > len(self.lines):
            return ''  # restored
        return self.lines[self.line_number - 1]

    def readline_check_physical(self):
        """
        Check and return the next physical line. This method can be
        used to feed tokenize.generate_tokens.
        """
        line = self.readline()
        if line:  # restored: nothing to check at end of input
            self.check_physical(line)
        return line

    def run_check(self, check, argument_names):
        """
        Run a check plugin.

        Each name in `argument_names` is looked up as an attribute of this
        Checker and passed positionally to `check`.
        """
        arguments = [getattr(self, name) for name in argument_names]
        return check(*arguments)

    def check_physical(self, line):
        """
        Run all physical checks on a raw input line.
        """
        self.physical_line = line
        # The first indentation character seen decides what the file "uses".
        if self.indent_char is None and len(line) and line[0] in ' \t':
            self.indent_char = line[0]
        for name, check, argument_names in self.physical_checks:
            result = self.run_check(check, argument_names)
            if result is not None:
                offset, text = result
                self.report_error(self.line_number, offset, text, check)

    def build_tokens_line(self):
        """
        Build a logical line from tokens.

        Sets self.logical_line and self.mapping, a list of
        (offset in logical line, token) pairs used to map offsets back to
        positions in the source file.
        """
        self.mapping = []
        logical = []
        length = 0
        previous = None
        for token in self.tokens:
            token_type, text = token[0:2]
            if token_type in (tokenize.COMMENT, tokenize.NL,
                              tokenize.INDENT, tokenize.DEDENT,
                              tokenize.NEWLINE):
                continue
            if token_type == tokenize.STRING:
                text = mute_string(text)
            if previous:
                end_line, end = previous[3]
                start_line, start = token[2]
                if end_line != start_line:  # different row
                    # Joining continuation lines: insert one space unless
                    # the previous row ended with an opening bracket.
                    if self.lines[end_line - 1][end - 1] not in '{[(':
                        logical.append(' ')
                        length += 1
                elif end != start:  # different column
                    fill = self.lines[end_line - 1][end:start]
                    logical.append(fill)
                    length += len(fill)
            self.mapping.append((length, token))
            logical.append(text)
            length += len(text)
            previous = token
        self.logical_line = ''.join(logical)
        assert self.logical_line.lstrip() == self.logical_line
        assert self.logical_line.rstrip() == self.logical_line

    def check_logical(self):
        """
        Build a line from tokens and run all logical checks on it.
        """
        options.counters['logical lines'] = \
            options.counters.get('logical lines', 0) + 1
        self.build_tokens_line()
        # Indentation is whatever precedes the first token on its row.
        first_line = self.lines[self.mapping[0][1][2][0] - 1]
        indent = first_line[:self.mapping[0][1][2][1]]
        self.previous_indent_level = self.indent_level
        self.indent_level = expand_indent(indent)
        if options.verbose >= 2:
            print(self.logical_line[:80].rstrip())
        for name, check, argument_names in self.logical_checks:
            if options.verbose >= 3:
                print('   ' + name)  # restored: trace which check runs
            result = self.run_check(check, argument_names)
            if result is not None:
                offset, text = result
                if isinstance(offset, tuple):
                    # The check already reported a (row, column) position.
                    original_number, original_offset = offset
                else:
                    # Translate the logical-line offset via the last token
                    # that starts at or before it.
                    for token_offset, token in self.mapping:
                        if offset >= token_offset:
                            original_number = token[2][0]
                            original_offset = (token[2][1]
                                               + offset - token_offset)
                self.report_error(original_number, original_offset,
                                  text, check)
        self.previous_logical = self.logical_line

    def check_all(self):
        """
        Run all checks on the input file.

        Returns the number of errors and warnings found in the file.
        """
        self.file_errors = 0   # restored
        self.line_number = 0   # restored
        self.indent_char = None
        self.indent_level = 0
        self.previous_logical = ''
        self.blank_lines = 0   # restored
        self.tokens = []       # restored
        parens = 0             # restored: bracket nesting depth
        for token in tokenize.generate_tokens(self.readline_check_physical):
            # print tokenize.tok_name[token[0]], repr(token)
            self.tokens.append(token)
            token_type, text = token[0:2]
            if token_type == tokenize.OP and text in '([{':
                parens += 1
            if token_type == tokenize.OP and text in '}])':
                parens -= 1
            if token_type == tokenize.NEWLINE and not parens:
                # End of a logical line: check it and start afresh.
                self.check_logical()
                self.blank_lines = 0
                self.tokens = []
            if token_type == tokenize.NL and not parens:
                self.blank_lines += 1
                self.tokens = []
            if token_type == tokenize.COMMENT:
                source_line = token[4]
                token_start = token[2][1]
                # A comment on a line of its own is not a blank line.
                if source_line[:token_start].strip() == '':
                    self.blank_lines = 0
        return self.file_errors

    def report_error(self, line_number, offset, text, check):
        """
        Report an error, according to options.

        `text` starts with the four-character code (e.g. "E501"); `check`
        is the plugin whose docstring is shown with --show-pep8.
        """
        if options.quiet == 1 and not self.file_errors:
            message(self.filename)
        self.file_errors += 1
        code = text[:4]
        options.counters[code] = options.counters.get(code, 0) + 1
        options.messages[code] = text[5:]
        if options.quiet:
            return  # restored: -q / -qq suppress individual messages
        if options.testsuite:
            base = os.path.basename(self.filename)[:4]
            if base == code:
                return  # restored: the test file expects this very code
            if base[0] == 'E' and code[0] == 'W':
                return
        if ignore_code(code):
            return
        if options.counters[code] == 1 or options.repeat:
            message("%s:%s:%d: %s" %
                    (self.filename, line_number, offset + 1, text))
            if options.show_source:
                line = self.lines[line_number - 1]
                message(line.rstrip())
                message(' ' * offset + '^')
            if options.show_pep8:
                message(check.__doc__.lstrip('\n').rstrip())
673
def input_file(filename):
    """
    Run all checks on a Python source file.

    Files that are excluded, or that do not match --filename, are skipped.
    """
    if excluded(filename) or not filename_match(filename):
        # NOTE(review): the skipped-file return value was lost in this
        # copy; the callers visible in this file ignore it.
        return
    if options.verbose:
        message('checking ' + filename)
    options.counters['files'] = options.counters.get('files', 0) + 1
    errors = Checker(filename).check_all()
    if options.testsuite and not errors:
        message("%s: %s" % (filename, "no errors found"))
687
def input_dir(dirname):
    """
    Check all Python source files in this directory and all subdirectories.
    """
    dirname = dirname.rstrip('/')
    if excluded(dirname):
        return
    for root, dirs, files in os.walk(dirname):
        if options.verbose:
            message('directory ' + root)
        options.counters['directories'] = \
            options.counters.get('directories', 0) + 1
        # NOTE(review): the statements between the counter update and the
        # file loop were lost in this copy.  Pruning excluded directories
        # (the default exclude list names .svn and CVS) and walking in
        # sorted order is assumed -- confirm against upstream.  The slice
        # assignment edits os.walk's own list without mutating it while
        # iterating.
        dirs[:] = sorted(subdir for subdir in dirs if not excluded(subdir))
        for filename in sorted(files):
            input_file(os.path.join(root, filename))
708
def excluded(filename):
    """
    Check if options.exclude contains a pattern that matches filename.

    Only the base name of `filename` is compared against the patterns.
    """
    basename = os.path.basename(filename)
    for pattern in options.exclude:
        if fnmatch(basename, pattern):
            # print basename, 'excluded because it matches', pattern
            return True
    return False
719
def filename_match(filename):
    """
    Check if options.filename contains a pattern that matches filename.
    If options.filename is unspecified, this always returns True.
    """
    if not options.filename:
        return True
    for pattern in options.filename:
        if fnmatch(filename, pattern):
            return True
    return False
731
def ignore_code(code):
    """
    Check if options.ignore contains a prefix of the error code.
    """
    for ignore in options.ignore:
        if code.startswith(ignore):
            return True
    return False
740
def get_error_statistics():
    """Get error statistics."""
    return get_statistics("E")
745
def get_warning_statistics():
    """Get warning statistics."""
    return get_statistics("W")
750
def get_statistics(prefix=''):
    """
    Get statistics for message codes that start with the prefix.

    prefix='' matches all errors and warnings
    prefix='E' matches all errors
    prefix='W' matches all warnings
    prefix='E4' matches all errors that have to do with imports

    Returns a list of formatted lines, one per code, ordered by code.
    """
    stats = []
    # sorted() works on both Python 2 lists and Python 3 key views.
    for key in sorted(options.messages):
        if key.startswith(prefix):
            stats.append('%-7s %s %s' %
                         (options.counters[key], key, options.messages[key]))
    return stats
769
def print_statistics(prefix=''):
    """Print overall statistics (number of errors and warnings)."""
    for line in get_statistics(prefix):
        print(line)
775
def print_benchmark(elapsed):
    """
    Print benchmark numbers.

    `elapsed` is the run time in seconds; for each counter that was
    collected, the rate per second and the total are printed.
    """
    print('%-7.2f %s' % (elapsed, 'seconds elapsed'))
    keys = ['directories', 'files',
            'logical lines', 'physical lines']
    for key in keys:
        if key in options.counters:
            print('%-7d %s per second (%d total)' % (
                options.counters[key] / elapsed, key,
                options.counters[key]))
789
def process_options(arglist=None):
    """
    Process options passed either via arglist or via command line args.

    Stores the result in the module-level `options` and `args` (the check
    and reporting functions read `options` as a global) and also returns
    them as an (options, args) tuple.
    """
    global options, args
    usage = "%prog [options] input ..."
    parser = OptionParser(usage)
    parser.add_option('-v', '--verbose', default=0, action='count',
                      help="print status messages, or debug with -vv")
    parser.add_option('-q', '--quiet', default=0, action='count',
                      help="report only file names, or nothing with -qq")
    parser.add_option('--exclude', metavar='patterns', default=default_exclude,
                      help="skip matches (default %s)" % default_exclude)
    parser.add_option('--filename', metavar='patterns',
                      help="only check matching files (e.g. *.py)")
    parser.add_option('--ignore', metavar='errors', default='',
                      help="skip errors and warnings (e.g. E4,W)")
    parser.add_option('--repeat', action='store_true',
                      help="show all occurrences of the same error")
    parser.add_option('--show-source', action='store_true',
                      help="show source code for each error")
    parser.add_option('--show-pep8', action='store_true',
                      help="show text of PEP 8 for each error")
    parser.add_option('--statistics', action='store_true',
                      help="count errors and warnings")
    parser.add_option('--benchmark', action='store_true',
                      help="measure processing speed")
    parser.add_option('--testsuite', metavar='dir',
                      help="run regression tests from dir")
    parser.add_option('--doctest', action='store_true',
                      help="run doctest on myself")
    options, args = parser.parse_args(arglist)
    if options.testsuite:
        args.append(options.testsuite)
    if len(args) == 0:
        parser.error('input not specified')
    options.prog = os.path.basename(sys.argv[0])
    options.exclude = [pattern.rstrip('/')
                       for pattern in options.exclude.split(',')]
    # --filename has no default, so it is None unless given.
    if options.filename:
        options.filename = options.filename.split(',')
    # ''.split(',') is [''], and every code starts with '', which would
    # silently ignore all errors; use an empty list instead.
    if options.ignore:
        options.ignore = options.ignore.split(',')
    else:
        options.ignore = []
    options.counters = {}
    options.messages = {}
    return options, args
843
def _main():
    """
    Parse options and run checks on Python source.
    """
    # NOTE(review): the def line and the body of the __main__ guard were
    # lost in this copy, so the function name is an assumption -- confirm
    # against upstream.
    options, args = process_options()
    if options.doctest:
        import doctest
        return doctest.testmod()
    start_time = time.time()
    for path in args:
        if os.path.isdir(path):
            input_dir(path)
        else:
            input_file(path)
    elapsed = time.time() - start_time
    if options.statistics:
        print_statistics()
    if options.benchmark:
        print_benchmark(elapsed)


if __name__ == '__main__':
    _main()