"""Checker Manager and Checker classes."""
import collections
import errno
import logging
import os
import signal
import sys
import tokenize

try:
    import multiprocessing
except ImportError:
    # multiprocessing is unavailable on some platforms/builds; fall back
    # to serial processing when it cannot be imported.
    multiprocessing = None

from flake8 import defaults
from flake8 import exceptions
from flake8 import processor
from flake8 import utils

LOG = logging.getLogger(__name__)

SERIAL_RETRY_ERRNOS = set([
    # ENOSPC: Added by sigmavirus24
    # > On some operating systems (OSX), multiprocessing may cause an
    # > ENOSPC error while trying to create a Semaphore.
    # > In those cases, we should replace the customized Queue Report
    # > class with pep8's StandardReport class to ensure users don't run
    # > into this problem.
    # > (See also: https://gitlab.com/pycqa/flake8/issues/74)
    errno.ENOSPC,
    # NOTE(sigmavirus24): When adding to this list, include the reasoning
    # on the lines before the error code and always append your error
    # code. Further, please always add a trailing `,` to reduce the visual
    # noise in diffs.
])
class Manager(object):
    """Manage the parallelism and checker instances for each plugin and file.

    This class will be responsible for the following:

    - Determining the parallelism of Flake8, e.g.:

      * Do we use :mod:`multiprocessing` or is it unavailable?

      * Do we automatically decide on the number of jobs to use or did the
        user provide that via the ``--jobs`` option?

    - Falling back to a serial way of processing files if we run into an
      OSError related to :mod:`multiprocessing`

    - Organizing the results of each checker so we can group the output
      together and make our output deterministic.
    """

    def __init__(self, style_guide, arguments, checker_plugins):
        """Initialize our Manager instance.

        :param style_guide:
            The instantiated style guide for this instance of Flake8.
        :type style_guide:
            flake8.style_guide.StyleGuide
        :param list arguments:
            The extra arguments parsed from the CLI (if any)
        :param checker_plugins:
            The plugins representing checks parsed from entry-points.
        :type checker_plugins:
            flake8.plugins.manager.Checkers
        """
        self.arguments = arguments
        self.style_guide = style_guide
        self.options = style_guide.options
        self.checks = checker_plugins
        self.jobs = self._job_count()
        self.using_multiprocessing = self.jobs > 1
        self.processes = []
        self.checkers = []
        self.pool = None
        # Aggregated statistics across every checked file; individual
        # checker statistics are folded in by _process_statistics().
        self.statistics = {
            'files': 0,
            'logical lines': 0,
            'physical lines': 0,
            'tokens': 0,
        }
        if self.using_multiprocessing:
            try:
                self.pool = multiprocessing.Pool(self.jobs, _pool_init)
            except OSError as oserr:
                # Only swallow the errors we know can be retried in
                # serial mode; anything else is a real failure.
                if oserr.errno not in SERIAL_RETRY_ERRNOS:
                    raise
                self.using_multiprocessing = False

    def _process_statistics(self):
        """Fold each checker's statistics into our aggregate counters."""
        for checker in self.checkers:
            for statistic in defaults.STATISTIC_NAMES:
                self.statistics[statistic] += checker.statistics[statistic]
        self.statistics['files'] += len(self.checkers)

    def _job_count(self):
        # type: () -> int
        """Determine the number of jobs to use.

        :returns:
            The number of parallel jobs to run, or 0 when parallelism is
            unavailable or was effectively disabled. Returning 0 (rather
            than None) keeps ``self.jobs > 1`` valid on Python 3.
        :rtype:
            int
        """
        # First we walk through all of our error cases:
        # - multiprocessing library is not present
        # - we're running on windows in which case we know we have significant
        #   implementation issues
        # - the user provided stdin and that's not something we can handle
        #   well
        # - we're processing a diff, which again does not work well with
        #   multiprocessing and which really shouldn't require multiprocessing
        # - the user provided some awful input
        if not multiprocessing:
            LOG.warning('The multiprocessing module is not available. '
                        'Ignoring --jobs arguments.')
            return 0

        if (utils.is_windows() and
                not utils.can_run_multiprocessing_on_windows()):
            LOG.warning('The --jobs option is not available on Windows due to'
                        ' a bug (https://bugs.python.org/issue27649) in '
                        'Python 2.7.11+ and 3.3+. We have detected that you '
                        'are running an unsupported version of Python on '
                        'Windows. Ignoring --jobs arguments.')
            return 0

        if utils.is_using_stdin(self.arguments):
            LOG.warning('The --jobs option is not compatible with supplying '
                        'input using - . Ignoring --jobs arguments.')
            return 0

        if self.options.diff:
            LOG.warning('The --diff option was specified with --jobs but '
                        'they are not compatible. Ignoring --jobs arguments.')
            return 0

        jobs = self.options.jobs
        if jobs != 'auto' and not jobs.isdigit():
            LOG.warning('"%s" is not a valid parameter to --jobs. Must be one '
                        'of "auto" or a numerical value, e.g., 4.', jobs)
            return 0

        # If the value is "auto", we want to let the multiprocessing library
        # decide the number based on the number of CPUs. However, if that
        # function is not implemented for this particular value of Python we
        # treat it the same as having no parallelism available.
        if jobs == 'auto':
            try:
                return multiprocessing.cpu_count()
            except NotImplementedError:
                return 0

        # Otherwise, we know jobs should be an integer and we can just convert
        # it to an integer.
        return int(jobs)

    def _handle_results(self, filename, results):
        """Report each result for ``filename`` through the style guide.

        :returns:
            The number of results actually reported (i.e., not ignored).
        :rtype:
            int
        """
        style_guide = self.style_guide
        reported_results_count = 0
        for (error_code, line_number, column, text, physical_line) in results:
            reported_results_count += style_guide.handle_error(
                code=error_code,
                filename=filename,
                line_number=line_number,
                column_number=column,
                text=text,
                physical_line=physical_line,
            )
        return reported_results_count

    def is_path_excluded(self, path):
        # type: (str) -> bool
        """Check if a path is excluded.

        :param str path:
            Path to check against the exclude patterns.
        :returns:
            True if there are exclude patterns and the path matches,
            otherwise False.
        :rtype:
            bool
        """
        if path == '-':
            # stdin input is matched against its display name, if one was
            # configured; plain "stdin" can never be excluded.
            if self.options.stdin_display_name == 'stdin':
                return False
            path = self.options.stdin_display_name

        exclude = self.options.exclude
        if not exclude:
            return False
        basename = os.path.basename(path)
        if utils.fnmatch(basename, exclude):
            LOG.debug('"%s" has been excluded', basename)
            return True

        absolute_path = os.path.abspath(path)
        match = utils.fnmatch(absolute_path, exclude)
        LOG.debug('"%s" has %sbeen excluded', absolute_path,
                  '' if match else 'not ')
        return match

    def make_checkers(self, paths=None):
        # type: (List[str]) -> NoneType
        """Create checkers for each file."""
        if paths is None:
            paths = self.arguments

        if not paths:
            paths = ['.']

        filename_patterns = self.options.filename
        running_from_vcs = self.options._running_from_vcs

        # NOTE(sigmavirus24): Yes this is a little unsightly, but it's our
        # best solution right now.
        def should_create_file_checker(filename, argument):
            """Determine if we should create a file checker."""
            matches_filename_patterns = utils.fnmatch(
                filename, filename_patterns
            )
            is_stdin = filename == '-'
            file_exists = os.path.exists(filename)
            # NOTE(sigmavirus24): If a user explicitly specifies something,
            # e.g, ``flake8 bin/script`` then we should run Flake8 against
            # that. Since should_create_file_checker looks to see if the
            # filename patterns match the filename, we want to skip that in
            # the event that the argument and the filename are identical.
            # If it was specified explicitly, the user intended for it to be
            # checked.
            explicitly_provided = (not running_from_vcs and
                                   (argument == filename))
            return ((file_exists and
                     (explicitly_provided or matches_filename_patterns)) or
                    is_stdin)

        checks = self.checks.to_dictionary()
        checkers = [
            FileChecker(filename, checks, self.options)
            for argument in paths
            for filename in utils.filenames_from(argument,
                                                 self.is_path_excluded)
            if should_create_file_checker(filename, argument)
        ]
        # Only keep checkers whose file should actually be processed
        # (e.g., not explicitly ignored via a # flake8: noqa file comment).
        self.checkers = [
            checker for checker in checkers if checker.should_process
        ]
        LOG.info('Checking %d files', len(self.checkers))

    def report(self):
        # type: () -> (int, int)
        """Report all of the errors found in the managed file checkers.

        This iterates over each of the checkers and reports the errors sorted
        by line number and column.

        :returns:
            A tuple of the total results found and the results reported.
        :rtype:
            tuple(int, int)
        """
        results_reported = results_found = 0
        for checker in self.checkers:
            # Sort by line number (tup[1]) then column (tup[2]) so output
            # is deterministic.
            results = sorted(checker.results, key=lambda tup: (tup[1], tup[2]))
            filename = checker.display_name
            with self.style_guide.processing_file(filename):
                results_reported += self._handle_results(filename, results)
            results_found += len(results)
        return (results_found, results_reported)

    def _force_cleanup(self):
        """Terminate and reap the worker pool, if one exists."""
        if self.pool is not None:
            self.pool.terminate()
            # Join after terminate so worker processes are fully reaped
            # instead of being left as zombies.
            self.pool.join()

    def run_parallel(self):
        """Run the checkers in parallel."""
        final_results = collections.defaultdict(list)
        final_statistics = collections.defaultdict(dict)
        pool_map = self.pool.imap_unordered(
            _run_checks,
            self.checkers,
            chunksize=calculate_pool_chunksize(
                len(self.checkers),
                self.jobs,
            ),
        )
        for ret in pool_map:
            filename, results, statistics = ret
            final_results[filename] = results
            final_statistics[filename] = statistics

        for checker in self.checkers:
            filename = checker.display_name
            # NOTE: sort by line number then column — (tup[1], tup[2]) —
            # to match Manager.report(); the previous key repeated tup[2]
            # and sorted only by column.
            checker.results = sorted(final_results[filename],
                                     key=lambda tup: (tup[1], tup[2]))
            checker.statistics = final_statistics[filename]

    def run_serial(self):
        """Run the checkers in serial."""
        for checker in self.checkers:
            checker.run_checks()

    def run(self):
        """Run all the checkers.

        This will intelligently decide whether to run the checks in parallel
        or whether to run them in serial.

        If running the checks in parallel causes a problem (e.g.,
        https://gitlab.com/pycqa/flake8/issues/74) this also implements
        fallback to serial processing.
        """
        try:
            if self.using_multiprocessing:
                self.run_parallel()
            else:
                self.run_serial()
        except OSError as oserr:
            if oserr.errno not in SERIAL_RETRY_ERRNOS:
                LOG.exception(oserr)
                raise
            LOG.warning('Running in serial after OS exception, %r', oserr)
            self.run_serial()
        except KeyboardInterrupt:
            LOG.warning('Flake8 was interrupted by the user')
            raise exceptions.EarlyQuit('Early quit while running checks')
        finally:
            self._force_cleanup()

    def start(self, paths=None):
        """Start checking files.

        :param list paths:
            Path names to check. This is passed directly to
            :meth:`~Manager.make_checkers`.
        """
        LOG.info('Making checkers')
        self.make_checkers(paths)

    def stop(self):
        """Stop checking files."""
        self._process_statistics()
        for proc in self.processes:
            LOG.info('Joining %s to the main process', proc.name)
            proc.join()
class FileChecker(object):
    """Manage running checks for a file and aggregate the results."""

    def __init__(self, filename, checks, options):
        """Initialize our file checker.

        :param str filename:
            Name of the file to check.
        :param checks:
            The plugins registered to check the file.
        :type checks:
            dict
        :param options:
            Parsed option values from config and command-line.
        """
        self.options = options
        self.filename = filename
        self.checks = checks
        self.results = []
        # Per-file statistics; 'physical lines' is filled in below once a
        # processor exists.
        self.statistics = {
            'tokens': 0,
            'logical lines': 0,
            'physical lines': 0,
        }
        self.processor = self._make_processor()
        self.display_name = filename
        self.should_process = False
        if self.processor is not None:
            self.display_name = self.processor.filename
            self.should_process = not self.processor.should_ignore_file()
            self.statistics['physical lines'] = len(self.processor.lines)

    def _make_processor(self):
        """Create the FileProcessor, reporting E902 if the file is unreadable."""
        try:
            return processor.FileProcessor(self.filename, self.options)
        except IOError:
            # If we can not read the file due to an IOError (e.g., the file
            # does not exist or we do not have the permissions to open it)
            # then we need to format that exception for the user.
            # NOTE(sigmavirus24): Historically, pep8 has always reported this
            # as an E902. We probably *want* a better error code for this
            # going forward.
            (exc_type, exception) = sys.exc_info()[:2]
            message = '{0}: {1}'.format(exc_type.__name__, exception)
            self.report('E902', 0, 0, message)
            return None

    def report(self, error_code, line_number, column, text, line=None):
        # type: (str, int, int, str) -> str
        """Report an error by storing it in the results list."""
        if error_code is None:
            # Plugins embed the code as the first word of the text.
            error_code, text = text.split(' ', 1)

        physical_line = line
        # If we're recovering from a problem in _make_processor, we will not
        # have this attribute.
        if not physical_line and getattr(self, 'processor', None):
            physical_line = self.processor.line_for(line_number)

        error = (error_code, line_number, column, text, physical_line)
        self.results.append(error)
        return error_code

    def run_check(self, plugin, **arguments):
        """Run the check in a single plugin."""
        LOG.debug('Running %r with %r', plugin, arguments)
        try:
            self.processor.keyword_arguments_for(
                plugin['parameters'],
                arguments,
            )
        except AttributeError as ae:
            LOG.error('Plugin requested unknown parameters.')
            raise exceptions.PluginRequestedUnknownParameters(
                plugin=plugin,
                exception=ae,
            )
        return plugin['plugin'](**arguments)

    @staticmethod
    def _extract_syntax_information(exception):
        """Derive a (row, column) pair from a syntax-related exception."""
        token = ()
        if len(exception.args) > 1:
            token = exception.args[1]
            if token and len(token) > 3:
                row, column = token[1:3]
        else:
            row, column = (1, 0)

        if column > 0 and token and isinstance(exception, SyntaxError):
            # NOTE(sigmavirus24): SyntaxErrors report 1-indexed column
            # numbers. We need to decrement the column number by 1 at
            # least.
            column_offset = 1
            row_offset = 0
            # See also: https://gitlab.com/pycqa/flake8/issues/237
            physical_line = token[-1]

            # NOTE(sigmavirus24): Not all "tokens" have a string as the last
            # argument. In this event, let's skip trying to find the correct
            # column and row values.
            if physical_line is not None:
                # NOTE(sigmavirus24): SyntaxErrors also don't exactly have a
                # "physical" line so much as what was accumulated by the point
                # at which the error was detected.
                # See also: https://gitlab.com/pycqa/flake8/issues/237
                lines = physical_line.rstrip('\n').split('\n')
                row_offset = len(lines) - 1
                logical_line = lines[0]
                logical_line_length = len(logical_line)
                if column > logical_line_length:
                    column = logical_line_length
            row -= row_offset
            column -= column_offset
        return (row, column)

    def run_ast_checks(self):
        """Run all checks expecting an abstract syntax tree."""
        try:
            ast = self.processor.build_ast()
        except (ValueError, SyntaxError, TypeError):
            (exc_type, exception) = sys.exc_info()[:2]
            row, column = self._extract_syntax_information(exception)
            self.report('E999', row, column, '%s: %s' %
                        (exc_type.__name__, exception.args[0]))
            return

        for plugin in self.checks['ast_plugins']:
            checker = self.run_check(plugin, tree=ast)
            # If the plugin uses a class, call the run method of it, otherwise
            # the call should return something iterable itself
            try:
                runner = checker.run()
            except AttributeError:
                runner = checker
            for (line_number, offset, text, check) in runner:
                self.report(
                    error_code=None,
                    line_number=line_number,
                    column=offset,
                    text=text,
                )

    def run_logical_checks(self):
        """Run all checks expecting a logical line."""
        comments, logical_line, mapping = self.processor.build_logical_line()
        if not mapping:
            # Nothing was accumulated into this logical line.
            return

        self.processor.update_state(mapping)
        LOG.debug('Logical line: "%s"', logical_line.rstrip())

        for plugin in self.checks['logical_line_plugins']:
            self.processor.update_checker_state_for(plugin)
            results = self.run_check(plugin, logical_line=logical_line) or ()
            for offset, text in results:
                offset = find_offset(offset, mapping)
                line_number, column_offset = offset
                self.report(
                    error_code=None,
                    line_number=line_number,
                    column=column_offset,
                    text=text,
                )

        self.processor.next_logical_line()

    def run_physical_checks(self, physical_line, override_error_line=None):
        """Run all checks for a given physical line."""
        for plugin in self.checks['physical_line_plugins']:
            self.processor.update_checker_state_for(plugin)
            result = self.run_check(plugin, physical_line=physical_line)
            if result is not None:
                column_offset, text = result
                error_code = self.report(
                    error_code=None,
                    line_number=self.processor.line_number,
                    column=column_offset,
                    text=text,
                    line=(override_error_line or physical_line),
                )
                self.processor.check_physical_error(error_code, physical_line)

    def process_tokens(self):
        """Process tokens and trigger checks.

        This can raise a :class:`flake8.exceptions.InvalidSyntax` exception.
        Instead of using this directly, you should use
        :meth:`flake8.checker.FileChecker.run_checks`.
        """
        parens = 0
        statistics = self.statistics
        file_processor = self.processor
        for token in file_processor.generate_tokens():
            statistics['tokens'] += 1
            self.check_physical_eol(token)
            token_type, text = token[0:2]
            processor.log_token(LOG, token)
            if token_type == tokenize.OP:
                parens = processor.count_parentheses(parens, text)
            elif parens == 0:
                if processor.token_is_newline(token):
                    self.handle_newline(token_type)
                elif (processor.token_is_comment(token) and
                        len(file_processor.tokens) == 1):
                    self.handle_comment(token, text)

        if file_processor.tokens:
            # If any tokens are left over, process them
            self.run_physical_checks(file_processor.lines[-1])
            self.run_logical_checks()

    def run_checks(self):
        """Run checks against the file.

        :returns:
            A tuple of (filename, results, statistics).
        """
        try:
            self.process_tokens()
        except exceptions.InvalidSyntax as exc:
            self.report(exc.error_code, exc.line_number, exc.column_number,
                        exc.error_message)

        self.run_ast_checks()

        logical_lines = self.processor.statistics['logical lines']
        self.statistics['logical lines'] = logical_lines
        return self.filename, self.results, self.statistics

    def handle_comment(self, token, token_text):
        """Handle the logic when encountering a comment token."""
        # The comment also ends a physical line
        # Tokens are tuples (which do not support item assignment), so we
        # copy to a list before rewriting the text and end position.
        token = list(token)
        token[1] = token_text.rstrip('\r\n')
        token[3] = (token[2][0], token[2][1] + len(token[1]))
        self.processor.tokens = [tuple(token)]
        self.run_logical_checks()

    def handle_newline(self, token_type):
        """Handle the logic when encountering a newline token."""
        if token_type == tokenize.NEWLINE:
            self.run_logical_checks()
            self.processor.reset_blank_before()
        elif len(self.processor.tokens) == 1:
            # The physical line contains only this token.
            self.processor.visited_new_blank_line()
            self.processor.delete_first_token()
        else:
            self.run_logical_checks()

    def check_physical_eol(self, token):
        """Run physical checks if and only if it is at the end of the line."""
        if processor.is_eol_token(token):
            # Obviously, a newline token ends a single physical line.
            self.run_physical_checks(token[4])
        elif processor.is_multiline_string(token):
            # Less obviously, a string that contains newlines is a
            # multiline string, either triple-quoted or with internal
            # newlines backslash-escaped. Check every physical line in the
            # string *except* for the last one: its newline is outside of
            # the multiline string, so we consider it a regular physical
            # line, and will check it like any other physical line.
            #
            # Subtleties:
            # - have to wind self.line_number back because initially it
            #   points to the last line of the string, and we want
            #   check_physical() to give accurate feedback
            line_no = token[2][0]
            with self.processor.inside_multiline(line_number=line_no):
                for line in self.processor.split_line(token):
                    self.run_physical_checks(line + '\n',
                                             override_error_line=token[4])
def _pool_init():
    """Ensure correct signaling of ^C using multiprocessing.Pool."""
    # Workers ignore SIGINT so the parent process alone handles ^C and can
    # tear the pool down cleanly.
    signal.signal(signal.SIGINT, signal.SIG_IGN)
def calculate_pool_chunksize(num_checkers, num_jobs):
    """Determine the chunksize for the multiprocessing Pool.

    - For chunksize, see: https://docs.python.org/3/library/multiprocessing.html#multiprocessing.pool.Pool.imap # noqa
    - This formula, while not perfect, aims to give each worker two batches of
      files to process.
    - See: https://gitlab.com/pycqa/flake8/merge_requests/156#note_18878876
    - See: https://gitlab.com/pycqa/flake8/issues/265
    """
    # Two batches per worker, but never a chunksize below 1.
    twice_the_jobs = num_jobs * 2
    return max(1, num_checkers // twice_the_jobs)
def _run_checks(file_checker):
    """Run all checks for one file checker (multiprocessing Pool helper)."""
    return file_checker.run_checks()
def find_offset(offset, mapping):
    """Find the offset tuple for a single offset.

    :param offset:
        Either an already-resolved ``(row, column)`` tuple (returned
        unchanged) or an integer offset into the logical line.
    :param mapping:
        Sequence of ``(token_offset, (row, column))`` pairs produced while
        building the logical line.
    :returns:
        A ``(row, column)`` tuple locating ``offset`` in the file.
    """
    if isinstance(offset, tuple):
        return offset

    # Find the first token whose logical-line offset is at or past the
    # requested offset, then translate relative to that token's position.
    for token_offset, position in mapping:
        if offset <= token_offset:
            break
    return (position[0], position[1] + offset - token_offset)