2
# -*- coding: utf-8 -*-
4
# This program is free software; you can redistribute it and/or modify
5
# it under the terms of the GNU General Public License as published by
6
# the Free Software Foundation; version 2 of the License.
8
# This program is distributed in the hope that it will be useful,
9
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
# GNU General Public License for more details.
13
# You should have received a copy of the GNU General Public License
14
# along with this program; if not, see
15
# http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
17
Copyright (C) 2010-2011 Lucas De Marchi <lucas.de.marchi@gmail.com>
18
Copyright (C) 2011 ProFUSION embedded systems
23
from optparse import OptionParser
28
\t%prog [OPTIONS] [file1 file2 ... fileN]
37
encodings = [ 'utf-8', 'iso-8859-1' ]
38
default_dictionary = os.path.join(os.path.dirname(__file__), 'data', 'dictionary.txt')
43
# dict_filename The file containing the dictionary of misspellings.
44
# If set to '-', it will be read from stdin
45
# file1 .. fileN Files to check spelling
52
NON_AUTOMATIC_FIXES = 8
56
def __init__(self, pattern):
    """Store the comma-separated glob list used to decide which files to skip.

    pattern -- string of fnmatch-style globs separated by commas
               (e.g. "*.eps,*.txt"), or None/empty when nothing is skipped.
    """
    # The two assignments are alternatives.  Without the guard the None
    # assignment always wins (so no glob can ever match) and a None
    # pattern would crash on .split().
    if pattern:
        self.pattern_list = pattern.split(',')
    else:
        self.pattern_list = None
62
def match(self, filename):
    """Return True when *filename* matches at least one stored glob.

    Returns False when no pattern list was configured (pattern_list is
    None) or when none of the globs match.
    """
    if self.pattern_list is None:
        return False

    return any(fnmatch.fnmatch(filename, p) for p in self.pattern_list)
73
def __init__(self, data, fix, reason):
80
self.FILE = '\033[33m'
81
self.WWORD = '\033[31m'
82
self.FWORD = '\033[32m'
83
self.DISABLE = '\033[0m'
95
def update(self, wrongword):
    """Record one more occurrence of *wrongword* in the summary table.

    self.summary maps each misspelled word to the number of times it
    has been seen so far.
    """
    # Incrementing an existing counter and starting a new one at 1 are
    # alternatives; a single get() covers both and cannot reset a counter
    # that was just incremented.
    self.summary[wrongword] = self.summary.get(wrongword, 0) + 1
102
keys = list(self.summary.keys())
105
return "\n".join(["{0}{1:{width}}".format(key, self.summary.get(key), width=15 - len(key)) for key in keys])
108
# Remember whether chardet-based encoding detection was requested; open()
# call sites read this flag back as self.use_chardet.
# NOTE(review): the remainder of this constructor is not visible in this
# view of the file -- confirm against the full source.
def __init__(self, use_chardet):
109
self.use_chardet = use_chardet
113
def init_chardet(self):
    """Import chardet on demand and create the detector used for each file.

    The import is local so that chardet is only required when hard
    encoding detection is actually requested.

    Raises Exception with installation guidance when chardet cannot be
    imported.
    """
    try:
        from chardet.universaldetector import UniversalDetector
    except ImportError:
        # Only complain when the import really failed; raising
        # unconditionally would make the detector impossible to create.
        raise Exception("There's no chardet installed to import from. "
                        "Please, install it and check your PYTHONPATH "
                        "environment variable")

    self.encdetector = UniversalDetector()
123
def open(self, filename):
    """Read *filename* with the configured encoding-detection strategy.

    Dispatches to open_with_chardet() when self.use_chardet is set and to
    open_with_internal() otherwise, returning whatever the chosen helper
    returns.
    """
    # The two returns are alternatives selected by the flag; without the
    # branch the second helper would be unreachable.
    if self.use_chardet:
        return self.open_with_chardet(filename)
    else:
        return self.open_with_internal(filename)
129
# Read `filename` using an encoding guessed by the detector held in
# self.encdetector: the detector is reset, fed data read from the file opened
# in binary mode, closed, and its result['encoding'] is then used to re-open
# the file as text.  Decoding problems are reported on stderr.
# Returns the pair (lines, encoding).
# NOTE(review): several lines of this method (loop header, try/except
# scaffolding, cleanup) are not visible in this view -- confirm the exact
# control flow against the full source.
def open_with_chardet(self, filename):
130
self.encdetector.reset()
131
with open(filename, 'rb') as f:
133
self.encdetector.feed(line)
134
if self.encdetector.done:
136
self.encdetector.close()
137
encoding = self.encdetector.result['encoding']
140
f = open(filename, encoding=encoding)
141
lines = f.readlines()
142
except UnicodeDecodeError:
143
print('ERROR: Could not detect encoding: %s' % filename,
147
print('ERROR: %s -- Don\'t know how to handle encoding %s'
148
% (filename, encoding), file=sys.stderr)
153
return lines, encoding
156
# Read `filename` by trying entries of the module-level `encodings` list,
# indexed by `curr`.  A UnicodeDecodeError triggers warnings (suppressed when
# the QuietLevels.ENCODING bit is set in quiet_level) about moving on to the
# next encoding; Exception('Unknown encoding') is raised after the
# "Could not detect encoding" error is printed.
# Returns the pair (lines, encoding), where encoding is encodings[curr].
# NOTE(review): the loop header, the initialisation/advance of `curr` and the
# try scaffolding are not visible in this view -- confirm against the full
# source.
def open_with_internal(self, filename):
162
f = open(filename, 'r', encoding=encodings[curr])
163
lines = f.readlines()
165
except UnicodeDecodeError:
166
if not quiet_level & QuietLevels.ENCODING:
167
print('WARNING: Decoding file %s' % filename,
169
print('WARNING: using encoding=%s failed. '
172
print('WARNING: Trying next encoding: %s' % encodings[curr],
181
print('ERROR: Could not detect encoding: %s' % filename,
183
raise Exception('Unknown encoding')
185
encoding = encodings[curr]
187
return lines, encoding
189
# -.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-
191
# Build the optparse parser for the command-line flags (colors, in-place
# writing, dictionary file, summary, skip globs, exclude file, interactive
# level, quiet-level bitmask, chardet detection) and parse the command line.
# An error is printed to stderr when the selected dictionary file does not
# exist.
# NOTE(review): parser.parse_args() is called without arguments, so optparse
# falls back to sys.argv[1:]; the `args` parameter is not consulted by any
# line visible here.
# NOTE(review): the tail of this function (what happens after the error
# message, and the return statement) is not visible in this view.
def parse_options(args):
192
parser = OptionParser(usage=USAGE, version=VERSION)
194
parser.add_option('-d', '--disable-colors',
195
action = 'store_true', default = False,
196
help = 'Disable colors even when printing to terminal')
197
parser.add_option('-w', '--write-changes',
198
action = 'store_true', default = False,
199
help = 'write changes in place if possible')
200
parser.add_option('-D', '--dictionary',
201
action = 'store', metavar='FILE',
202
default = default_dictionary,
203
help = 'Custom dictionary file that contains spelling '\
204
'corrections. If this flag is not specified '\
205
'then default dictionary "%s" is used.' %
208
parser.add_option('-s', '--summary',
209
action = 'store_true', default = False,
210
help = 'print summary of fixes')
212
parser.add_option('-S', '--skip',
213
help = 'Comma-separated list of files to skip. It '\
214
'accepts globs as well. E.g.: if you want '\
215
'codespell to skip .eps and .txt files, '\
216
'you\'d give "*.eps,*.txt" to this option. '\
217
'It is especially useful if you are using in '\
218
'conjunction with -r option.')
220
parser.add_option('-x', '--exclude-file',
221
help = 'FILE with lines that should not be changed',
224
parser.add_option('-i', '--interactive',
225
action='store', type='int', default=0,
226
help = 'Set interactive mode when writing changes. ' \
227
'0 is the same of no interactivity; 1 makes ' \
228
'codespell ask confirmation; 2 ask user to ' \
229
'choose one fix when more than one is ' \
230
'available; 3 applies both 1 and 2')
232
parser.add_option('-q', '--quiet-level',
233
action='store', type='int', default=0,
234
help = 'Bitmask that allows codespell to run quietly. '\
235
'0: the default, in which all messages are '\
236
'printed. 1: disable warnings about wrong '\
237
'encoding. 2: disable warnings about binary '\
238
'file. 4: shut down warnings about automatic '\
239
'fixes that were disabled in dictionary. '\
240
'8: don\'t print anything for non-automatic '\
241
'fixes. 16: don\'t print fixed files.')
243
parser.add_option('-e', '--hard-encoding-detection',
244
action='store_true', default = False,
245
help = 'Use chardet to detect the encoding of each '\
246
'file. This can slow down codespell, but is more '\
247
'reliable in detecting encodings other than utf-8, '\
248
'iso8859-1 and ascii.')
251
(o, args) = parser.parse_args()
253
if not os.path.exists(o.dictionary):
254
print('ERROR: cannot find dictionary file!', file=sys.stderr)
263
def build_exclude_hashes(filename):
    """Add every line of *filename* to the module-level exclude_lines set.

    Lines are stored exactly as read (including their line terminator), so
    they can be compared directly against lines read from checked files.
    """
    with open(filename, 'r') as f:
        # Iterate the file so that `line` is bound for each entry; without
        # the loop the add() call would reference an undefined name.
        for line in f:
            exclude_lines.add(line)
268
# Load the misspelling dictionary from `filename` (opened as UTF-8 text with
# buffering=1) into the module-level `misspellings` mapping.  Each entry is
# split on '->' into a key and its data; the position of the last ',' in the
# data is examined and each key is stored as Misspell(data, fix, reason).
# NOTE(review): the loop header and the branches that derive `fix` and
# `reason` from the comma position are only partly visible in this view --
# confirm the exact rules against the full source.
def build_dict(filename):
269
with open(filename, 'r', 1, 'utf-8') as f:
271
[key, data] = line.split('->')
273
fix = data.rfind(',')
278
elif fix == (len(data) - 1):
283
reason = data[fix + 1:].strip()
287
misspellings[key] = Misspell(data, fix, reason)
289
def ishidden(filename):
    """Return True when the last component of *filename* is a dot-file.

    The special names '.' and '..' and an empty basename (for example a
    path with a trailing slash) are not considered hidden.
    """
    bfilename = os.path.basename(filename)

    return bfilename not in ('', '.', '..') and bfilename.startswith('.')
299
# Text/binary probe: opens `filename` in binary mode.  The caller reports a
# "Binary file" warning when this returns a false value.
# NOTE(review): the body that inspects the file contents and returns the
# verdict is not visible in this view -- confirm against the full source.
def istextfile(filename):
300
with open(filename, mode='rb') as f:
307
def fix_case(word, fixword):
    """Return *fixword* re-cased to follow the casing of *word*.

    Capitalized input gives a capitalized fix, all-upper input gives an
    upper-cased fix, and anything else returns *fixword* unchanged.
    """
    if word == word.capitalize():
        return fixword.capitalize()
    elif word == word.upper():
        return fixword.upper()
    # they are both lower case
    # or we don't have any idea
    # Fall through explicitly so callers never receive None.
    return fixword
316
# Interactively decide how to fix `wrongword` found in `line`.
#   interactivity <= 0 : no prompt; the dictionary's answer is returned as is.
#   interactivity & 1  : when the entry is auto-fixable, ask "(Y/n)" on stdin.
#   interactivity & 2  : when the entry has no reason attached (i.e. it has
#                        several candidate fixes), list the comma-separated
#                        candidates and read the chosen option from stdin.
# The answer is recorded on the `misspelling` object itself (its .fix flag is
# updated), and the function returns the pair
# (misspelling.fix, fix_case(wrongword, misspelling.data)).
# NOTE(review): the retry loops, the handling of the 'N' answer and the code
# that applies the chosen option are not fully visible in this view --
# confirm against the full source.
def ask_for_word_fix(line, wrongword, misspelling, interactivity):
317
if interactivity <= 0:
318
return misspelling.fix, fix_case(wrongword, misspelling.data)
320
if misspelling.fix and interactivity & 1:
322
fixword = fix_case(wrongword, misspelling.data)
324
print("%s\t%s ==> %s (Y/n) " % (line, wrongword, fixword), end='')
325
r = sys.stdin.readline().strip().upper()
327
if r != 'Y' and r != 'N':
328
print("Say 'y' or 'n'")
332
misspelling.fix = False
333
misspelling.fixword = ''
335
elif (interactivity & 2) and not misspelling.reason:
336
# if it is not disabled, i.e. it just has more than one possible fix,
337
# we ask the user which word to use
340
opt = list(map(lambda x: x.strip(), misspelling.data.split(',')))
342
print("%s Choose an option (blank for none): " % line, end='')
343
for i in range(len(opt)):
344
fixword = fix_case(wrongword, opt[i])
345
print(" %d) %s" % (i, fixword), end='')
349
n = sys.stdin.readline().strip()
356
except (ValueError, IndexError):
357
print("Not a valid option\n")
360
misspelling.fix = True
363
return misspelling.fix, fix_case(wrongword, misspelling.data)
365
# Check one file for known misspellings and optionally fix it in place.
# Visible steps:
#   * files for which istextfile() is false are reported as binary (unless the
#     QuietLevels.BINARY_FILE bit is set in quiet_level);
#   * the content is read through fileopener.open(), which also yields the
#     encoding later used to write the file back;
#   * lines present in exclude_lines are treated specially;
#   * words are extracted with the regex [\w\-']+ and looked up in the
#     module-level `misspellings` mapping via `lword`;
#   * in interactive mode ask_for_word_fix() may override the fix decision;
#   * each hit is counted with summary.update(lword);
#   * with options.write_changes and a usable fix, the word is replaced in
#     lines[i - 1] via re.sub on word boundaries;
#   * otherwise a colored "wrong ==> right" report line is printed, subject to
#     the DISABLED_FIXES / NON_AUTOMATIC_FIXES quiet bits;
#   * a "FIXED:" notice is printed (unless QuietLevels.FIXES is set) and the
#     file is re-opened for writing with the detected encoding.
# NOTE(review): many lines (loop headers, the definition of `lword`, `i`,
# `asked_for`, `fixed_words`, several branches) are not visible in this view,
# so the exact ordering and conditions must be confirmed against the full
# source.
def parse_file(filename, colors, summary):
373
encoding = encodings[0] # if not defined, use UTF-8
377
lines = f.readlines()
379
# ignore binary files
380
if not istextfile(filename):
381
if not quiet_level & QuietLevels.BINARY_FILE:
382
print("WARNING: Binary file: %s " % filename, file=sys.stderr)
385
lines, encoding = fileopener.open(filename)
390
rx = re.compile(r"[\w\-']+")
392
if line in exclude_lines:
399
for word in rx.findall(line):
401
if lword in misspellings:
402
fix = misspellings[lword].fix
403
fixword = fix_case(word, misspellings[lword].data)
405
if options.interactive and not lword in asked_for:
406
fix, fixword = ask_for_word_fix(lines[i - 1], word,
412
summary.update(lword)
414
if word in fixed_words:
417
if options.write_changes and fix:
419
lines[i - 1] = re.sub(r'\b%s\b' % word, fixword, lines[i - 1])
420
fixed_words.add(word)
423
# otherwise warning was explicitly set by interactive mode
424
if options.interactive & 2 and not fix and not misspellings[lword].reason:
427
cfilename = "%s%s%s" % (colors.FILE, filename, colors.DISABLE)
428
cline = "%s%d%s" % (colors.FILE, i, colors.DISABLE)
429
cwrongword = "%s%s%s" % (colors.WWORD, word, colors.DISABLE)
430
crightword = "%s%s%s" % (colors.FWORD, fixword, colors.DISABLE)
432
if misspellings[lword].reason:
433
if quiet_level & QuietLevels.DISABLED_FIXES:
436
creason = " | %s%s%s" % (colors.FILE,
437
misspellings[lword].reason,
440
if quiet_level & QuietLevels.NON_AUTOMATIC_FIXES:
446
print("%(FILENAME)s:%(LINE)s: %(WRONGWORD)s " \
447
" ==> %(RIGHTWORD)s%(REASON)s"
448
% {'FILENAME': cfilename, 'LINE': cline,
449
'WRONGWORD': cwrongword,
450
'RIGHTWORD': crightword, 'REASON': creason })
452
print('%(LINE)s: %(STRLINE)s\n\t%(WRONGWORD)s ' \
453
'==> %(RIGHTWORD)s%(REASON)s'
454
% { 'LINE': cline, 'STRLINE': line.strip(),
455
'WRONGWORD': cwrongword,
456
'RIGHTWORD': crightword, 'REASON': creason })
465
if not quiet_level & QuietLevels.FIXES:
466
print("%sFIXED:%s %s" % (colors.FWORD, colors.DISABLE, filename),
468
f = open(filename, 'w', encoding=encoding)
477
(options, args) = parse_options(args)
479
build_dict(options.dictionary)
480
colors = TermColors();
481
if options.disable_colors:
489
if options.exclude_file:
490
build_exclude_hashes(options.exclude_file)
492
if options.quiet_level:
493
quiet_level = options.quiet_level
495
fileopener = FileOpener(options.hard_encoding_detection)
497
glob_match = GlobMatch(options.skip)
499
for filename in args:
500
# ignore hidden files
501
if ishidden(filename):
504
if os.path.isdir(filename):
505
for root, dirs, files in os.walk(filename):
514
if os.path.islink(file):
516
if glob_match.match(file):
518
parse_file(os.path.join(root, file), colors, summary)
522
parse_file(filename, colors, summary)
525
print("\n-------8<-------\nSUMMARY:")
528
# Script entry point: when run directly, unpack sys.argv into main()'s
# positional arguments and use main()'s return value as the exit status.
if __name__ == '__main__':
529
sys.exit(main(*sys.argv))