~ubuntu-branches/ubuntu/utopic/codespell/utopic : revision 1

1

#!/usr/bin/env python3

2

# -*- coding: utf-8 -*-

3

#

4

# This program is free software; you can redistribute it and/or modify

5

# it under the terms of the GNU General Public License as published by

6

# the Free Software Foundation; version 2 of the License.

7

#

8

# This program is distributed in the hope that it will be useful,

9

# but WITHOUT ANY WARRANTY; without even the implied warranty of

10

# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

11

# GNU General Public License for more details.

12

#

13

# You should have received a copy of the GNU General Public License

14

# along with this program; if not, see

15

# http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.

16

"""

17

18

19

"""

20

21

import sys

22

import re

23

from optparse import OptionParser

24

import os

25

import fnmatch

26

27

# Usage banner handed to OptionParser; optparse expands %prog itself.
USAGE = "\n\t%prog [OPTIONS] [file1 file2 ... fileN]\n"
VERSION = '1.6'

# --- Module-wide state shared by the functions below ---

# Lower-cased misspelled word -> Misspell entry (filled by build_dict).
misspellings = {}
# Exact lines that must never be reported or changed
# (filled by build_exclude_hashes).
exclude_lines = set()
# Parsed command-line options (set in main).
options = None
# FileOpener instance used to read the files (set in main).
fileopener = None
# Bitmask of QuietLevels values (set in main).
quiet_level = 0
# Encodings tried, in order, when chardet is not used.
encodings = ['utf-8', 'iso-8859-1']
# Dictionary shipped next to this script, used when -D is not given.
default_dictionary = os.path.join(
    os.path.dirname(__file__), 'data', 'dictionary.txt')

39

40

# OPTIONS:
#
# ARGUMENTS:
#    dict_filename       The file containing the dictionary of misspellings.
#                        If set to '-', it will be read from stdin
#    file1 .. fileN      Files to check spelling

46

47

class QuietLevels:
    """Bit flags combined into the ``quiet_level`` mask (-q option).

    Each flag silences one category of message; NONE silences nothing.
    """

    NONE = 0
    ENCODING = 1 << 0             # warnings about wrong encoding
    BINARY_FILE = 1 << 1          # warnings about binary files
    DISABLED_FIXES = 1 << 2       # fixes disabled in the dictionary
    NON_AUTOMATIC_FIXES = 1 << 3  # fixes that cannot be applied automatically
    FIXES = 1 << 4                # "FIXED: <file>" notices

54

55

class GlobMatch:
    """Match file names against a comma-separated list of glob patterns."""

    def __init__(self, pattern):
        # A missing or empty pattern string means "match nothing".
        self.pattern_list = pattern.split(',') if pattern else None

    def match(self, filename):
        """Return True when ``filename`` matches any configured glob."""
        globs = self.pattern_list
        if globs is None:
            return False
        return any(fnmatch.fnmatch(filename, glob) for glob in globs)

71

72

class Misspell:
    """One dictionary entry for a misspelled word.

    data   -- replacement text (may hold several comma-separated candidates)
    fix    -- True when the replacement may be applied automatically
    reason -- explanation stored with the entry, or '' when there is none
    """

    def __init__(self, data, fix, reason):
        self.data, self.fix, self.reason = data, fix, reason

77

78

class TermColors:
    """ANSI escape sequences used to colorize the report."""

    # Names of the attributes that hold an escape sequence.
    _FIELDS = ('FILE', 'WWORD', 'FWORD', 'DISABLE')

    def __init__(self):
        self.FILE = '\033[33m'     # file name / line number
        self.WWORD = '\033[31m'    # wrong word
        self.FWORD = '\033[32m'    # fixed word
        self.DISABLE = '\033[0m'   # reset attributes

    def disable(self):
        """Turn every sequence into the empty string (no coloring)."""
        for field in self._FIELDS:
            setattr(self, field, '')

90

91

class Summary:
    """Tally of how many times each misspelled word was recorded."""

    def __init__(self):
        # Maps misspelled word -> number of recorded occurrences.
        self.summary = {}

    def update(self, wrongword):
        """Record one more occurrence of ``wrongword``."""
        self.summary[wrongword] = self.summary.get(wrongword, 0) + 1

    def __str__(self):
        """Return one "word   count" row per word, sorted by word.

        The count is right-aligned so that a row is 15 characters wide
        when it fits; longer words (or large counts) still get at least
        one blank between the word and its count instead of running
        together.
        """
        rows = []
        for key in sorted(self.summary):
            count = self.summary[key]
            # Never let the field shrink below "one space + the digits".
            width = max(15 - len(key), len(str(count)) + 1)
            rows.append("{0}{1:{width}}".format(key, count, width=width))
        return "\n".join(rows)

106

107

class FileOpener:
    """Read text files, working out which encoding to decode them with.

    With ``use_chardet`` true the encoding is detected with the chardet
    package; otherwise the module-level ``encodings`` list is tried in
    order.
    """

    def __init__(self, use_chardet):
        self.use_chardet = use_chardet
        if use_chardet:
            self.init_chardet()

    def init_chardet(self):
        """Create the chardet detector; raise Exception if chardet is absent."""
        try:
            from chardet.universaldetector import UniversalDetector
        except ImportError:
            raise Exception("There's no chardet installed to import from. "
                            "Please, install it and check your PYTHONPATH "
                            "environment variable")

        self.encdetector = UniversalDetector()

    def open(self, filename):
        """Return ``(lines, encoding)`` for ``filename``."""
        if self.use_chardet:
            return self.open_with_chardet(filename)
        return self.open_with_internal(filename)

    def open_with_chardet(self, filename):
        """Detect the encoding with chardet, then read the file with it.

        Re-raises UnicodeDecodeError / LookupError after printing an
        error to stderr.
        """
        self.encdetector.reset()
        with open(filename, 'rb') as f:
            for line in f:
                self.encdetector.feed(line)
                if self.encdetector.done:
                    break
        self.encdetector.close()
        encoding = self.encdetector.result['encoding']

        try:
            # The context manager closes the file on every path and does
            # not touch an unbound name when open() itself fails.
            with open(filename, encoding=encoding) as f:
                lines = f.readlines()
        except UnicodeDecodeError:
            print('ERROR: Could not detect encoding: %s' % filename,
                  file=sys.stderr)
            raise
        except LookupError:
            print('ERROR: %s -- Don\'t know how to handle encoding %s'
                  % (filename, encoding), file=sys.stderr)
            raise

        return lines, encoding

    def open_with_internal(self, filename):
        """Try each entry of ``encodings`` in turn until one decodes.

        Raises Exception('Unknown encoding') only when every encoding
        failed; an empty file is returned as an empty list of lines.
        """
        for curr, encoding in enumerate(encodings):
            try:
                with open(filename, 'r', encoding=encoding) as f:
                    lines = f.readlines()
            except UnicodeDecodeError:
                if not quiet_level & QuietLevels.ENCODING:
                    print('WARNING: Decoding file %s' % filename,
                          file=sys.stderr)
                    print('WARNING: using encoding=%s failed. '
                          % encoding,
                          file=sys.stderr)
                    # Announce the encoding that will actually be tried
                    # next, and only when there is one.
                    if curr + 1 < len(encodings):
                        print('WARNING: Trying next encoding: %s'
                              % encodings[curr + 1],
                              file=sys.stderr)
            else:
                return lines, encoding

        print('ERROR: Could not detect encoding: %s' % filename,
              file=sys.stderr)
        raise Exception('Unknown encoding')

188

189

# -.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-

190

191

def parse_options(args):
    """Parse the command line and return ``(options, file_arguments)``.

    ``args`` is an argv-style sequence whose first element is the program
    name (this is how main() passes ``sys.argv``); everything after it is
    parsed. When ``args`` is empty, optparse falls back to
    ``sys.argv[1:]``.

    Exits with status 1 when the dictionary file does not exist. When no
    file argument is given, the current directory ('.') is used.
    """
    parser = OptionParser(usage=USAGE, version=VERSION)

    parser.add_option('-d', '--disable-colors',
                      action='store_true', default=False,
                      help='Disable colors even when printing to terminal')
    parser.add_option('-w', '--write-changes',
                      action='store_true', default=False,
                      help='write changes in place if possible')
    parser.add_option('-D', '--dictionary',
                      action='store', metavar='FILE',
                      default=default_dictionary,
                      help=('Custom dictionary file that contains spelling '
                            'corrections. If this flag is not specified '
                            'then default dictionary "%s" is used.')
                      % default_dictionary)

    parser.add_option('-s', '--summary',
                      action='store_true', default=False,
                      help='print summary of fixes')

    parser.add_option('-S', '--skip',
                      help='Comma-separated list of files to skip. It '
                           'accepts globs as well. E.g.: if you want '
                           'codespell to skip .eps and .txt files, '
                           'you\'d give "*.eps,*.txt" to this option. '
                           'It is especially useful if you are using in '
                           'conjunction with -r option.')

    parser.add_option('-x', '--exclude-file',
                      help='FILE with lines that should not be changed',
                      metavar='FILE')

    parser.add_option('-i', '--interactive',
                      action='store', type='int', default=0,
                      help='Set interactive mode when writing changes. '
                           '0 is the same of no interactivity; 1 makes '
                           'codespell ask confirmation; 2 ask user to '
                           'choose one fix when more than one is '
                           'available; 3 applies both 1 and 2')

    parser.add_option('-q', '--quiet-level',
                      action='store', type='int', default=0,
                      help='Bitmask that allows codespell to run quietly. '
                           '0: the default, in which all messages are '
                           'printed. 1: disable warnings about wrong '
                           'encoding. 2: disable warnings about binary '
                           'file. 4: shut down warnings about automatic '
                           'fixes that were disabled in dictionary. '
                           '8: don\'t print anything for non-automatic '
                           'fixes. 16: don\'t print fixed files.')

    parser.add_option('-e', '--hard-encoding-detection',
                      action='store_true', default=False,
                      help='Use chardet to detect the encoding of each '
                           'file. This can slow down codespell, but is more '
                           'reliable in detecting encodings other than utf-8, '
                           'iso8859-1 and ascii.')

    # Honor the caller-supplied argument vector (minus the program name)
    # instead of always re-reading sys.argv. Passing None makes optparse
    # use sys.argv[1:], which keeps the old behavior for an empty call.
    argv = list(args[1:]) if args else None
    (o, args) = parser.parse_args(argv)

    if not os.path.exists(o.dictionary):
        print('ERROR: cannot find dictionary file!', file=sys.stderr)
        parser.print_help()
        sys.exit(1)

    if not args:
        args.append('.')

    return o, args

262

263

def build_exclude_hashes(filename):
    """Add every line of ``filename`` to the global ``exclude_lines`` set.

    Lines are stored exactly as read, including their line terminator.
    """
    with open(filename, 'r') as handle:
        # Iterating a text file yields its lines one by one.
        exclude_lines.update(handle)

267

268

def build_dict(filename):
    """Load the misspelling dictionary into the global ``misspellings``.

    Each useful line has the form ``wrong->data``:

    * ``wrong->right``            one fix, applied automatically
    * ``wrong->right1, right2,``  trailing comma: candidates only, no
                                  automatic fix and no reason
    * ``wrong->right, reason``    text after the last comma is the
                                  reason; no automatic fix

    Lines that do not contain ``->`` (for example blank lines) are
    skipped instead of aborting the whole load.
    """
    with open(filename, 'r', encoding='utf-8') as f:
        for line in f:
            # partition() never raises, unlike unpacking split('->').
            key, arrow, data = line.partition('->')
            if not arrow:
                continue

            key = key.strip()
            data = data.strip()
            comma = data.rfind(',')

            if comma < 0:
                fix = True
                reason = ''
            elif comma == len(data) - 1:
                data = data[:comma]
                reason = ''
                fix = False
            else:
                reason = data[comma + 1:].strip()
                data = data[:comma]
                fix = False

            misspellings[key] = Misspell(data, fix, reason)

288

289

def ishidden(filename):
    """Return True when the last path component is a dot-file.

    The special names '.' and '..' and an empty basename are not
    treated as hidden.
    """
    base = os.path.basename(filename)
    if base in ('', '.', '..'):
        return False
    return base.startswith('.')

297

298

299

def istextfile(filename):
    """Return False when the first 1024 bytes of the file contain a NUL."""
    with open(filename, mode='rb') as handle:
        head = handle.read(1024)
    return 0 not in head

306

307

def fix_case(word, fixword):
    """Return ``fixword`` recased to follow the casing of ``word``.

    Capitalized and all-upper-case words carry their casing over to the
    fix; for anything else (lower case or mixed) ``fixword`` is returned
    untouched.
    """
    # Order matters: a one-letter upper-case word counts as capitalized.
    for recase in (str.capitalize, str.upper):
        if recase(word) == word:
            return recase(fixword)
    return fixword

315

316

def ask_for_word_fix(line, wrongword, misspelling, interactivity):
    """Interactively decide whether and how to fix ``wrongword``.

    line          -- text of the line being examined (shown in the prompt)
    wrongword     -- the word exactly as it appears in the line
    misspelling   -- the Misspell entry for the word; it is updated in
                     place with the user's decision
    interactivity -- bitmask: 1 asks for confirmation of automatic
                     fixes, 2 asks to pick one of several candidates

    Returns ``(fix, fixword)``: whether the fix should be applied and the
    replacement recased like ``wrongword``.
    """
    if interactivity <= 0:
        return misspelling.fix, fix_case(wrongword, misspelling.data)

    if misspelling.fix and interactivity & 1:
        r = ''
        fixword = fix_case(wrongword, misspelling.data)
        while not r:
            print("%s\t%s ==> %s (Y/n) " % (line, wrongword, fixword), end='')
            # The prompt has no trailing newline, so flush explicitly;
            # otherwise it may not be visible before we block on stdin.
            sys.stdout.flush()
            r = sys.stdin.readline().strip().upper()
            if not r:
                r = 'Y'  # empty answer means "yes"
            if r != 'Y' and r != 'N':
                print("Say 'y' or 'n'")
                r = ''

        if r == 'N':
            misspelling.fix = False
            misspelling.fixword = ''

    elif (interactivity & 2) and not misspelling.reason:
        # if it is not disabled, i.e. it just has more than one possible fix,
        # we ask the user which word to use

        r = ''
        opt = [candidate.strip() for candidate in misspelling.data.split(',')]
        while not r:
            print("%s Choose an option (blank for none): " % line, end='')
            for i, candidate in enumerate(opt):
                fixword = fix_case(wrongword, candidate)
                print(" %d) %s" % (i, fixword), end='')
            print(": ", end='')
            sys.stdout.flush()

            n = sys.stdin.readline().strip()
            if not n:
                break

            try:
                n = int(n)
            except ValueError:
                n = -1
            # Only the indices shown in the menu are valid; a negative
            # number must not silently select from the end of the list.
            if 0 <= n < len(opt):
                r = opt[n]
            else:
                print("Not a valid option\n")

        if r:
            misspelling.fix = True
            misspelling.data = r

    return misspelling.fix, fix_case(wrongword, misspelling.data)

364

365

def parse_file(filename, colors, summary):
    """Check one file for misspellings, reporting and/or fixing them.

    filename -- path of the file to check, or '-' to read stdin
    colors   -- TermColors instance used to decorate the report
    summary  -- Summary instance to update, or None

    Binary files and files that cannot be read or decoded are skipped.
    With ``options.write_changes`` the fixes are written back to the file
    (or, for stdin, the fixed text is printed after a '---' line);
    otherwise each finding is printed.
    """
    changed = False
    encoding = encodings[0]  # if not defined, use UTF-8

    if filename == '-':
        lines = sys.stdin.readlines()
    else:
        # ignore binary files
        if not istextfile(filename):
            if not quiet_level & QuietLevels.BINARY_FILE:
                print("WARNING: Binary file: %s " % filename, file=sys.stderr)
            return
        try:
            lines, encoding = fileopener.open(filename)
        except Exception:
            # Skipping an unreadable file is deliberate (the opener has
            # already reported decoding problems), but KeyboardInterrupt
            # and SystemExit must not be swallowed here.
            return

    rx = re.compile(r"[\w\-']+")
    for i, line in enumerate(lines, 1):
        if line in exclude_lines:
            continue

        fixed_words = set()
        asked_for = set()

        # ``line`` stays the text as read; lines[i - 1] accumulates fixes.
        for word in rx.findall(line):
            lword = word.lower()
            if lword not in misspellings:
                continue

            misspelling = misspellings[lword]
            fix = misspelling.fix
            fixword = fix_case(word, misspelling.data)

            if options.interactive and lword not in asked_for:
                fix, fixword = ask_for_word_fix(lines[i - 1], word,
                                                misspelling,
                                                options.interactive)
                asked_for.add(lword)

            if summary and fix:
                summary.update(lword)

            if word in fixed_words:
                continue

            if options.write_changes and fix:
                changed = True
                lines[i - 1] = re.sub(r'\b%s\b' % word, fixword, lines[i - 1])
                fixed_words.add(word)
                continue

            # otherwise warning was explicitly set by interactive mode
            if options.interactive & 2 and not fix and not misspelling.reason:
                continue

            cfilename = "%s%s%s" % (colors.FILE, filename, colors.DISABLE)
            cline = "%s%d%s" % (colors.FILE, i, colors.DISABLE)
            cwrongword = "%s%s%s" % (colors.WWORD, word, colors.DISABLE)
            crightword = "%s%s%s" % (colors.FWORD, fixword, colors.DISABLE)

            if misspelling.reason:
                if quiet_level & QuietLevels.DISABLED_FIXES:
                    continue

                creason = " | %s%s%s" % (colors.FILE,
                                         misspelling.reason,
                                         colors.DISABLE)
            else:
                if quiet_level & QuietLevels.NON_AUTOMATIC_FIXES:
                    continue

                creason = ''

            if filename != '-':
                print("%(FILENAME)s:%(LINE)s: %(WRONGWORD)s "
                      " ==> %(RIGHTWORD)s%(REASON)s"
                      % {'FILENAME': cfilename, 'LINE': cline,
                         'WRONGWORD': cwrongword,
                         'RIGHTWORD': crightword, 'REASON': creason})
            else:
                print('%(LINE)s: %(STRLINE)s\n\t%(WRONGWORD)s '
                      '==> %(RIGHTWORD)s%(REASON)s'
                      % {'LINE': cline, 'STRLINE': line.strip(),
                         'WRONGWORD': cwrongword,
                         'RIGHTWORD': crightword, 'REASON': creason})

    if changed:
        if filename == '-':
            print("---")
            for line in lines:
                print(line, end='')
        else:
            if not quiet_level & QuietLevels.FIXES:
                print("%sFIXED:%s %s" % (colors.FWORD, colors.DISABLE, filename),
                      file=sys.stderr)
            # Context manager so the file is closed even if writing fails.
            with open(filename, 'w', encoding=encoding) as f:
                f.writelines(lines)

471

472

def main(*args):
    """Program entry point.

    ``args`` is the argv-style argument vector (program name first).
    Sets up the module-level state (options, quiet level, file opener),
    loads the dictionary, then checks every file argument; directories
    are walked recursively, skipping hidden directories, symlinked files
    and files matching the --skip globs. Prints the summary at the end
    when it was requested.
    """
    global options
    global quiet_level
    global fileopener

    (options, args) = parse_options(args)

    build_dict(options.dictionary)
    colors = TermColors()
    if options.disable_colors:
        colors.disable()

    summary = Summary() if options.summary else None

    if options.exclude_file:
        build_exclude_hashes(options.exclude_file)

    if options.quiet_level:
        quiet_level = options.quiet_level

    fileopener = FileOpener(options.hard_encoding_detection)

    glob_match = GlobMatch(options.skip)

    for filename in args:
        # ignore hidden files
        if ishidden(filename):
            continue

        if os.path.isdir(filename):
            for root, dirs, files in os.walk(filename):
                # Prune hidden directories in place so os.walk does not
                # descend into them. Slice assignment avoids deleting
                # from the list while iterating over it, which skipped
                # entries and could remove the wrong directory.
                dirs[:] = [d for d in dirs if not ishidden(d)]

                for file in files:
                    # Test the real path: the bare name would be
                    # resolved against the current working directory.
                    path = os.path.join(root, file)
                    if os.path.islink(path):
                        continue
                    if glob_match.match(file):
                        continue
                    parse_file(path, colors, summary)

            continue

        parse_file(filename, colors, summary)

    if summary:
        print("\n-------8<-------\nSUMMARY:")
        print(summary)

527

528

if __name__ == '__main__':
    # Run as a script: hand the raw argv to main() and use its return
    # value as the process exit status.
    exit_status = main(*sys.argv)
    sys.exit(exit_status)