~mfo/serverguide/ibft : contents of scripts/ascii

~mfo/serverguide/ibft : (revision 372)
#! /usr/bin/env python3
#
# ascii_checker.py 2016.03.22:
#	Originally by Byte Commander at the request of Doug Smythies
#	http://askubuntu.com/users/367990/byte-commander
#
#	Used for checking source code for non-standard characters, which
#	can lead to troubles.
#	For checking that the master C xml files are ascii only do:
#	scripts/ascii_checker.py --only-matching --encoding ascii serverguide/C/*.xml
#	alternatively, the no script method is:
#	LANG=C grep -P -H -n "[\x80-\xFF]" serverguide/C/*.xml
#	For checking that all the files, translated ones included, are utf-8 only do:
#	scripts/ascii_checker.py --only-matching --encoding utf-8 serverguide/*/*.xml
#	For more information do:
#	scripts/ascii_checker.py --help
#
#	To Do: Some groups of space characters could be replaced by tabs.
#
import sys
import shutil
import argparse

# Command-line interface.  The three output-mode switches (-s/-c/-l) exclude
# each other and all write to "output", which stays "full" when none is given.
argparser = argparse.ArgumentParser(
    description="Show all lines of a FILE containing characters that don't "
                "match the selected ENCODING.",
)
argparser.add_argument(
    "files",
    nargs="+",
    metavar="FILE",
    help="the file to be examined",
)
argparser.add_argument(
    "-e", "--encoding",
    dest="codec",
    default="ascii",
    metavar="ENCODING",
    help="file encoding to test (default 'ascii')",
)

out_group = argparser.add_mutually_exclusive_group()
out_group.add_argument(
    "-s", "--summary",
    dest="output",
    action="store_const",
    const="summary",
    help="only print the summary",
)
out_group.add_argument(
    "-c", "--count",
    dest="output",
    action="store_const",
    const="count",
    help="only print the detected line count",
)
out_group.add_argument(
    "-l", "--lines",
    dest="output",
    action="store_const",
    const="lines",
    help="only print the detected lines",
)

argparser.add_argument(
    "-m", "--only-matching",
    action="store_true",
    help="hide files without matching lines from output",
)
argparser.add_argument(
    "-w", "--no-warnings",
    dest="warn",
    action="store_false",
    help="hide warnings from output",
)
argparser.add_argument(
    "-n", "--no-numbers",
    dest="lnum",
    action="store_false",
    help="do not show line numbers in output",
)
argparser.add_argument(
    "-f", "--fit-width",
    type=int,
    default=-1,
    metavar="N",
    help="trim lines to N characters, or terminal width if N=0; "
         "non-printable characters like tabs will be removed",
)
argparser.add_argument(
    "-t", "--title",
    action="store_true",
    help="print title line above each file",
)
argparser.set_defaults(output="full")
args = argparser.parse_args()

# Width limit applied to reported lines: a positive N is taken literally,
# N == 0 means "use the terminal width", and a negative N (the default)
# disables trimming altogether.
if args.fit_width > 0:
    max_width = args.fit_width
elif args.fit_width < 0:
    max_width = None
else:
    max_width = shutil.get_terminal_size().columns

def process_file(f):
    """Report the lines of file *f* that cannot be decoded with ``args.codec``.

    The file is read in binary mode and split into lines; each line is then
    decoded strictly with the selected codec.  Depending on ``args.output``
    the offending lines, a per-file summary, or just the number of offending
    lines is printed to stdout.  The function reads the module-level ``args``
    namespace and ``max_width`` and returns ``None``.

    A file that cannot be read is skipped; a warning goes to stderr unless
    warnings were disabled.  If the codec itself cannot be used, an error is
    printed to stderr and the program terminates with exit status 2.
    """
    try:
        with open(f, mode="rb") as file:
            lines = file.readlines()
    except FileNotFoundError:
        problem = "The file '{}' does not exist.".format(f)
    except IsADirectoryError:
        problem = "'{}' is a directory. Skipping it.".format(f)
    except PermissionError:
        problem = "No permission to read '{}'.".format(f)
    except OSError as err:
        # Any other read failure (not a directory, I/O error, ...) is skipped
        # like the cases above instead of aborting the whole run.
        problem = "Cannot read '{}': {}.".format(f, err.strerror or err)
    else:
        problem = None

    if problem is not None:
        if args.warn:
            print("WARNING! " + problem, file=sys.stderr)
        return

    if args.title:
        # Separate consecutive title blocks with one empty line.
        if f != args.files[0]:
            print()
        print("*" * 20, f, "*" * 20)

    counter = 0
    for n, line in enumerate(lines, 1):
        try:
            line.decode(args.codec, "strict")
        except UnicodeDecodeError:
            pass  # offending line: counted and reported below
        except LookupError:
            print("ERROR! The encoding '{}' is unfortunately not available."
                  .format(args.codec), file=sys.stderr)
            # sys.exit rather than the site-provided exit(), which is meant
            # for interactive sessions.
            sys.exit(2)
        else:
            continue

        counter += 1
        if args.output in ("full", "lines"):
            # Decode again, this time substituting the undecodable bytes.
            output = line.decode(args.codec, "replace")
            if args.lnum:
                output = "{:>6d}: {}".format(n, output)
            if max_width:
                # Dropping non-printable characters also drops the line
                # terminator, so print() supplies the only newline here.
                output = "".join(c for c in output if c.isprintable())
                print(output[:max_width])
            else:
                # The line still carries its own terminator, so print()
                # leaves an empty line after it.
                print(output)

    if not args.only_matching or counter > 0:
        # NOTE(review): no option visible in this file sets "quiet"; it is
        # kept so the accepted set of output modes is unchanged.
        if args.output in ("full", "summary", "quiet"):
            framed = args.output == "full" and counter > 0
            if framed:
                print("\n" + "-" * 80)
            print("{} lines in '{}', thereof {} lines with non-{} characters."
                  .format(len(lines), f, counter, args.codec.upper()))
            if framed:
                print()
        elif args.output == "count":
            print(counter)

# Examine every file named on the command line, in the order given.
try:
    for f in args.files:
        process_file(f)
except KeyboardInterrupt:
    print("<<< Aborted by user! >>>", file=sys.stderr)
    # Exit with 128 + SIGINT (2) so that callers do not mistake an aborted
    # run for a completed one.
    sys.exit(130)