6
from argparse import ArgumentParser, FileType
8
# Code points accepted by the XML 1.0 "Char" production:
#   #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
# http://www.w3.org/TR/xml/#charsets
# The specification's bounds are inclusive while range() excludes its stop
# value, hence the explicit "+ 1" on every upper bound.
VALID_XML_CHARS = frozenset((0x9, 0xA, 0xD)).union(
    range(0x20, 0xD7FF + 1),
    range(0xE000, 0xFFFD + 1),
    range(0x10000, 0x10FFFF + 1),
)
14
def is_valid_xml_char(ch):
    """Tell whether ``ch`` is a character that XML allows.

    The answer is a membership test of the character's code point
    against ``VALID_XML_CHARS``.

    See http://www.w3.org/TR/xml/#charsets
    """
    code_point = ord(ch)
    return code_point in VALID_XML_CHARS
21
# Command-line interface of the sanitizer.  The explanatory text is passed
# as ``description`` so it appears in the --help output; the first positional
# parameter of ArgumentParser is ``prog``, which would place the text in the
# usage line as the program name instead.
parser = ArgumentParser(
    description="Receives as input some text and outputs "
                "the same text without characters which are "
                "not valid in the XML specification.")
24
# Register the positional `input_file` argument with its help text.
# NOTE(review): the lines between the argument name and `help=` are not
# visible in this view, so any further options of this argument cannot be
# confirmed from here. The bare numbers look like line-number residue from
# extraction -- confirm against the real file.
parser.add_argument('input_file',
27
help='The name of the file to sanitize.')
28
# Parse the command line; the code below reads `args.input_file`.
args = parser.parse_args()
31
# Read the whole of args.input_file and keep only the characters for which
# is_valid_xml_char() returns true.
text = ''.join([c for c in args.input_file.read() if
32
is_valid_xml_char(c)])
35
# Same filtering applied to standard input: the raw byte stream is wrapped
# so it is decoded as UTF-8, with undecodable bytes dropped
# (errors="ignore").
# NOTE(review): the condition that chooses between the file and stdin is
# not visible in this view; presumably stdin is the fallback when no input
# file is given -- confirm.
with io.TextIOWrapper(
36
sys.stdin.buffer, encoding='UTF-8', errors="ignore") as stdin:
37
text = ''.join([c for c in stdin.read() if is_valid_xml_char(c)])
41
# Script entry-point guard.
# NOTE(review): the guarded `try:` body and the statement under the final
# `if` are not visible in this view.
if __name__ == "__main__":
44
# NOTE(review): this handler catches every Exception but then reads
# `err.errno`; exceptions that are not OSError-like have no `errno`
# attribute, so the comparison itself would raise AttributeError.
# Catching OSError (or BrokenPipeError) looks safer -- confirm intent.
except Exception as err:
45
# Taken for any error other than a broken pipe (EPIPE).
if err.errno != errno.EPIPE: