1
""" Standard "encodings" Package
3
Standard Python encoding modules are stored in this package
6
Codec modules must have names corresponding to normalized encoding
7
names as defined in the normalize_encoding() function below, e.g.
8
'utf-8' must be implemented by the module 'utf_8.py'.
10
Each codec module must export the following interface:
12
* getregentry() -> codecs.CodecInfo object
13
The getregentry() API must return a CodecInfo object with encoder, decoder,
14
incrementalencoder, incrementaldecoder, streamwriter and streamreader
15
attributes which adhere to the Python Codec Interface Standard.
17
In addition, a module may optionally also define the following
18
APIs which are then used by the package's codec search function:
20
* getaliases() -> sequence of encoding name strings to use as aliases
22
Alias names returned by getaliases() must be normalized encoding
23
names as defined by normalize_encoding().
25
Written by Marc-Andre Lemburg (mal@lemburg.com).
27
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
35
# Sentinel passed as the default to _cache.get() in search_function, so that
# "no cache entry yet" can be told apart from a cached value such as None.
_unknown = '--unknown--'
37
# Alias table (the `aliases` attribute of the `aliases` name; its import is
# not visible in this view). search_function reads it to resolve normalized
# names and adds entries returned by a codec module's getaliases().
_aliases = aliases.aliases
39
# Raised by search_function when the value returned by a codec module's
# getregentry() fails validation (wrong length, or entries that are not
# callable). Inherits from both LookupError and SystemError.
class CodecRegistryError(LookupError, SystemError):
42
def normalize_encoding(encoding):
43
""" Normalize an encoding name.
45
Normalization works as follows: all non-alphanumeric
47
characters except the dot used for Python package names are
48
collapsed and replaced with a single underscore, e.g. ' -;#'
49
becomes '_'. Leading and trailing underscores are removed.
51
Note that encoding names should be ASCII only; if they do use
52
non-ASCII characters, these must be Latin-1 compatible.
55
# A bytes name is decoded as ASCII before any further processing.
if isinstance(encoding, bytes):
56
encoding = str(encoding, "ascii")
60
# Alphanumeric characters and the dot are the ones kept as-is; per the
# docstring above, runs of anything else collapse to a single underscore.
# NOTE(review): the rest of the loop body is not visible in this view.
if c.isalnum() or c == '.':
69
def search_function(encoding):
# Codec search function: resolves an encoding name to a codecs.CodecInfo by
# importing a codec module from the 'encodings' package and calling its
# getregentry(). Results are cached in _cache under the name as passed in.
# NOTE(review): several statements of this function (including its return
# statements) are not visible in this view.
72
# Cache lookup; _unknown is the default so that a cached None still counts
# as a cache hit.
entry = _cache.get(encoding, _unknown)
73
if entry is not _unknown:
78
# First try to find an alias for the normalized encoding
79
# name and lookup the module using the aliased name, then try to
80
# lookup the module using the standard import scheme, i.e. first
81
# try in the encodings package, then at top-level.
# NOTE(review): the __import__ call below always prefixes 'encodings.', so
# the "then at top-level" step does not appear to happen in the visible
# code -- confirm and update the comment above if it is stale.
83
norm_encoding = normalize_encoding(encoding)
84
# The alias table is consulted twice: with the normalized name, and with
# its dots replaced by underscores.
aliased_encoding = _aliases.get(norm_encoding) or \
85
_aliases.get(norm_encoding.replace('.', '_'))
86
if aliased_encoding is not None:
87
modnames = [aliased_encoding,
90
modnames = [norm_encoding]
91
for modname in modnames:
92
# Empty names and names containing a dot are singled out here; the action
# taken for them is not visible in this view.
if not modname or '.' in modname:
95
# Import is absolute to prevent the possibly malicious import of a
96
# module with side-effects that is not in the 'encodings' package.
97
mod = __import__('encodings.' + modname, fromlist=_import_tail,
107
getregentry = mod.getregentry
108
# A module without a getregentry attribute ends up in this handler.
except AttributeError:
114
# Caches None for this name -- presumably the "no codec module found" path;
# the enclosing condition is not visible in this view.
_cache[encoding] = None
117
# Now ask the module for the registry entry
118
entry = getregentry()
119
# Results that are not already a CodecInfo must be sequences of 4 to 7
# items; they are validated and then converted to a CodecInfo below.
if not isinstance(entry, codecs.CodecInfo):
120
if not 4 <= len(entry) <= 7:
121
raise CodecRegistryError('module "%s" (%s) failed to register'
122
% (mod.__name__, mod.__file__))
123
# Items 0 and 1 must be callable; items 2 to 5, when present, may be either
# None or callable.
if not hasattr(entry[0], '__call__') or \
124
not hasattr(entry[1], '__call__') or \
125
(entry[2] is not None and not hasattr(entry[2], '__call__')) or \
126
(entry[3] is not None and not hasattr(entry[3], '__call__')) or \
127
(len(entry) > 4 and entry[4] is not None and not hasattr(entry[4], '__call__')) or \
128
(len(entry) > 5 and entry[5] is not None and not hasattr(entry[5], '__call__')):
129
raise CodecRegistryError('incompatible codecs in module "%s" (%s)'
130
% (mod.__name__, mod.__file__))
131
# Pad with None up to six items and append the module name without its
# package prefix (the part after the first dot) as the name item.
# NOTE(review): for a 7-item entry whose last item is None, the padding is
# empty and the name is appended as an eighth item rather than replacing
# the seventh -- confirm whether that case can occur.
if len(entry)<7 or entry[6] is None:
132
entry += (None,)*(6-len(entry)) + (mod.__name__.split(".", 1)[1],)
133
entry = codecs.CodecInfo(*entry)
135
# Cache the codec registry entry
136
_cache[encoding] = entry
138
# Register its aliases (without overwriting previously registered
141
# getaliases() is optional; a module that lacks it is handled by the
# AttributeError handler below.
codecaliases = mod.getaliases()
142
except AttributeError:
145
for alias in codecaliases:
146
# Existing alias entries are kept; only new aliases are mapped to this
# module's name.
if alias not in _aliases:
147
_aliases[alias] = modname
149
# Return the registry entry
152
# Register the search_function in the Python codec registry
153
# This is a module-level call, so registration happens when this module's
# top-level code is executed.
codecs.register(search_function)