1
""" Codec for the Punycode encoding, as specified in RFC 3492
3
Written by Martin v. Löwis.
8
##################### Encoding #####################################
11
"""3.1 Basic code point segregation"""
19
extended = sorted(extended)
20
return bytes(base), extended
22
def selective_len(str, max):
23
"""Return the length of str, considering only characters below max."""
30
def selective_find(str, char, index, pos):
31
"""Return a pair (index, pos), indicating the next occurrence of
32
char in str. index is the position of the character considering
33
only ordinals up to and including char, and pos is the position in
34
the full string. index/pos is the starting position in the full
48
def insertion_unsort(str, extended):
49
"""3.2 Insertion unsort coding"""
56
curlen = selective_len(str, char)
57
delta = (curlen+1) * (char - oldchar)
59
index,pos = selective_find(str,c,index,pos)
62
delta += index - oldindex
63
result.append(delta-1)
71
# Punycode parameters: tmin = 1, tmax = 26, base = 36
72
res = 36 * (j + 1) - bias
74
if res > 26: return 26
77
digits = b"abcdefghijklmnopqrstuvwxyz0123456789"
78
def generate_generalized_integer(N, bias):
79
"""3.3 Generalized variable-length integers"""
85
result.append(digits[N])
87
result.append(digits[t + ((N - t) % (36 - t))])
88
N = (N - t) // (36 - t)
91
def adapt(delta, first, numchars):
96
delta += delta // numchars
97
# ((base - tmin) * tmax) // 2 == 455
100
delta = delta // 35 # base - tmin
102
bias = divisions + (36 * delta // (delta + 38))
106
def generate_integers(baselen, deltas):
107
"""3.4 Bias adaptation"""
108
# Punycode parameters: initial bias = 72, damp = 700, skew = 38
111
for points, delta in enumerate(deltas):
112
s = generate_generalized_integer(delta, bias)
114
bias = adapt(delta, points==0, baselen+points+1)
117
def punycode_encode(text):
118
base, extended = segregate(text)
119
deltas = insertion_unsort(text, extended)
120
extended = generate_integers(len(base), deltas)
122
return base + b"-" + extended
125
##################### Decoding #####################################
127
def decode_generalized_number(extended, extpos, bias, errors):
128
"""3.3 Generalized variable-length integers"""
134
char = ord(extended[extpos])
136
if errors == "strict":
137
raise UnicodeError("incomplete punicode string")
138
return extpos + 1, None
140
if 0x41 <= char <= 0x5A: # A-Z
142
elif 0x30 <= char <= 0x39:
143
digit = char - 22 # 0x30-26
144
elif errors == "strict":
145
raise UnicodeError("Invalid extended code point '%s'"
152
return extpos, result
157
def insertion_sort(base, extended, errors):
158
"""3.2 Insertion unsort coding"""
163
while extpos < len(extended):
164
newpos, delta = decode_generalized_number(extended, extpos,
167
# There was an error in decoding. We can't continue because
168
# synchronization is lost.
171
char += pos // (len(base) + 1)
173
if errors == "strict":
174
raise UnicodeError("Invalid character U+%x" % char)
176
pos = pos % (len(base) + 1)
177
base = base[:pos] + chr(char) + base[pos:]
178
bias = adapt(delta, (extpos == 0), len(base))
182
def punycode_decode(text, errors):
183
if isinstance(text, str):
184
text = text.encode("ascii")
185
if isinstance(text, memoryview):
187
pos = text.rfind(b"-")
190
extended = str(text, "ascii").upper()
192
base = str(text[:pos], "ascii", errors)
193
extended = str(text[pos+1:], "ascii").upper()
194
return insertion_sort(base, extended, errors)
198
class Codec(codecs.Codec):
    """Stateless codec built on this module's Punycode helpers."""

    def encode(self, input, errors='strict'):
        """Encode *input* via punycode_encode().

        Returns a ``(encoded, consumed)`` pair, where ``consumed`` is
        ``len(input)``.  *errors* is accepted for interface
        compatibility but is not consulted here.
        """
        res = punycode_encode(input)
        return res, len(input)

    def decode(self, input, errors='strict'):
        """Decode *input* via punycode_decode().

        Returns a ``(decoded, consumed)`` pair, where ``consumed`` is
        ``len(input)``.

        Raises UnicodeError when *errors* is not one of 'strict',
        'replace' or 'ignore'.
        """
        # Reject unknown error handlers up front, before any decoding work.
        if errors not in ('strict', 'replace', 'ignore'):
            raise UnicodeError("Unsupported error handling "+errors)
        res = punycode_decode(input, errors)
        return res, len(input)
210
class IncrementalEncoder(codecs.IncrementalEncoder):
    """Incremental encoder that encodes each chunk independently."""

    def encode(self, input, final=False):
        """Return punycode_encode(input).

        *final* is accepted for interface compatibility but is not
        consulted; no state is carried between calls.
        """
        return punycode_encode(input)
214
class IncrementalDecoder(codecs.IncrementalDecoder):
    """Incremental decoder that decodes each chunk independently."""

    def decode(self, input, final=False):
        """Return punycode_decode(input, self.errors).

        *final* is accepted for interface compatibility but is not
        consulted; no state is carried between calls.

        Raises UnicodeError when ``self.errors`` is not one of 'strict',
        'replace' or 'ignore'.
        """
        # Reject unknown error handlers up front, before any decoding work.
        if self.errors not in ('strict', 'replace', 'ignore'):
            raise UnicodeError("Unsupported error handling "+self.errors)
        return punycode_decode(input, self.errors)
220
class StreamWriter(Codec,codecs.StreamWriter):
223
class StreamReader(Codec,codecs.StreamReader):
226
### encodings module API
229
return codecs.CodecInfo(
231
encode=Codec().encode,
232
decode=Codec().decode,
233
incrementalencoder=IncrementalEncoder,
234
incrementaldecoder=IncrementalDecoder,
235
streamwriter=StreamWriter,
236
streamreader=StreamReader,