1
# -*- test-case-name: twisted.words.test.test_jabberxmppstringprep -*-
3
# Copyright (c) 2001-2005 Twisted Matrix Laboratories.
4
# See LICENSE for details.
7
from zope.interface import Interface, implements
9
if sys.version_info < (2,3,2):
13
dots = re.compile(u"[\u002E\u3002\uFF0E\uFF61]")
14
def nameprep(self, label):
21
warnings.warn("Accented and non-Western Jabber IDs will not be properly "
22
"case-folded with this version of Python, resulting in "
23
"incorrect protocol-level behavior. It is strongly "
24
"recommended you upgrade to Python 2.3.2 or newer if you "
25
"intend to use Twisted's Jabber support.")
30
from encodings import idna
36
class ILookupTable(Interface):
37
""" Interface for character lookup classes. """
40
""" Return whether character is in this table. """
42
class IMappingTable(Interface):
43
""" Interface for character mapping classes. """
46
""" Return mapping for character. """
48
class LookupTableFromFunction:
    """
    Character lookup table backed by a plain callable.

    The callable handed to the constructor is installed directly as the
    instance's C{lookup} attribute, so calling C{table.lookup(c)} simply
    invokes that callable with C{c}.
    """

    implements(ILookupTable)

    def __init__(self, in_table_function):
        """
        @param in_table_function: callable stored as C{lookup}. Elsewhere in
            this module the C{stringprep.in_table_*} functions are passed
            here.
        """
        # No wrapper method: the function itself becomes the bound-free
        # ``lookup`` attribute of this instance.
        self.lookup = in_table_function
57
implements(ILookupTable)
59
def __init__(self, table):
63
return c in self._table
65
class MappingTableFromFunction:
    """
    Character mapping table backed by a plain callable.

    The callable handed to the constructor is installed directly as the
    instance's C{map} attribute, so calling C{table.map(c)} simply invokes
    that callable with C{c}.
    """

    implements(IMappingTable)

    def __init__(self, map_table_function):
        """
        @param map_table_function: callable stored as C{map}. Elsewhere in
            this module C{stringprep.map_table_b2} and a lower-casing lambda
            are passed here.
        """
        # No wrapper method: the function itself becomes the ``map``
        # attribute of this instance.
        self.map = map_table_function
72
class EmptyMappingTable:
74
implements(IMappingTable)
76
def __init__(self, in_table_function):
77
self._in_table_function = in_table_function
80
if self._in_table_function(c):
86
def __init__(self, mappings=None, normalize=True, prohibiteds=None,
             check_unassigneds=True, check_bidi=True):
    """
    Store the configuration of a string preparation profile.

    @param mappings: sequence of mapping tables; C{map()} iterates over it
        and calls C{mapping.map(c)} on each entry. C{None} (the default)
        means no mapping tables.
    @param normalize: stored as C{self.normalize}.
        NOTE(review): presumably gates the NFKC normalization performed in
        C{prepare()} -- confirm against that method.
    @param prohibiteds: sequence of lookup tables that
        C{check_prohibiteds()} iterates over. C{None} (the default) means
        no prohibited tables.
    @param check_unassigneds: stored as C{self.do_check_unassigneds};
        when true, C{prepare()} calls C{check_unassigneds()}.
    @param check_bidi: stored as C{self.do_check_bidi}; when true,
        C{prepare()} calls C{check_bidirectionals()}.
    """
    # A literal ``[]`` default is created once at definition time and would
    # be shared by every instance that relies on it; appending to one
    # profile's tables would then silently alter all the others. Build a
    # fresh list per instance instead.
    if mappings is None:
        mappings = []
    if prohibiteds is None:
        prohibiteds = []

    self.mappings = mappings
    self.normalize = normalize
    self.prohibiteds = prohibiteds
    self.do_check_unassigneds = check_unassigneds
    self.do_check_bidi = check_bidi
94
def prepare(self, string):
95
result = self.map(string)
97
result = unicodedata.normalize("NFKC", result)
98
self.check_prohibiteds(result)
99
if self.do_check_unassigneds:
100
self.check_unassigneds(result)
101
if self.do_check_bidi:
102
self.check_bidirectionals(result)
105
def map(self, string):
111
for mapping in self.mappings:
112
result_c = mapping.map(c)
116
if result_c is not None:
117
result.append(result_c)
119
return u"".join(result)
121
def check_prohibiteds(self, string):
123
for table in self.prohibiteds:
125
raise UnicodeError, "Invalid character %s" % repr(c)
127
def check_unassigneds(self, string):
129
if stringprep.in_table_a1(c):
130
raise UnicodeError, "Unassigned code point %s" % repr(c)
132
def check_bidirectionals(self, string):
134
found_RandALCat = False
137
if stringprep.in_table_d1(c):
138
found_RandALCat = True
139
if stringprep.in_table_d2(c):
142
if found_LCat and found_RandALCat:
143
raise UnicodeError, "Violation of BIDI Requirement 2"
145
if found_RandALCat and not (stringprep.in_table_d1(string[0]) and
146
stringprep.in_table_d1(string[-1])):
147
raise UnicodeError, "Violation of BIDI Requirement 3"
151
""" Implements preparation of internationalized domain names.
153
This class implements preparing internationalized domain names using the
154
rules defined in RFC 3491, section 4 (Conversion operations).
156
We do not perform step 4 since we deal with unicode representations of
157
domain names and do not convert from or to ASCII representations using
158
punycode encoding. When such a conversion is needed, the L{idna} standard
159
library provides the C{ToUnicode()} and C{ToASCII()} functions. Note that
160
L{idna} itself assumes UseSTD3ASCIIRules to be false.
162
The following steps are performed by C{prepare()}:
164
- Split the domain name into labels at the dots (RFC 3490, 3.1)
165
- Apply nameprep proper on each label (RFC 3491)
166
- Enforce the restrictions on ASCII characters in host names by
167
assuming STD3ASCIIRules to be true. (STD 3)
168
- Rejoin the labels using the label separator U+002E (full stop).
172
# Prohibited characters.
173
prohibiteds = [unichr(n) for n in range(0x00, 0x2c + 1) +
174
range(0x2e, 0x2f + 1) +
175
range(0x3a, 0x40 + 1) +
176
range(0x5b, 0x60 + 1) +
177
range(0x7b, 0x7f + 1) ]
179
def prepare(self, string):
182
labels = idna.dots.split(string)
184
if labels and len(labels[-1]) == 0:
191
result.append(self.nameprep(label))
193
return ".".join(result) + trailing_dot
195
def check_prohibiteds(self, string):
197
if c in self.prohibiteds:
198
raise UnicodeError, "Invalid character %s" % repr(c)
200
def nameprep(self, label):
201
label = idna.nameprep(label)
202
self.check_prohibiteds(label)
204
raise UnicodeError, "Invalid leading hyphen-minus"
206
raise UnicodeError, "Invalid trailing hyphen-minus"
210
case_map = MappingTableFromFunction(lambda c: c.lower())
211
nodeprep = Profile(mappings=[case_map],
213
prohibiteds=[LookupTable([u' ', u'"', u'&', u"'", u'/',
214
u':', u'<', u'>', u'@'])],
215
check_unassigneds=False,
218
resourceprep = Profile(normalize=False,
219
check_unassigneds=False,
223
C_11 = LookupTableFromFunction(stringprep.in_table_c11)
224
C_12 = LookupTableFromFunction(stringprep.in_table_c12)
225
C_21 = LookupTableFromFunction(stringprep.in_table_c21)
226
C_22 = LookupTableFromFunction(stringprep.in_table_c22)
227
C_3 = LookupTableFromFunction(stringprep.in_table_c3)
228
C_4 = LookupTableFromFunction(stringprep.in_table_c4)
229
C_5 = LookupTableFromFunction(stringprep.in_table_c5)
230
C_6 = LookupTableFromFunction(stringprep.in_table_c6)
231
C_7 = LookupTableFromFunction(stringprep.in_table_c7)
232
C_8 = LookupTableFromFunction(stringprep.in_table_c8)
233
C_9 = LookupTableFromFunction(stringprep.in_table_c9)
235
B_1 = EmptyMappingTable(stringprep.in_table_b1)
236
B_2 = MappingTableFromFunction(stringprep.map_table_b2)
238
nodeprep = Profile(mappings=[B_1, B_2],
239
prohibiteds=[C_11, C_12, C_21, C_22,
240
C_3, C_4, C_5, C_6, C_7, C_8, C_9,
241
LookupTable([u'"', u'&', u"'", u'/',
242
u':', u'<', u'>', u'@'])])
244
resourceprep = Profile(mappings=[B_1,],
245
prohibiteds=[C_12, C_21, C_22,
246
C_3, C_4, C_5, C_6, C_7, C_8, C_9])
248
nameprep = NamePrep()