1
from cherrypy.test import test
2
test.prefer_parent_path()
6
from httplib import IncompleteRead
8
europoundUnicode = u'\x80\xa3'
9
europoundUtf8 = u'\x80\xa3'.encode('utf-8')
10
sing = u"\u6bdb\u6cfd\u4e1c: Sing, Little Birdie?"
6
from cherrypy._cpcompat import BytesIO, IncompleteRead, ntob, ntou
8
europoundUnicode = ntou('\x80\xa3')
9
sing = ntou("\u6bdb\u6cfd\u4e1c: Sing, Little Birdie?", 'escape')
11
10
sing8 = sing.encode('utf-8')
12
11
sing16 = sing.encode('utf-16')
17
def index(self, param):
    """Yield the reference euro/pound string after checking ``param`` equals it."""
    expected = europoundUnicode
    assert param == expected
    yield expected
24
mao_zedong.exposed = True
29
utf8._cp_config = {'tools.encode.encoding': 'utf-8'}
31
def reqparams(self, *args, **kwargs):
    """Return the repr() of the current request's params mapping."""
    params = cherrypy.request.params
    return repr(params)
reqparams.exposed = True
41
# Test for ticket #147, where yield showed no exceptions (content-
42
# encoding was still gzip even though traceback wasn't zipped).
44
yield "Here be dragons"
47
def noshow_stream(self):
48
# Test for ticket #147, where yield showed no exceptions (content-
49
# encoding was still gzip even though traceback wasn't zipped).
51
yield "Here be dragons"
52
noshow_stream.exposed = True
53
noshow_stream._cp_config = {'response.stream': True}
55
cherrypy.config.update({
56
'environment': 'test_suite',
57
'tools.encode.on': True,
58
'tools.decode.on': True,
63
cherrypy.tree.mount(root, config={'/gzip': {'tools.gzip.on': True}})
67
14
from cherrypy.test import helper
70
17
class EncodingTests(helper.CPWebCase):
72
def testDecoding(self):
21
def index(self, param):
    """Yield the reference euro/pound string after checking ``param`` equals it.

    A mismatch fails the assertion with a message showing both reprs.
    """
    expected = europoundUnicode
    assert param == expected, "%r != %r" % (param, expected)
    yield expected
28
mao_zedong.exposed = True
33
utf8._cp_config = {'tools.encode.encoding': 'utf-8'}
35
def cookies_and_headers(self):
36
# if the headers have non-ascii characters and a cookie has
37
# any part which is unicode (even ascii), the response
39
cherrypy.response.cookie['candy'] = 'bar'
40
cherrypy.response.cookie['candy']['domain'] = 'cherrypy.org'
41
cherrypy.response.headers['Some-Header'] = 'My d\xc3\xb6g has fleas'
43
cookies_and_headers.exposed = True
45
def reqparams(self, *args, **kwargs):
    """Return the request params as UTF-8 encoded "key: value" pairs.

    Each pair is encoded with 'utf8' individually and the encoded pairs
    are joined with ntob(', ').
    """
    separator = ntob(', ')
    encoded_pairs = []
    for name, value in cherrypy.request.params.items():
        pair = ": ".join((name, value))
        encoded_pairs.append(pair.encode('utf8'))
    return separator.join(encoded_pairs)
reqparams.exposed = True
50
def nontext(self, *args, **kwargs):
    """Return a fixed four-character payload labelled 'application/binary'."""
    response_headers = cherrypy.response.headers
    response_headers['Content-Type'] = 'application/binary'
    return '\x00\x01\x02\x03'
nontext.exposed = True
54
nontext._cp_config = {'tools.encode.text_only': False,
55
'tools.encode.add_charset': True,
64
# Test for ticket #147, where yield showed no exceptions (content-
65
# encoding was still gzip even though traceback wasn't zipped).
67
yield "Here be dragons"
69
# Turn encoding off so the gzip tool is the one doing the collapse.
70
noshow._cp_config = {'tools.encode.on': False}
72
def noshow_stream(self):
73
# Test for ticket #147, where yield showed no exceptions (content-
74
# encoding was still gzip even though traceback wasn't zipped).
76
yield "Here be dragons"
77
noshow_stream.exposed = True
78
noshow_stream._cp_config = {'response.stream': True}
81
def extra_charset(self, *args, **kwargs):
    """Return the request params as "key: value" pairs joined by ", "."""
    rendered = []
    for name, value in cherrypy.request.params.items():
        rendered.append(": ".join((name, value)))
    return ', '.join(rendered)
extra_charset.exposed = True
85
extra_charset._cp_config = {
86
'tools.decode.on': True,
87
'tools.decode.default_encoding': ['utf-16'],
90
def force_charset(self, *args, **kwargs):
    """Return the request params as "key: value" pairs joined by ", "."""
    rendered = []
    for name, value in cherrypy.request.params.items():
        rendered.append(": ".join((name, value)))
    return ', '.join(rendered)
force_charset.exposed = True
94
force_charset._cp_config = {
95
'tools.decode.on': True,
96
'tools.decode.encoding': 'utf-16',
101
root.decode = Decode()
102
cherrypy.tree.mount(root, config={'/gzip': {'tools.gzip.on': True}})
103
setup_server = staticmethod(setup_server)
105
def test_query_string_decoding(self):
73
106
europoundUtf8 = europoundUnicode.encode('utf-8')
74
self.getPage('/?param=%s' % europoundUtf8)
107
self.getPage(ntob('/?param=') + europoundUtf8)
75
108
self.assertBody(europoundUtf8)
77
# Make sure that encoded utf8 gets parsed correctly
110
# Encoded utf8 query strings MUST be parsed correctly.
111
# Here, q is the POUND SIGN U+00A3 encoded in utf8 and then %HEX
78
112
self.getPage("/reqparams?q=%C2%A3")
79
self.assertBody(r"{'q': u'\xa3'}")
113
# The return value will be encoded as utf8.
114
self.assertBody(ntob("q: \xc2\xa3"))
116
# Query strings that are incorrectly encoded MUST raise 404.
117
# Here, q is the POUND SIGN U+00A3 encoded in latin1 and then %HEX
118
self.getPage("/reqparams?q=%A3")
119
self.assertStatus(404)
120
self.assertErrorPage(404,
121
"The given query string could not be processed. Query "
122
"strings for this resource must be encoded with 'utf8'.")
124
def test_urlencoded_decoding(self):
125
# Test the decoding of an application/x-www-form-urlencoded entity.
126
europoundUtf8 = europoundUnicode.encode('utf-8')
127
body=ntob("param=") + europoundUtf8
128
self.getPage('/', method='POST',
129
headers=[("Content-Type", "application/x-www-form-urlencoded"),
130
("Content-Length", str(len(body))),
133
self.assertBody(europoundUtf8)
135
# Encoded utf8 entities MUST be parsed and decoded correctly.
136
# Here, q is the POUND SIGN U+00A3 encoded in utf8
137
body = ntob("q=\xc2\xa3")
138
self.getPage('/reqparams', method='POST',
139
headers=[("Content-Type", "application/x-www-form-urlencoded"),
140
("Content-Length", str(len(body))),
143
self.assertBody(ntob("q: \xc2\xa3"))
145
# ...and in utf16, which is not in the default attempt_charsets list:
146
body = ntob("\xff\xfeq\x00=\xff\xfe\xa3\x00")
147
self.getPage('/reqparams', method='POST',
148
headers=[("Content-Type", "application/x-www-form-urlencoded;charset=utf-16"),
149
("Content-Length", str(len(body))),
152
self.assertBody(ntob("q: \xc2\xa3"))
154
# Entities that are incorrectly encoded MUST raise 400.
155
# Here, q is the POUND SIGN U+00A3 encoded in utf16, but
156
# the Content-Type incorrectly labels it utf-8.
157
body = ntob("\xff\xfeq\x00=\xff\xfe\xa3\x00")
158
self.getPage('/reqparams', method='POST',
159
headers=[("Content-Type", "application/x-www-form-urlencoded;charset=utf-8"),
160
("Content-Length", str(len(body))),
163
self.assertStatus(400)
164
self.assertErrorPage(400,
165
"The request entity could not be decoded. The following charsets "
166
"were attempted: ['utf-8']")
168
def test_decode_tool(self):
169
# An extra charset should be tried first, and succeed if it matches.
170
# Here, we add utf-16 as a charset and pass a utf-16 body.
171
body = ntob("\xff\xfeq\x00=\xff\xfe\xa3\x00")
172
self.getPage('/decode/extra_charset', method='POST',
173
headers=[("Content-Type", "application/x-www-form-urlencoded"),
174
("Content-Length", str(len(body))),
177
self.assertBody(ntob("q: \xc2\xa3"))
179
# An extra charset should be tried first, and continue to other default
180
# charsets if it doesn't match.
181
# Here, we add utf-16 as a charset but still pass a utf-8 body.
182
body = ntob("q=\xc2\xa3")
183
self.getPage('/decode/extra_charset', method='POST',
184
headers=[("Content-Type", "application/x-www-form-urlencoded"),
185
("Content-Length", str(len(body))),
188
self.assertBody(ntob("q: \xc2\xa3"))
190
# An extra charset should error if force is True and it doesn't match.
191
# Here, we force utf-16 as a charset but still pass a utf-8 body.
192
body = ntob("q=\xc2\xa3")
193
self.getPage('/decode/force_charset', method='POST',
194
headers=[("Content-Type", "application/x-www-form-urlencoded"),
195
("Content-Length", str(len(body))),
198
self.assertErrorPage(400,
199
"The request entity could not be decoded. The following charsets "
200
"were attempted: ['utf-16']")
202
def test_multipart_decoding(self):
203
# Test the decoding of a multipart entity when the charset (utf16) is
205
body=ntob('\r\n'.join(['--X',
206
'Content-Type: text/plain;charset=utf-16',
207
'Content-Disposition: form-data; name="text"',
209
'\xff\xfea\x00b\x00\x1c c\x00',
211
'Content-Type: text/plain;charset=utf-16',
212
'Content-Disposition: form-data; name="submit"',
214
'\xff\xfeC\x00r\x00e\x00a\x00t\x00e\x00',
216
self.getPage('/reqparams', method='POST',
217
headers=[("Content-Type", "multipart/form-data;boundary=X"),
218
("Content-Length", str(len(body))),
221
self.assertBody(ntob("text: ab\xe2\x80\x9cc, submit: Create"))
223
def test_multipart_decoding_no_charset(self):
224
# Test the decoding of a multipart entity when the charset (utf8) is
225
# NOT explicitly given, but is in the list of charsets to attempt.
226
body=ntob('\r\n'.join(['--X',
227
'Content-Disposition: form-data; name="text"',
231
'Content-Disposition: form-data; name="submit"',
235
self.getPage('/reqparams', method='POST',
236
headers=[("Content-Type", "multipart/form-data;boundary=X"),
237
("Content-Length", str(len(body))),
240
self.assertBody(ntob("text: \xe2\x80\x9c, submit: Create"))
242
def test_multipart_decoding_no_successful_charset(self):
243
# Test the decoding of a multipart entity when the charset (utf16) is
244
# NOT explicitly given, and is NOT in the list of charsets to attempt.
245
body=ntob('\r\n'.join(['--X',
246
'Content-Disposition: form-data; name="text"',
248
'\xff\xfea\x00b\x00\x1c c\x00',
250
'Content-Disposition: form-data; name="submit"',
252
'\xff\xfeC\x00r\x00e\x00a\x00t\x00e\x00',
254
self.getPage('/reqparams', method='POST',
255
headers=[("Content-Type", "multipart/form-data;boundary=X"),
256
("Content-Length", str(len(body))),
259
self.assertStatus(400)
260
self.assertErrorPage(400,
261
"The request entity could not be decoded. The following charsets "
262
"were attempted: ['us-ascii', 'utf-8']")
264
def test_nontext(self):
    # Request /nontext, then check that the Content-Type header carries
    # the utf-8 charset and that the body is the expected payload.
    expected_type = 'application/binary;charset=utf-8'
    expected_body = '\x00\x01\x02\x03'
    self.getPage('/nontext')
    self.assertHeader('Content-Type', expected_type)
    self.assertBody(expected_body)
81
269
def testEncoding(self):
82
270
# Default encoding should be utf-8