# -*- coding: utf-8 -*-
#
# Copyright (C) 2006 Edgewall Software
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://genshi.edgewall.org/wiki/License.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at http://genshi.edgewall.org/log/.
17
from genshi.input import HTML, ParseError
18
from genshi.filters.html import HTMLFormFiller, HTMLSanitizer
21
class HTMLFormFillerTestCase(unittest.TestCase):
    """Tests for the ``HTMLFormFiller`` stream filter.

    Each test parses a small form with ``HTML``, pipes the stream through
    ``HTMLFormFiller`` and compares the serialized result with the expected
    markup.  The whitespace inside the input and expected literals must stay
    in sync, because the comparison is an exact string match.
    """

    # -- <input type="text"> ------------------------------------------------

    def test_fill_input_text_no_value(self):
        # Without data the control is passed through unchanged.
        html = HTML("""<form><p>
          <input type="text" name="foo" />
        </p></form>""") | HTMLFormFiller()
        self.assertEqual("""<form><p>
          <input type="text" name="foo"/>
        </p></form>""", unicode(html))

    def test_fill_input_text_single_value(self):
        # A plain string becomes the ``value`` attribute.
        html = HTML("""<form><p>
          <input type="text" name="foo" />
        </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
        self.assertEqual("""<form><p>
          <input type="text" name="foo" value="bar"/>
        </p></form>""", unicode(html))

    def test_fill_input_text_multi_value(self):
        # A one-element list is expected to yield the same ``value``.
        html = HTML("""<form><p>
          <input type="text" name="foo" />
        </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
        self.assertEqual("""<form><p>
          <input type="text" name="foo" value="bar"/>
        </p></form>""", unicode(html))

    # -- <input type="hidden"> ----------------------------------------------

    def test_fill_input_hidden_no_value(self):
        html = HTML("""<form><p>
          <input type="hidden" name="foo" />
        </p></form>""") | HTMLFormFiller()
        self.assertEqual("""<form><p>
          <input type="hidden" name="foo"/>
        </p></form>""", unicode(html))

    def test_fill_input_hidden_single_value(self):
        html = HTML("""<form><p>
          <input type="hidden" name="foo" />
        </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
        self.assertEqual("""<form><p>
          <input type="hidden" name="foo" value="bar"/>
        </p></form>""", unicode(html))

    def test_fill_input_hidden_multi_value(self):
        html = HTML("""<form><p>
          <input type="hidden" name="foo" />
        </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
        self.assertEqual("""<form><p>
          <input type="hidden" name="foo" value="bar"/>
        </p></form>""", unicode(html))

    # -- <textarea> -----------------------------------------------------------

    def test_fill_textarea_no_value(self):
        # The empty element is expected to serialize in its short form.
        html = HTML("""<form><p>
          <textarea name="foo"></textarea>
        </p></form>""") | HTMLFormFiller()
        self.assertEqual("""<form><p>
          <textarea name="foo"/>
        </p></form>""", unicode(html))

    def test_fill_textarea_single_value(self):
        # The value becomes the text content of the element.
        html = HTML("""<form><p>
          <textarea name="foo"></textarea>
        </p></form>""") | HTMLFormFiller(data={'foo': 'bar'})
        self.assertEqual("""<form><p>
          <textarea name="foo">bar</textarea>
        </p></form>""", unicode(html))

    def test_fill_textarea_multi_value(self):
        html = HTML("""<form><p>
          <textarea name="foo"></textarea>
        </p></form>""") | HTMLFormFiller(data={'foo': ['bar']})
        self.assertEqual("""<form><p>
          <textarea name="foo">bar</textarea>
        </p></form>""", unicode(html))

    # -- <input type="checkbox"> --------------------------------------------

    def test_fill_input_checkbox_no_value(self):
        html = HTML("""<form><p>
          <input type="checkbox" name="foo" />
        </p></form>""") | HTMLFormFiller()
        self.assertEqual("""<form><p>
          <input type="checkbox" name="foo"/>
        </p></form>""", unicode(html))

    def test_fill_input_checkbox_single_value_auto(self):
        # No ``value`` attribute: an empty string leaves the box unchecked,
        # 'on' checks it.
        html = HTML("""<form><p>
          <input type="checkbox" name="foo" />
        </p></form>""")
        self.assertEqual("""<form><p>
          <input type="checkbox" name="foo"/>
        </p></form>""", unicode(html | HTMLFormFiller(data={'foo': ''})))
        self.assertEqual("""<form><p>
          <input type="checkbox" name="foo" checked="checked"/>
        </p></form>""", unicode(html | HTMLFormFiller(data={'foo': 'on'})))

    def test_fill_input_checkbox_single_value_defined(self):
        # Explicit ``value``: checked only when the data matches it.
        html = HTML("""<form><p>
          <input type="checkbox" name="foo" value="1" />
        </p></form>""")
        self.assertEqual("""<form><p>
          <input type="checkbox" name="foo" value="1" checked="checked"/>
        </p></form>""", unicode(html | HTMLFormFiller(data={'foo': '1'})))
        self.assertEqual("""<form><p>
          <input type="checkbox" name="foo" value="1"/>
        </p></form>""", unicode(html | HTMLFormFiller(data={'foo': '2'})))

    def test_fill_input_checkbox_multi_value_auto(self):
        # List data: an empty list leaves the box unchecked, ['on'] checks it.
        html = HTML("""<form><p>
          <input type="checkbox" name="foo" />
        </p></form>""")
        self.assertEqual("""<form><p>
          <input type="checkbox" name="foo"/>
        </p></form>""", unicode(html | HTMLFormFiller(data={'foo': []})))
        self.assertEqual("""<form><p>
          <input type="checkbox" name="foo" checked="checked"/>
        </p></form>""", unicode(html | HTMLFormFiller(data={'foo': ['on']})))

    def test_fill_input_checkbox_multi_value_defined(self):
        html = HTML("""<form><p>
          <input type="checkbox" name="foo" value="1" />
        </p></form>""")
        self.assertEqual("""<form><p>
          <input type="checkbox" name="foo" value="1" checked="checked"/>
        </p></form>""", unicode(html | HTMLFormFiller(data={'foo': ['1']})))
        self.assertEqual("""<form><p>
          <input type="checkbox" name="foo" value="1"/>
        </p></form>""", unicode(html | HTMLFormFiller(data={'foo': ['2']})))

    # -- <input type="radio"> -----------------------------------------------

    def test_fill_input_radio_no_value(self):
        html = HTML("""<form><p>
          <input type="radio" name="foo" />
        </p></form>""") | HTMLFormFiller()
        self.assertEqual("""<form><p>
          <input type="radio" name="foo"/>
        </p></form>""", unicode(html))

    def test_fill_input_radio_single_value(self):
        # Checked only when the data equals the button's ``value``.
        html = HTML("""<form><p>
          <input type="radio" name="foo" value="1" />
        </p></form>""")
        self.assertEqual("""<form><p>
          <input type="radio" name="foo" value="1" checked="checked"/>
        </p></form>""", unicode(html | HTMLFormFiller(data={'foo': '1'})))
        self.assertEqual("""<form><p>
          <input type="radio" name="foo" value="1"/>
        </p></form>""", unicode(html | HTMLFormFiller(data={'foo': '2'})))

    def test_fill_input_radio_multi_value(self):
        html = HTML("""<form><p>
          <input type="radio" name="foo" value="1" />
        </p></form>""")
        self.assertEqual("""<form><p>
          <input type="radio" name="foo" value="1" checked="checked"/>
        </p></form>""", unicode(html | HTMLFormFiller(data={'foo': ['1']})))
        self.assertEqual("""<form><p>
          <input type="radio" name="foo" value="1"/>
        </p></form>""", unicode(html | HTMLFormFiller(data={'foo': ['2']})))

    # -- <select> -----------------------------------------------------------
    # "auto" variants use <option> elements without a ``value`` attribute,
    # "defined" variants give every option an explicit ``value``.

    def test_fill_select_no_value_auto(self):
        html = HTML("""<form><p>
          <select name="foo">
            <option>1</option>
            <option>2</option>
            <option>3</option>
          </select>
        </p></form>""") | HTMLFormFiller()
        self.assertEqual("""<form><p>
          <select name="foo">
            <option>1</option>
            <option>2</option>
            <option>3</option>
          </select>
        </p></form>""", unicode(html))

    def test_fill_select_no_value_defined(self):
        html = HTML("""<form><p>
          <select name="foo">
            <option value="1">1</option>
            <option value="2">2</option>
            <option value="3">3</option>
          </select>
        </p></form>""") | HTMLFormFiller()
        self.assertEqual("""<form><p>
          <select name="foo">
            <option value="1">1</option>
            <option value="2">2</option>
            <option value="3">3</option>
          </select>
        </p></form>""", unicode(html))

    def test_fill_select_single_value_auto(self):
        # The option whose text equals the data gets selected.
        html = HTML("""<form><p>
          <select name="foo">
            <option>1</option>
            <option>2</option>
            <option>3</option>
          </select>
        </p></form>""") | HTMLFormFiller(data={'foo': '1'})
        self.assertEqual("""<form><p>
          <select name="foo">
            <option selected="selected">1</option>
            <option>2</option>
            <option>3</option>
          </select>
        </p></form>""", unicode(html))

    def test_fill_select_single_value_defined(self):
        # The option whose ``value`` equals the data gets selected.
        html = HTML("""<form><p>
          <select name="foo">
            <option value="1">1</option>
            <option value="2">2</option>
            <option value="3">3</option>
          </select>
        </p></form>""") | HTMLFormFiller(data={'foo': '1'})
        self.assertEqual("""<form><p>
          <select name="foo">
            <option value="1" selected="selected">1</option>
            <option value="2">2</option>
            <option value="3">3</option>
          </select>
        </p></form>""", unicode(html))

    def test_fill_select_multi_value_auto(self):
        # The minimized ``multiple`` attribute is expected to be written out
        # as multiple="multiple"; every listed option gets selected.
        html = HTML("""<form><p>
          <select name="foo" multiple>
            <option>1</option>
            <option>2</option>
            <option>3</option>
          </select>
        </p></form>""") | HTMLFormFiller(data={'foo': ['1', '3']})
        self.assertEqual("""<form><p>
          <select name="foo" multiple="multiple">
            <option selected="selected">1</option>
            <option>2</option>
            <option selected="selected">3</option>
          </select>
        </p></form>""", unicode(html))

    def test_fill_select_multi_value_defined(self):
        html = HTML("""<form><p>
          <select name="foo" multiple>
            <option value="1">1</option>
            <option value="2">2</option>
            <option value="3">3</option>
          </select>
        </p></form>""") | HTMLFormFiller(data={'foo': ['1', '3']})
        self.assertEqual("""<form><p>
          <select name="foo" multiple="multiple">
            <option value="1" selected="selected">1</option>
            <option value="2">2</option>
            <option value="3" selected="selected">3</option>
          </select>
        </p></form>""", unicode(html))
274
class HTMLSanitizerTestCase(unittest.TestCase):
    """Tests for the ``HTMLSanitizer`` stream filter.

    Each test parses a markup snippet with ``HTML``, pipes it through an
    ``HTMLSanitizer`` and compares the serialized result with the expected
    output, or asserts that parsing raises ``ParseError``.
    """

    def test_sanitize_unchanged(self):
        # Harmless markup passes through; only the serialization of the
        # empty element differs.
        html = HTML('<a href="#">fo<br />o</a>')
        self.assertEqual(u'<a href="#">fo<br/>o</a>',
                         unicode(html | HTMLSanitizer()))

    def test_sanitize_escape_text(self):
        # Escaped text must still be escaped in the output.
        html = HTML('<a href="#">fo&amp;</a>')
        self.assertEqual(u'<a href="#">fo&amp;</a>',
                         unicode(html | HTMLSanitizer()))
        html = HTML('<a href="#">&lt;foo&gt;</a>')
        self.assertEqual(u'<a href="#">&lt;foo&gt;</a>',
                         unicode(html | HTMLSanitizer()))

    def test_sanitize_entityref_text(self):
        # A named entity reference is expected to come out as the character
        # it stands for.
        html = HTML('<a href="#">fo&ouml;</a>')
        self.assertEqual(u'<a href="#">foö</a>',
                         unicode(html | HTMLSanitizer()))

    def test_sanitize_escape_attr(self):
        # Escaped attribute values must still be escaped in the output.
        html = HTML('<div title="&lt;foo&gt;"></div>')
        self.assertEqual(u'<div title="&lt;foo&gt;"/>',
                         unicode(html | HTMLSanitizer()))

    def test_sanitize_close_empty_tag(self):
        # An unclosed <br> is expected to be serialized as an empty element.
        html = HTML('<a href="#">fo<br>o</a>')
        self.assertEqual(u'<a href="#">fo<br/>o</a>',
                         unicode(html | HTMLSanitizer()))

    def test_sanitize_invalid_entity(self):
        # An unknown entity is expected to be kept as text, with its
        # ampersand escaped on output.
        html = HTML('&junk;')
        self.assertEqual('&amp;junk;', unicode(html | HTMLSanitizer()))

    def test_sanitize_remove_script_elem(self):
        html = HTML('<script>alert("Foo")</script>')
        self.assertEqual(u'', unicode(html | HTMLSanitizer()))
        html = HTML('<SCRIPT SRC="http://example.com/"></SCRIPT>')
        self.assertEqual(u'', unicode(html | HTMLSanitizer()))
        # Malformed tag names are expected to be rejected by the parser.
        self.assertRaises(ParseError, HTML, '<SCR\0IPT>alert("foo")</SCR\0IPT>')
        self.assertRaises(ParseError, HTML,
                          '<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>')

    def test_sanitize_remove_onclick_attr(self):
        html = HTML('<div onclick=\'alert("foo")\' />')
        self.assertEqual(u'<div/>', unicode(html | HTMLSanitizer()))

    def test_sanitize_remove_style_scripts(self):
        # ``style`` is added to the safe attributes here so that the tests
        # exercise the handling of its value rather than its removal.
        sanitizer = HTMLSanitizer(safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style']))
        # Inline style with url() using javascript: scheme
        html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"))\'>')
        self.assertEqual(u'<div/>', unicode(html | sanitizer))
        # Inline style with url() using javascript: scheme, using control char
        # NOTE(review): the control character is written as the reference
        # &#1; in front of the scheme -- confirm against the upstream test.
        html = HTML('<DIV STYLE=\'background: url(&#1;javascript:alert("foo"))\'>')
        self.assertEqual(u'<div/>', unicode(html | sanitizer))
        # Inline style with url() using javascript: scheme, in quotes
        html = HTML('<DIV STYLE=\'background: url("javascript:alert(foo)")\'>')
        self.assertEqual(u'<div/>', unicode(html | sanitizer))
        # IE expressions in CSS not allowed
        html = HTML('<DIV STYLE=\'width: expression(alert("foo"));\'>')
        self.assertEqual(u'<div/>', unicode(html | sanitizer))
        # Only the offending declaration is dropped; the harmless one stays.
        html = HTML('<DIV STYLE=\'background: url(javascript:alert("foo"));'
                    'color: #fff\'>')
        self.assertEqual(u'<div style="color: #fff"/>',
                         unicode(html | sanitizer))
        # Inline style with url() using javascript: scheme, using unicode
        # escapes (\75 is the CSS escape for "u")
        html = HTML('<DIV STYLE=\'background: \\75rl(javascript:alert("foo"))\'>')
        self.assertEqual(u'<div/>', unicode(html | sanitizer))
        html = HTML('<DIV STYLE=\'background: \\000075rl(javascript:alert("foo"))\'>')
        self.assertEqual(u'<div/>', unicode(html | sanitizer))
        html = HTML('<DIV STYLE=\'background: \\75 rl(javascript:alert("foo"))\'>')
        self.assertEqual(u'<div/>', unicode(html | sanitizer))
        html = HTML('<DIV STYLE=\'background: \\000075 rl(javascript:alert("foo"))\'>')
        self.assertEqual(u'<div/>', unicode(html | sanitizer))
        html = HTML('<DIV STYLE=\'background: \\000075\r\nrl(javascript:alert("foo"))\'>')
        self.assertEqual(u'<div/>', unicode(html | sanitizer))

    def test_sanitize_remove_src_javascript(self):
        html = HTML('<img src=\'javascript:alert("foo")\'>')
        self.assertEqual(u'<img/>', unicode(html | HTMLSanitizer()))
        # Case-insensitive protocol matching
        html = HTML('<IMG SRC=\'JaVaScRiPt:alert("foo")\'>')
        self.assertEqual(u'<img/>', unicode(html | HTMLSanitizer()))
        # Grave accents (not parsed)
        self.assertRaises(ParseError, HTML,
                          '<IMG SRC=`javascript:alert("RSnake says, \'foo\'")`>')
        # Protocol encoded using UTF-8 numeric entities
        html = HTML('<IMG SRC=\'&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;'
                    '&#112;&#116;&#58;alert("foo")\'>')
        self.assertEqual(u'<img/>', unicode(html | HTMLSanitizer()))
        # Protocol encoded using UTF-8 numeric entities without a semicolon
        # (which is allowed because the max number of digits is used)
        html = HTML('<IMG SRC=\'&#0000106&#0000097&#0000118&#0000097'
                    '&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116'
                    '&#0000058alert("foo")\'>')
        self.assertEqual(u'<img/>', unicode(html | HTMLSanitizer()))
        # Protocol encoded using UTF-8 numeric hex entities without a semicolon
        # (which is allowed because the max number of digits is used)
        # The last reference keeps its semicolon: the "a" of "alert" is a
        # hex digit and would otherwise be read as part of the reference.
        html = HTML('<IMG SRC=\'&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69'
                    '&#x70&#x74&#x3A;alert("foo")\'>')
        self.assertEqual(u'<img/>', unicode(html | HTMLSanitizer()))
        # Embedded tab character in protocol
        html = HTML('<IMG SRC=\'jav\tascript:alert("foo");\'>')
        self.assertEqual(u'<img/>', unicode(html | HTMLSanitizer()))
        # Embedded tab character in protocol, but encoded this time
        html = HTML('<IMG SRC=\'jav&#x09;ascript:alert("foo");\'>')
        self.assertEqual(u'<img/>', unicode(html | HTMLSanitizer()))
385
def suite():
    """Build the test suite for this module.

    Returns a ``unittest.TestSuite`` holding the doctests of the module that
    defines ``HTMLFormFiller``, followed by the tests of
    ``HTMLFormFillerTestCase`` and ``HTMLSanitizerTestCase``.
    """
    # NOTE(review): the ``def``/``return`` wrapper is reconstructed; confirm
    # that importers call ``suite()`` rather than use a module-level object.
    loader = unittest.defaultTestLoader
    # The local is deliberately not called ``suite`` so that it does not
    # shadow this function.
    tests = unittest.TestSuite()
    tests.addTest(doctest.DocTestSuite(HTMLFormFiller.__module__))
    # loadTestsFromTestCase() collects the ``test*`` methods, i.e. the same
    # set the deprecated unittest.makeSuite(cls, 'test') selected.
    tests.addTest(loader.loadTestsFromTestCase(HTMLFormFillerTestCase))
    tests.addTest(loader.loadTestsFromTestCase(HTMLSanitizerTestCase))
    return tests


if __name__ == '__main__':
    # ``defaultTest`` names the callable above; unittest resolves it in this
    # module and runs the suite it returns.
    unittest.main(defaultTest='suite')