~ubuntu-branches/ubuntu/raring/genshi/raring-proposed

« back to all changes in this revision

Viewing changes to genshi/filters/tests/html.py

  • Committer: Bazaar Package Importer
  • Author(s): Arnaud Fontaine
  • Date: 2007-04-16 17:49:03 UTC
  • mfrom: (1.1.2 upstream)
  • Revision ID: james.westby@ubuntu.com-20070416174903-x2p3n9g890v18d0m
Tags: 0.4-1
* New upstream release.
* Remove useless python-markup transition package.
* Add Provides against python-markup.
* Add doc-base.
* Add depends against python-xml.
* Add suggests to python-setuptools.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
# -*- coding: utf-8 -*-
 
2
#
 
3
# Copyright (C) 2006 Edgewall Software
 
4
# All rights reserved.
 
5
#
 
6
# This software is licensed as described in the file COPYING, which
 
7
# you should have received as part of this distribution. The terms
 
8
# are also available at http://genshi.edgewall.org/wiki/License.
 
9
#
 
10
# This software consists of voluntary contributions made by many
 
11
# individuals. For the exact contribution history, see the revision
 
12
# history and logs, available at http://genshi.edgewall.org/log/.
 
13
 
 
14
import doctest
 
15
import unittest
 
16
 
 
17
from genshi.input import HTML, ParseError
 
18
from genshi.filters.html import HTMLFormFiller, HTMLSanitizer
 
19
 
 
20
 
 
21
class HTMLFormFillerTestCase(unittest.TestCase):
    """Exercise ``HTMLFormFiller`` against each kind of form control.

    Every test parses a small form with ``HTML()``, pipes the resulting
    stream through an ``HTMLFormFiller`` and compares the serialized output
    with the expected markup.  Each control is checked without data, with a
    plain string value and with a list value.
    """

    def _fill(self, stream, **filler_args):
        # Pipe `stream` through a freshly constructed HTMLFormFiller (built
        # with exactly the keyword arguments given) and serialize the result.
        return unicode(stream | HTMLFormFiller(**filler_args))

    # -- <input type="text"> ------------------------------------------------

    def test_fill_input_text_no_value(self):
        # Without data the text input gains no value attribute.
        form = HTML("""<form><p>
          <input type="text" name="foo" />
        </p></form>""")
        expected = """<form><p>
          <input type="text" name="foo"/>
        </p></form>"""
        self.assertEqual(expected, self._fill(form))

    def test_fill_input_text_single_value(self):
        # A string value ends up in the value attribute.
        form = HTML("""<form><p>
          <input type="text" name="foo" />
        </p></form>""")
        expected = """<form><p>
          <input type="text" name="foo" value="bar"/>
        </p></form>"""
        self.assertEqual(expected, self._fill(form, data={'foo': 'bar'}))

    def test_fill_input_text_multi_value(self):
        # A one-element list is rendered like the plain string value.
        form = HTML("""<form><p>
          <input type="text" name="foo" />
        </p></form>""")
        expected = """<form><p>
          <input type="text" name="foo" value="bar"/>
        </p></form>"""
        self.assertEqual(expected, self._fill(form, data={'foo': ['bar']}))

    # -- <input type="hidden"> ----------------------------------------------

    def test_fill_input_hidden_no_value(self):
        # Without data the hidden input gains no value attribute.
        form = HTML("""<form><p>
          <input type="hidden" name="foo" />
        </p></form>""")
        expected = """<form><p>
          <input type="hidden" name="foo"/>
        </p></form>"""
        self.assertEqual(expected, self._fill(form))

    def test_fill_input_hidden_single_value(self):
        # A string value ends up in the value attribute.
        form = HTML("""<form><p>
          <input type="hidden" name="foo" />
        </p></form>""")
        expected = """<form><p>
          <input type="hidden" name="foo" value="bar"/>
        </p></form>"""
        self.assertEqual(expected, self._fill(form, data={'foo': 'bar'}))

    def test_fill_input_hidden_multi_value(self):
        # A one-element list is rendered like the plain string value.
        form = HTML("""<form><p>
          <input type="hidden" name="foo" />
        </p></form>""")
        expected = """<form><p>
          <input type="hidden" name="foo" value="bar"/>
        </p></form>"""
        self.assertEqual(expected, self._fill(form, data={'foo': ['bar']}))

    # -- <textarea> ---------------------------------------------------------

    def test_fill_textarea_no_value(self):
        # Without data the empty textarea is serialized as an empty element.
        form = HTML("""<form><p>
          <textarea name="foo"></textarea>
        </p></form>""")
        expected = """<form><p>
          <textarea name="foo"/>
        </p></form>"""
        self.assertEqual(expected, self._fill(form))

    def test_fill_textarea_single_value(self):
        # A string value becomes the textarea's text content.
        form = HTML("""<form><p>
          <textarea name="foo"></textarea>
        </p></form>""")
        expected = """<form><p>
          <textarea name="foo">bar</textarea>
        </p></form>"""
        self.assertEqual(expected, self._fill(form, data={'foo': 'bar'}))

    def test_fill_textarea_multi_value(self):
        # A one-element list is rendered like the plain string value.
        form = HTML("""<form><p>
          <textarea name="foo"></textarea>
        </p></form>""")
        expected = """<form><p>
          <textarea name="foo">bar</textarea>
        </p></form>"""
        self.assertEqual(expected, self._fill(form, data={'foo': ['bar']}))

    # -- <input type="checkbox"> --------------------------------------------

    def test_fill_input_checkbox_no_value(self):
        # Without data the checkbox is left untouched.
        form = HTML("""<form><p>
          <input type="checkbox" name="foo" />
        </p></form>""")
        expected = """<form><p>
          <input type="checkbox" name="foo"/>
        </p></form>"""
        self.assertEqual(expected, self._fill(form))

    def test_fill_input_checkbox_single_value_auto(self):
        # Checkbox without a value attribute: an empty string leaves it
        # unchecked, 'on' checks it.  The parsed stream is reused for both.
        form = HTML("""<form><p>
          <input type="checkbox" name="foo" />
        </p></form>""")
        unchecked = """<form><p>
          <input type="checkbox" name="foo"/>
        </p></form>"""
        checked = """<form><p>
          <input type="checkbox" name="foo" checked="checked"/>
        </p></form>"""
        self.assertEqual(unchecked, self._fill(form, data={'foo': ''}))
        self.assertEqual(checked, self._fill(form, data={'foo': 'on'}))

    def test_fill_input_checkbox_single_value_defined(self):
        # Checkbox with value="1": only the matching data value checks it.
        form = HTML("""<form><p>
          <input type="checkbox" name="foo" value="1" />
        </p></form>""")
        checked = """<form><p>
          <input type="checkbox" name="foo" value="1" checked="checked"/>
        </p></form>"""
        unchecked = """<form><p>
          <input type="checkbox" name="foo" value="1"/>
        </p></form>"""
        self.assertEqual(checked, self._fill(form, data={'foo': '1'}))
        self.assertEqual(unchecked, self._fill(form, data={'foo': '2'}))

    def test_fill_input_checkbox_multi_value_auto(self):
        # Checkbox without a value attribute: an empty list leaves it
        # unchecked, a list containing 'on' checks it.
        form = HTML("""<form><p>
          <input type="checkbox" name="foo" />
        </p></form>""")
        unchecked = """<form><p>
          <input type="checkbox" name="foo"/>
        </p></form>"""
        checked = """<form><p>
          <input type="checkbox" name="foo" checked="checked"/>
        </p></form>"""
        self.assertEqual(unchecked, self._fill(form, data={'foo': []}))
        self.assertEqual(checked, self._fill(form, data={'foo': ['on']}))

    def test_fill_input_checkbox_multi_value_defined(self):
        # Checkbox with value="1": only a list holding '1' checks it.
        form = HTML("""<form><p>
          <input type="checkbox" name="foo" value="1" />
        </p></form>""")
        checked = """<form><p>
          <input type="checkbox" name="foo" value="1" checked="checked"/>
        </p></form>"""
        unchecked = """<form><p>
          <input type="checkbox" name="foo" value="1"/>
        </p></form>"""
        self.assertEqual(checked, self._fill(form, data={'foo': ['1']}))
        self.assertEqual(unchecked, self._fill(form, data={'foo': ['2']}))

    # -- <input type="radio"> -----------------------------------------------

    def test_fill_input_radio_no_value(self):
        # Without data the radio button is left untouched.
        form = HTML("""<form><p>
          <input type="radio" name="foo" />
        </p></form>""")
        expected = """<form><p>
          <input type="radio" name="foo"/>
        </p></form>"""
        self.assertEqual(expected, self._fill(form))

    def test_fill_input_radio_single_value(self):
        # Radio with value="1": only the matching data value checks it.
        form = HTML("""<form><p>
          <input type="radio" name="foo" value="1" />
        </p></form>""")
        checked = """<form><p>
          <input type="radio" name="foo" value="1" checked="checked"/>
        </p></form>"""
        unchecked = """<form><p>
          <input type="radio" name="foo" value="1"/>
        </p></form>"""
        self.assertEqual(checked, self._fill(form, data={'foo': '1'}))
        self.assertEqual(unchecked, self._fill(form, data={'foo': '2'}))

    def test_fill_input_radio_multi_value(self):
        # Radio with value="1": only a list holding '1' checks it.
        form = HTML("""<form><p>
          <input type="radio" name="foo" value="1" />
        </p></form>""")
        checked = """<form><p>
          <input type="radio" name="foo" value="1" checked="checked"/>
        </p></form>"""
        unchecked = """<form><p>
          <input type="radio" name="foo" value="1"/>
        </p></form>"""
        self.assertEqual(checked, self._fill(form, data={'foo': ['1']}))
        self.assertEqual(unchecked, self._fill(form, data={'foo': ['2']}))

    # -- <select> -----------------------------------------------------------

    def test_fill_select_no_value_auto(self):
        # Options without value attributes; no data, so nothing is selected.
        form = HTML("""<form><p>
          <select name="foo">
            <option>1</option>
            <option>2</option>
            <option>3</option>
          </select>
        </p></form>""")
        expected = """<form><p>
          <select name="foo">
            <option>1</option>
            <option>2</option>
            <option>3</option>
          </select>
        </p></form>"""
        self.assertEqual(expected, self._fill(form))

    def test_fill_select_no_value_defined(self):
        # Options with value attributes; no data, so nothing is selected.
        form = HTML("""<form><p>
          <select name="foo">
            <option value="1">1</option>
            <option value="2">2</option>
            <option value="3">3</option>
          </select>
        </p></form>""")
        expected = """<form><p>
          <select name="foo">
            <option value="1">1</option>
            <option value="2">2</option>
            <option value="3">3</option>
          </select>
        </p></form>"""
        self.assertEqual(expected, self._fill(form))

    def test_fill_select_single_value_auto(self):
        # Options without value attributes are matched on their text.
        form = HTML("""<form><p>
          <select name="foo">
            <option>1</option>
            <option>2</option>
            <option>3</option>
          </select>
        </p></form>""")
        expected = """<form><p>
          <select name="foo">
            <option selected="selected">1</option>
            <option>2</option>
            <option>3</option>
          </select>
        </p></form>"""
        self.assertEqual(expected, self._fill(form, data={'foo': '1'}))

    def test_fill_select_single_value_defined(self):
        # The option whose value attribute equals the data is selected.
        form = HTML("""<form><p>
          <select name="foo">
            <option value="1">1</option>
            <option value="2">2</option>
            <option value="3">3</option>
          </select>
        </p></form>""")
        expected = """<form><p>
          <select name="foo">
            <option value="1" selected="selected">1</option>
            <option value="2">2</option>
            <option value="3">3</option>
          </select>
        </p></form>"""
        self.assertEqual(expected, self._fill(form, data={'foo': '1'}))

    def test_fill_select_multi_value_auto(self):
        # Multi-select, options matched on text: every listed entry is
        # selected.  The bare `multiple` attribute is serialized expanded.
        form = HTML("""<form><p>
          <select name="foo" multiple>
            <option>1</option>
            <option>2</option>
            <option>3</option>
          </select>
        </p></form>""")
        expected = """<form><p>
          <select name="foo" multiple="multiple">
            <option selected="selected">1</option>
            <option>2</option>
            <option selected="selected">3</option>
          </select>
        </p></form>"""
        self.assertEqual(expected, self._fill(form, data={'foo': ['1', '3']}))

    def test_fill_select_multi_value_defined(self):
        # Multi-select, options matched on their value attribute.
        form = HTML("""<form><p>
          <select name="foo" multiple>
            <option value="1">1</option>
            <option value="2">2</option>
            <option value="3">3</option>
          </select>
        </p></form>""")
        expected = """<form><p>
          <select name="foo" multiple="multiple">
            <option value="1" selected="selected">1</option>
            <option value="2">2</option>
            <option value="3" selected="selected">3</option>
          </select>
        </p></form>"""
        self.assertEqual(expected, self._fill(form, data={'foo': ['1', '3']}))
 
272
 
 
273
 
 
274
class HTMLSanitizerTestCase(unittest.TestCase):
    """Exercise ``HTMLSanitizer`` on harmless and on hostile markup.

    Each check parses a snippet with ``HTML()``, pipes it through a sanitizer
    and compares the serialized result with the expected markup.  Snippets
    the parser is expected to reject outright are asserted to raise
    ``ParseError`` from ``HTML()`` itself.
    """

    def _sanitize(self, markup, sanitizer=None):
        # Parse first, then build a default HTMLSanitizer if none was given,
        # then serialize the filtered stream.
        stream = HTML(markup)
        if sanitizer is None:
            sanitizer = HTMLSanitizer()
        return unicode(stream | sanitizer)

    def test_sanitize_unchanged(self):
        # Harmless markup comes out the same apart from serialization.
        self.assertEqual(u'<a href="#">fo<br/>o</a>',
                         self._sanitize('<a href="#">fo<br />o</a>'))

    def test_sanitize_escape_text(self):
        # Escaped characters in text stay escaped.
        self.assertEqual(u'<a href="#">fo&amp;</a>',
                         self._sanitize('<a href="#">fo&amp;</a>'))
        self.assertEqual(u'<a href="#">&lt;foo&gt;</a>',
                         self._sanitize('<a href="#">&lt;foo&gt;</a>'))

    def test_sanitize_entityref_text(self):
        # A named entity is output as the character it refers to.
        self.assertEqual(u'<a href="#">foö</a>',
                         self._sanitize('<a href="#">fo&ouml;</a>'))

    def test_sanitize_escape_attr(self):
        # Escaped characters in attribute values stay escaped.
        self.assertEqual(u'<div title="&lt;foo&gt;"/>',
                         self._sanitize('<div title="&lt;foo&gt;"></div>'))

    def test_sanitize_close_empty_tag(self):
        # An unclosed <br> is serialized as a self-closed element.
        self.assertEqual(u'<a href="#">fo<br/>o</a>',
                         self._sanitize('<a href="#">fo<br>o</a>'))

    def test_sanitize_invalid_entity(self):
        # An unknown entity reference is output with its ampersand escaped.
        self.assertEqual('&amp;junk;', self._sanitize('&junk;'))

    def test_sanitize_remove_script_elem(self):
        # <script> elements are dropped, whatever the tag's case.
        self.assertEqual(u'', self._sanitize('<script>alert("Foo")</script>'))
        self.assertEqual(
            u'', self._sanitize('<SCRIPT SRC="http://example.com/"></SCRIPT>')
        )
        # Mangled tag names are rejected by the parser itself.
        self.assertRaises(ParseError, HTML,
                          '<SCR\0IPT>alert("foo")</SCR\0IPT>')
        self.assertRaises(ParseError, HTML,
                          '<SCRIPT&XYZ SRC="http://example.com/"></SCRIPT>')

    def test_sanitize_remove_onclick_attr(self):
        # The onclick attribute is removed.
        self.assertEqual(u'<div/>',
                         self._sanitize('<div onclick=\'alert("foo")\' />'))

    def test_sanitize_remove_style_scripts(self):
        # The sanitizer is built with 'style' added to its safe_attrs set and
        # is shared by every case below.
        style_aware = HTMLSanitizer(
            safe_attrs=HTMLSanitizer.SAFE_ATTRS | set(['style'])
        )
        cases = [
            # Inline style with url() using javascript: scheme
            ('<DIV STYLE=\'background: url(javascript:alert("foo"))\'>',
             u'<div/>'),
            # Inline style with url() using javascript: scheme, using
            # control char
            ('<DIV STYLE=\'background: url(&#1;javascript:alert("foo"))\'>',
             u'<div/>'),
            # Inline style with url() using javascript: scheme, in quotes
            ('<DIV STYLE=\'background: url("javascript:alert(foo)")\'>',
             u'<div/>'),
            # IE expressions in CSS not allowed
            ('<DIV STYLE=\'width: expression(alert("foo"));\'>',
             u'<div/>'),
            # Only the offending declaration is dropped; the rest is kept
            ('<DIV STYLE=\'background: url(javascript:alert("foo"));'
             'color: #fff\'>',
             u'<div style="color: #fff"/>'),
            # Inline style with url() using javascript: scheme, using
            # unicode escapes
            ('<DIV STYLE=\'background: \\75rl(javascript:alert("foo"))\'>',
             u'<div/>'),
            ('<DIV STYLE=\'background: \\000075rl(javascript:alert("foo"))\'>',
             u'<div/>'),
            ('<DIV STYLE=\'background: \\75 rl(javascript:alert("foo"))\'>',
             u'<div/>'),
            ('<DIV STYLE=\'background: \\000075 rl(javascript:alert("foo"))\'>',
             u'<div/>'),
            ('<DIV STYLE=\'background: \\000075\r\nrl(javascript:alert("foo"))\'>',
             u'<div/>'),
        ]
        for markup, expected in cases:
            self.assertEqual(expected, self._sanitize(markup, style_aware))

    def test_sanitize_remove_src_javascript(self):
        self.assertEqual(u'<img/>',
                         self._sanitize('<img src=\'javascript:alert("foo")\'>'))
        # Case-insensitive protocol matching
        self.assertEqual(u'<img/>',
                         self._sanitize('<IMG SRC=\'JaVaScRiPt:alert("foo")\'>'))
        # Grave accents (not parsed)
        self.assertRaises(
            ParseError, HTML,
            '<IMG SRC=`javascript:alert("RSnake says, \'foo\'")`>'
        )
        # Protocol encoded using UTF-8 numeric entities
        encoded = ('<IMG SRC=\'&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;'
                   '&#112;&#116;&#58;alert("foo")\'>')
        self.assertEqual(u'<img/>', self._sanitize(encoded))
        # Protocol encoded using UTF-8 numeric entities without a semicolon
        # (which is allowed because the max number of digits is used)
        encoded = ('<IMG SRC=\'&#0000106&#0000097&#0000118&#0000097'
                   '&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116'
                   '&#0000058alert("foo")\'>')
        self.assertEqual(u'<img/>', self._sanitize(encoded))
        # Protocol encoded using UTF-8 numeric hex entities without a
        # semicolon (which is allowed because the max number of digits is
        # used)
        encoded = ('<IMG SRC=\'&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69'
                   '&#x70&#x74&#x3A;alert("foo")\'>')
        self.assertEqual(u'<img/>', self._sanitize(encoded))
        # Embedded tab character in protocol
        self.assertEqual(
            u'<img/>', self._sanitize('<IMG SRC=\'jav\tascript:alert("foo");\'>')
        )
        # Embedded tab character in protocol, but encoded this time
        self.assertEqual(
            u'<img/>',
            self._sanitize('<IMG SRC=\'jav&#x09;ascript:alert("foo");\'>')
        )
 
382
 
 
383
 
 
384
def suite():
    """Build the test suite for the HTML filters.

    The suite holds, in order, the doctests of the module that defines
    ``HTMLFormFiller``, then every ``test*`` method of
    ``HTMLFormFillerTestCase`` and of ``HTMLSanitizerTestCase``.
    """
    collected = unittest.TestSuite()
    collected.addTest(doctest.DocTestSuite(HTMLFormFiller.__module__))
    for case_class in (HTMLFormFillerTestCase, HTMLSanitizerTestCase):
        collected.addTest(unittest.makeSuite(case_class, 'test'))
    return collected
 
390
 
 
391
# When executed as a script, hand control to unittest and have it run the
# tests returned by the module-level suite() function.
if __name__ == '__main__':
    unittest.main(defaultTest='suite')