5
5
>>> from lxml.html.clean import clean, clean_html, Cleaner
6
6
>>> from lxml.html import usedoctest
8
>>> def tostring(el): # work-around for Py3 'bytes' type
9
... from lxml.html import tostring
11
... if not isinstance(s, str):
12
... s = s.decode('UTF-8')
8
15
>>> doc_embed = '''<div>
9
16
... <embed src="http://www.youtube.com/v/183tVH1CZpA" type="application/x-shockwave-flash"></embed>
10
17
... <embed src="http://anothersite.com/v/another"></embed>
11
18
... <script src="http://www.youtube.com/example.js"></script>
12
19
... <script src="/something-else.js"></script>
14
>>> print tostring(fromstring(doc_embed))
21
>>> print(tostring(fromstring(doc_embed)))
16
23
<embed src="http://www.youtube.com/v/183tVH1CZpA" type="application/x-shockwave-flash"></embed>
17
24
<embed src="http://anothersite.com/v/another"></embed>
18
25
<script src="http://www.youtube.com/example.js"></script>
19
26
<script src="/something-else.js"></script>
21
>>> print Cleaner().clean_html(doc_embed)
28
>>> print(Cleaner().clean_html(doc_embed))
24
>>> print Cleaner(host_whitelist=['www.youtube.com']).clean_html(doc_embed)
31
>>> print(Cleaner(host_whitelist=['www.youtube.com']).clean_html(doc_embed))
26
33
<embed src="http://www.youtube.com/v/183tVH1CZpA" type="application/x-shockwave-flash"></embed>
28
>>> print Cleaner(host_whitelist=['www.youtube.com'], whitelist_tags=None).clean_html(doc_embed)
35
>>> print(Cleaner(host_whitelist=['www.youtube.com'], whitelist_tags=None).clean_html(doc_embed))
30
37
<embed src="http://www.youtube.com/v/183tVH1CZpA" type="application/x-shockwave-flash"></embed>
31
38
<script src="http://www.youtube.com/example.js"></script>