1
from bs4 import BeautifulSoup
6
# Holds one piece of markup together with the output each parser produced
# for it, so the outputs can be compared and printed.
# NOTE(review): this excerpt looks damaged -- indentation is gone, the bare
# integers between statements appear to be leftover line numbers, and the
# gaps in them suggest source lines are missing. Restore the full text
# before changing any code here.
class Demonstration(object):
7
# NOTE(review): the constructor body is not visible. Later code reads
# self.markup and self.results, so presumably both are set here -- confirm.
def __init__(self, markup):
11
# Parse self.markup once per name in parser_names, store each output in
# self.results keyed by parser name, and return True unless some output
# compared unequal to the first one recorded.
def run_against(self, *parser_names):
12
uniform_results = True
13
# Output of the first parser processed; later outputs are compared to it.
previous_output = None
14
for parser in parser_names:
16
soup = BeautifulSoup(self.markup, parser)
17
# NOTE(review): bare `markup` is neither a parameter nor a local of this
# method, so it can only resolve to a module-level name (the script's
# top-level loop variable has that name). `self.markup` was probably
# intended -- confirm and fix once the full method is available.
if markup.startswith("<div>"):
18
# Extract the interesting part
23
# Failure path: the result becomes a marker string built from `e`.
# NOTE(review): the except clause that binds `e` is not visible here.
output = "[EXCEPTION] %s" % str(e)
24
self.results[parser] = output
25
if previous_output is None:
26
previous_output = output
27
elif previous_output != output:
28
uniform_results = False
29
return uniform_results
32
# Print the markup, then one line per parser result; labels are
# right-justified to 13 columns and values are encoded as UTF-8.
# Uses Python 2 print-statement syntax.
# NOTE(review): the `def` line of the method these statements belong to
# is not visible in this excerpt.
print "%s: %s" % ("Markup".rjust(13), self.markup.encode("utf8"))
33
for parser, output in self.results.items():
34
print "%s: %s" % (parser.rjust(13), output.encode("utf8"))
37
# Script body: read "differences.txt" one line at a time, treating each line
# as one markup sample.
# NOTE(review): the file object is not bound to a name or closed in the
# visible lines; consider a `with` block when the full script is restored.
for markup in open("differences.txt"):
38
# Decode the line from UTF-8, strip surrounding whitespace, and turn each
# literal backslash-n sequence into a real newline before wrapping it.
demo = Demonstration(markup.decode("utf8").strip().replace("\\n", "\n"))
39
is_uniform = demo.run_against("html.parser", "lxml", "html5lib")
41
# NOTE(review): the condition choosing between these two appends is not
# visible (presumably a test on is_uniform -- confirm), nor are the
# definitions of uniform_results and different_results.
uniform_results.append(demo)
43
different_results.append(demo)
45
# Report section. Uses Python 2 print-statement syntax.
print "Markup that's handled the same in every parser:"
46
# NOTE(review): loop body not visible in this excerpt.
for demo in uniform_results:
52
print "Markup that's not handled the same in every parser:"
53
# NOTE(review): loop body not visible in this excerpt.
for demo in different_results: