~leonardr/beautifulsoup/bs4

« back to all changes in this revision

Viewing changes to scripts/demo_differences.py

  • Committer: Leonard Richardson
  • Date: 2012-02-22 18:19:06 UTC
  • mto: This revision was merged to the branch mainline in revision 170.
  • Revision ID: leonard.richardson@canonical.com-20120222181906-svqr1x830fsowtvk
Added scripts.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
from bs4 import BeautifulSoup
 
2
 
 
3
# Demonstrations for which the parsers did NOT all produce the same output;
# appended to by the driver loop at the bottom of this script.
different_results = []
 
4
# Demonstrations for which every parser produced the same output;
# appended to by the driver loop at the bottom of this script.
uniform_results = []
 
5
 
 
6
class Demonstration(object):
    """Run one piece of markup through several parsers and compare results.

    Attributes:
        markup: The markup string handed to every parser.
        results: Maps each parser name passed to run_against() to what that
            parser produced -- a parsed object, or an "[EXCEPTION] ..."
            string if building/inspecting the soup raised.
    """

    def __init__(self, markup):
        """Remember `markup` and start with an empty result table."""
        self.results = {}
        self.markup = markup

    def run_against(self, *parser_names):
        """Parse self.markup once with each named parser.

        Each parser's output is stored in self.results under the parser's
        name. Any exception raised while parsing is recorded as an
        "[EXCEPTION] <message>" string instead of propagating, so one broken
        or missing parser does not abort the comparison.

        Args:
            *parser_names: Parser names, passed as the second argument to
                BeautifulSoup().

        Returns:
            True if every output compared equal to the first parser's
            output (also True when no parser names are given), otherwise
            False.
        """
        # Named `all_match` rather than `uniform_results` so it cannot be
        # confused with the module-level list of that name.
        all_match = True
        first_output = None
        for parser in parser_names:
            try:
                soup = BeautifulSoup(self.markup, parser)
                # Look at this demonstration's own markup, not at whatever
                # module-level variable named `markup` happens to exist.
                if self.markup.startswith("<div>"):
                    # Extract the interesting part
                    output = soup.div
                else:
                    output = soup
            except Exception as e:
                # Deliberately broad: a failure is itself a result to show.
                output = "[EXCEPTION] %s" % str(e)
            self.results[parser] = output
            # Every output is compared against the first one recorded.
            if first_output is None:
                first_output = output
            elif first_output != output:
                all_match = False
        return all_match

    def dump(self):
        """Print the markup, then one line per parser with its output.

        Both the markup and each output are UTF-8 encoded before printing.
        """
        # A single parenthesised argument prints identically whether `print`
        # is the Python 2 statement or a function.
        print("%s: %s" % ("Markup".rjust(13), self.markup.encode("utf8")))
        for parser, output in self.results.items():
            print("%s: %s" % (parser.rjust(13), output.encode("utf8")))
 
35
 
 
36
 
 
37
# Feed every line of differences.txt to all three parsers and sort the
# resulting demonstrations by whether the parsers agreed.
# The `with` block guarantees the file is closed once the loop finishes.
with open("differences.txt") as markup_file:
    for markup in markup_file:
        # One markup sample per line; a literal backslash-n in the file
        # stands for a real newline inside the sample.
        demo = Demonstration(markup.decode("utf8").strip().replace("\\n", "\n"))
        is_uniform = demo.run_against("html.parser", "lxml", "html5lib")
        if is_uniform:
            uniform_results.append(demo)
        else:
            different_results.append(demo)

# Report: the agreeing samples first, then the disagreeing ones, each
# followed by a dashed rule and the two groups separated by a "=" rule.
print("Markup that's handled the same in every parser:")
for demo in uniform_results:
    demo.dump()
    print("-" * 80)
# print("") emits the same empty line as a bare Python 2 `print` statement.
print("")
print("=" * 80)
print("")
print("Markup that's not handled the same in every parser:")
for demo in different_results:
    demo.dump()
    print("-" * 80)