~vanvugt/ubuntu/oneiric/mediatomb/fix-770964-784431

« back to all changes in this revision

Viewing changes to scripts/readme_xhtml_div_extract.pl

  • Committer: Bazaar Package Importer
  • Author(s): Andres Mejia
  • Date: 2009-04-22 21:39:19 UTC
  • mto: (4.2.1 sid)
  • mto: This revision was merged to the branch mainline in revision 9.
  • Revision ID: james.westby@ubuntu.com-20090422213919-52m015y6gcpv1m1g
Tags: upstream-0.12.0~svn2018
Import upstream version 0.12.0~svn2018

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
#!/usr/bin/perl
 
2
 
 
3
use XML::DOM;
 
4
use XML::XQL;
 
5
use XML::XQL::DOM;
 
6
 
 
7
 
 
8
#binmode STDOUT, ":utf8";
 
9
 
 
10
sub my_tag_compression
 
11
{
 
12
    my ($tag, $elem) = @_;
 
13
    
 
14
    # Print empty br, hr and img tags like this: <br />
 
15
    return 2 if $tag =~ /^(br|hr|img)$/;
 
16
    
 
17
    # Print other empty tags like this: <empty></empty>
 
18
    return 1;
 
19
}
 
20
 
 
21
XML::DOM::setTagCompression (\&my_tag_compression);
 
22
 
 
23
my $parser = new XML::DOM::Parser;
 
24
#my $doc = $parser->parsefile ("readme.html");
 
25
my $doc = $parser->parse(\*STDIN);
 
26
 
 
27
my @res = $doc->xql('//div[h2/@class="title"]');
 
28
if (! @res)
 
29
{
 
30
    @res = $doc->xql('//div[h1/@class="title"]');
 
31
}
 
32
 
 
33
$res[0]->getParentNode()->removeChild($res[0]);
 
34
 
 
35
@res = $doc->xql("/html/body/div/*");
 
36
 
 
37
foreach (@res)
 
38
{
 
39
    print $_->toString();
 
40
}