5
use vars qw( @ISA @EXPORT @EXPORT_OK $VERSION );
8
@ISA = qw(Exporter DynaLoader);
12
bootstrap XML::Bare $VERSION;
15
@EXPORT_OK = qw(merge clean find_node del_node forcearray del_by_perl);
19
XML::Bare - Minimal XML parser implemented via a C state engine
29
$class = ref($class) || $class;
36
if( $self->{ 'text' } ) {
37
XML::Bare::c_parse( $self->{'text'} );
40
my $file = $self->{ 'file' };
41
my $res = open( XML, $file );
48
$self->{'text'} = <XML>;
51
XML::Bare::c_parse( $self->{'text'} );
59
return $ref if( ref( $ref ) eq 'ARRAY' );
66
# shift in the two array references as well as the field to merge on
70
my %hash = map { $_->{ $id } ? ( $_->{ $id }->{ 'value' } => $_ ) : ( 0 => 0 ) } @$a;
72
next if( !$one->{ $id } );
73
my $short = $hash{ $one->{ $id }->{ 'value' } };
75
foreach my $key ( keys %$one ) {
76
next if( $key eq '_pos' || $key eq 'id' );
77
my $cur = $short->{ $key };
78
my $add = $one->{ $key };
80
$short->{ $key } = $add;
83
my $type = ref( $cur );
84
if( $cur eq 'HASH' ) {
86
$short->{ $key } = \@arr;
89
if( ref( $add ) eq 'HASH' ) {
90
push( @{$short->{ $key }}, $add );
92
else { # we are merging an array
93
push( @{$short->{ $key }}, @$add );
97
# we need to deal with the case where this node
98
# is already there, either alone or as an array
105
my $ob = new XML::Bare( @_ );
106
my $root = $ob->parse();
107
$ob->{'file'} = $ob->{'save'} if( $ob->{'save'} && "$ob->{'save'}" ne "1" );
108
if( $ob->{'save'} ) {
112
return $ob->xml( $root );
115
# Convert the parsed XML into a Perl hash (checking it against the schema, if one was given), and return the hash
119
my $res = XML::Bare::xml2obj();#$self->xml2obj();
120
$self->{'structroot'} = XML::Bare::get_root();
123
if( defined( $self->{'scheme'} ) ) {
124
$self->{'xbs'} = new XML::Bare( %{ $self->{'scheme'} } );
126
if( defined( $self->{'xbs'} ) ) {
127
my $xbs = $self->{'xbs'};
128
my $ob = $xbs->parse();
129
$self->{'xbso'} = $ob;
133
if( $res < 0 ) { croak "Error at ".$self->lineinfo( -$res ); }
134
$self->{ 'xml' } = $res;
136
if( defined( $self->{'xbso'} ) ) {
137
my $ob = $self->{'xbso'};
138
my $cres = $self->check( $res, $ob );
144
return $self->{ 'xml' };
152
for( my $i=0;$i<$res;$i++ ) {
153
my $let = substr( $self->{'text'}, $i, 1 );
154
if( ord($let) == 10 ) {
159
my $part = substr( $self->{'text'}, $res, 10 );
162
return "line $line char $res \"$part\"";
173
if( ref( $scheme ) eq 'ARRAY' ) {
174
for my $one ( @$scheme ) {
175
my $res = $self->checkone( $node, $one, $parent );
176
return 0 if( !$res );
181
return $self->checkone( $node, $scheme, $parent );
192
for my $key ( keys %$node ) {
193
next if( substr( $key, 0, 1 ) eq '_' || $key eq 'att' || $key eq 'comment' );
194
if( $key eq 'value' ) {
195
my $val = $node->{ 'value' };
196
my $regexp = $scheme->{'value'};
198
#$regexp = "^($regexp)\$";
199
if( $val !~ m/^($regexp)$/ ) {
200
my $linfo = $self->lineinfo( $node->{'_i'} );
201
return "Value of '$parent' node ($val) does not match /$regexp/ [$linfo]";
206
my $sub = $node->{ $key };
207
my $ssub = $scheme->{ $key };
208
if( !$ssub ) { #&& ref( $schemesub ) ne 'HASH'
209
my $linfo = $self->lineinfo( $sub->{'_i'} );
210
return "Invalid node '$key' in xml [$linfo]";
212
if( ref( $sub ) eq 'HASH' ) {
213
my $res = $self->check( $sub, $ssub, $key );
214
return $res if( $res );
216
if( ref( $sub ) eq 'ARRAY' ) {
218
if( ref( $asub ) eq 'ARRAY' ) {
221
if( $asub->{'_t'} ) {
222
my $max = $asub->{'_max'} || 0;
223
if( $#$sub >= $max ) {
224
my $linfo = $self->lineinfo( $sub->[0]->{'_i'} );
225
return "Too many nodes of type '$key'; max $max; [$linfo]"
227
my $min = $asub->{'_min'} || 0;
228
if( ($#$sub+1)<$min ) {
229
my $linfo = $self->lineinfo( $sub->[0]->{'_i'} );
230
return "Not enough nodes of type '$key'; min $min [$linfo]"
233
for my $item ( @$sub ) {
234
my $res = $self->check( $item, $ssub, $key );
235
return $res if( $res );
239
if( my $dem = $scheme->{'_demand'} ) {
240
for my $req ( @{$scheme->{'_demand'}} ) {
241
my $ck = $node->{ $req };
243
my $linfo = $self->lineinfo( $node->{'_i'} );
244
return "Required node '$req' does not exist [$linfo]"
246
if( ref( $ck ) eq 'ARRAY' ) {
247
my $linfo = $self->lineinfo( $node->{'_i'} );
248
return "Required node '$req' is empty array [$linfo]" if( $#$ck == -1 );
256
sub readxbs { # xbs = xml bare schema
259
for my $key ( keys %$node ) {
260
next if( substr( $key, 0, 1 ) eq '_' || $key eq 'att' || $key eq 'comment' );
261
if( $key eq 'value' ) {
262
my $val = $node->{'value'};
263
delete $node->{'value'} if( $val =~ m/^\W*$/ );
266
my $sub = $node->{ $key };
268
if( $key =~ m/([a-z_]+)([^a-z_]+)/ ) {
286
$name = 'multi_'.$name;
290
elsif( $t =~ m/\{([0-9]+),([0-9]+)\}/ ) {
296
if( ref( $sub ) eq 'HASH' ) {
297
my $res = readxbs( $sub );
299
$sub->{'_min'} = $min;
300
$sub->{'_max'} = $max;
302
if( ref( $sub ) eq 'ARRAY' ) {
303
for my $item ( @$sub ) {
304
my $res = readxbs( $item );
306
$item->{'_min'} = $min;
307
$item->{'_max'} = $max;
311
push( @demand, $name ) if( $min );
312
$node->{$name} = $node->{$key};
313
delete $node->{$key};
316
if( ref( $sub ) eq 'HASH' ) {
322
if( ref( $sub ) eq 'ARRAY' ) {
323
for my $item ( @$sub ) {
331
push( @demand, $key );
335
$node->{'_demand'} = \@demand;
342
my $res = XML::Bare::xml2obj();#$self->xml2obj();
343
$self->{'structroot'} = XML::Bare::get_root();
347
die "Error at character ".(-$res);
349
$self->{ 'xml' } = $res;
351
return $self->{ 'xml' };
360
$node->{ 'multi_'.$name } = \%blank if( ! $node->{ 'multi_'.$name } );
361
$node->{ $name } = \@newar if( ! $node->{ $name } );
362
my $newnode = $self->new_node( @_ );
363
push( @{ $node->{ $name } }, $newnode );
374
$node->{ 'multi_'.$name } = \%blank if( ! $node->{ 'multi_'.$name } );
375
$node->{ $name } = \@newar if( ! $node->{ $name } );
376
my $newnode = $self->new_node( @_ );
379
for my $anode ( @{ $node->{ $name } } ) {
380
$anode->{'_pos'} = $cur if( !$anode->{'_pos'} );
383
my $opos = $prev->{'_pos'};
384
for my $anode ( @{ $node->{ $name } } ) {
385
$anode->{'_pos'}++ if( $anode->{'_pos'} > $opos );
387
$newnode->{'_pos'} = $opos + 1;
389
push( @{ $node->{ $name } }, $newnode );
397
$cond =~ s/-([a-z]+)/\$ob->\{'$1'\}->\{'value'\}/g;
399
foreach my $ob ( @$arr ) {
400
push( @res, $ob ) if( eval( $cond ) );
410
$node = $node->{ $name };
411
return 0 if( !$node );
412
if( ref( $node ) eq 'HASH' ) {
413
foreach my $key ( keys %match ) {
414
my $val = $match{ $key };
416
if( $node->{ $key }->{'value'} eq $val ) {
421
if( ref( $node ) eq 'ARRAY' ) {
422
for( my $i = 0; $i <= $#$node; $i++ ) {
423
my $one = $node->[ $i ];
424
foreach my $key ( keys %match ) {
425
my $val = $match{ $key };
426
croak('undefined value in find') unless defined $val;
427
if( $one->{ $key }->{'value'} eq $val ) {
428
return $node->[ $i ];
441
$node = $node->{ $name };
443
for( my $i = 0; $i <= $#$node; $i++ ) {
444
my $one = $node->[ $i ];
445
foreach my $key ( keys %match ) {
446
my $val = $match{ $key };
447
if( $one->{ $key }->{'value'} eq $val ) {
448
delete $node->[ $i ];
457
$cond =~ s/-value/\$ob->\{'value'\}/g;
458
$cond =~ s/-([a-z]+)/\$ob->\{'$1'\}->\{'value'\}/g;
460
for( my $i = 0; $i <= $#$arr; $i++ ) {
461
my $ob = $arr->[ $i ];
462
delete $arr->[ $i ] if( eval( $cond ) );
467
# Create an XML hash node with the passed-in values already set
473
foreach $a ( keys %parts ) {
474
$newnode{ $a } = $self->newhash( $parts{$a} );
485
$hash{ 'value' } = $val;
494
foreach my $name ( keys %$root ) {
495
my $val = $root->{$name}{'value'} || '';
496
$ret{ $name } = $val;
501
# Save an XML hash tree into a file
504
my $file = $self->{ 'file' };
505
my $xml = $self->{ 'xml' };
509
print F $self->xml( $self->{'xml'} );
520
return obj2xml( \%hash, '', 0 );
524
return obj2xml( \%hash, '', 0 );
533
$level = 0 if( !$level );
534
$pad = '' if( $level == 1 );
538
return '' if( !$objs );
541
my $oba = $objs->{ $a };
542
my $obb = $objs->{ $b };
543
if( !$oba ) { return 0; }
544
if( !$obb ) { return 0; }
545
$oba = $oba->[0] if( ref( $oba ) eq 'ARRAY' );
546
$obb = $obb->[0] if( ref( $obb ) eq 'ARRAY' );
547
if( ref( $oba ) eq 'HASH' && ref( $obb ) eq 'HASH' ) {
548
my $posa = $oba->{'_pos'}*1;
549
my $posb = $obb->{'_pos'}*1;
550
if( !$posa ) { $posa = 0; }
551
if( !$posb ) { $posb = 0; }
552
return $posa <=> $posb;
557
my $obj = $objs->{ $i } || '';
558
my $type = ref( $obj );
559
if( $type eq 'ARRAY' ) {
564
my $oba = $a;#$obj->[ $a ];
565
my $obb = $b;#$obj->[ $b ];
566
if( !$oba ) { return 0; }
567
if( !$obb ) { return 0; }
568
if( ref( $oba ) eq 'HASH' && ref( $obb ) eq 'HASH' ) {
569
my $posa = $oba->{'_pos'};
570
my $posb = $obb->{'_pos'};
571
if( !$posa ) { $posa = 0; }
572
if( !$posb ) { $posb = 0; }
573
return $posa <=> $posb;
578
for my $j ( @dex2 ) {
579
$xml .= obj2xml( $j, $i, $pad.' ', $level+1, $#dex );
582
elsif( $type eq 'HASH' ) {
584
if( $obj->{ 'att' } ) {
585
$att .= ' ' . $i . '="' . $obj->{ 'value' } . '"';
588
$xml .= obj2xml( $obj , $i, $pad.' ', $level+1, $#dex );
592
if( $i eq 'comment' ) {
593
$xml .= '<!--' . $obj . '-->' . "\n";
595
elsif( $i eq 'value' ) {
596
if( $#dex < 3 && $level > 1 ) {
597
if( $obj && $obj =~ /[<>&;]/ ) {
598
$xml .= '<![CDATA[' . $obj . ']]>';
605
# $obj = "$pad $obj\n";
612
elsif( substr( $i, 0, 1 ) eq '_' ) {
615
$xml .= '<' . $i . '>' . $obj . '</' . $i . '>';
619
my $pad2 = $imm ? '' : $pad;
620
my $cr = $imm ? '' : "\n";
621
if( substr( $name, 0, 1 ) ne '_' ) {
623
$xml = $pad . '<' . $name . $att . '>' . $cr . $xml . $pad2 . '</' . $name . '>';
625
return $xml."\n" if( $level );
631
# Hand the 'structroot' value stored on this object to the C-level
# routine XML::Bare::free_tree_c (presumably releasing the parsed C tree).
# Returns whatever free_tree_c returns.
sub free_tree {
    my ( $self ) = @_;
    my $struct_root = $self->{'structroot'};
    XML::Bare::free_tree_c( $struct_root );
}
641
my $ob = new XML::Bare( text => '<xml><name>Bob</name></xml>' );
643
# Parse the xml into a hash tree
644
my $root = $ob->parse();
646
# Print the content of the name node
647
print $root->{xml}->{name}->{value};
651
# Load xml from a file ( assume same contents as first example )
652
my $ob2 = new XML::Bare( file => 'test.xml' );
654
my $root2 = $ob2->parse();
656
$root2->{xml}->{name}->{value} = 'Tim';
658
# Save the changes back to the file
663
# Load xml and verify against XBS ( XML Bare Schema )
664
my $xml_text = '<xml><item name=bob/></xml>';
665
my $schema_text = '<xml><item* name=[a-z]+></item*></xml>';
666
my $ob = new XML::Bare( text => $xml_text, schema => { text => $schema_text } );
667
$ob->parse(); # this will error out if the XML does not match the schema
671
This module is a 'Bare' XML parser. It is implemented in C. The parser
672
itself is a simple state engine that is less than 500 lines of C. The
673
parser builds a C struct tree from input text. That C struct tree is
674
converted to a Perl hash by a Perl function that makes basic calls back
675
to the C to go through the nodes sequentially.
677
The parser itself will only cease parsing if it encounters tags that
678
are not closed properly. All other inputs will parse, even invalid
679
inputs. To allow checking for validity, a schema checker is included
680
in the module as well.
682
The schema format is custom and is meant to be as simple as possible.
683
It is based loosely around the way multiplicity is handled in Perl
688
To demonstrate what sort of XML is supported, consider the following
689
examples. Each of the PERL statements evaluates to true.
693
=item * Node containing just text
696
PERL: $root->{xml}->{value} eq "blah";
700
XML: <xml><name>Bob</name></xml>
701
PERL: $root->{xml}->{name}->{value} eq "Bob";
703
=item * Attributes unquoted
705
XML: <xml><a href=index.htm>Link</a></xml>
706
PERL: $root->{xml}->{a}->{href}->{value} eq "index.htm";
708
=item * Attributes quoted
710
XML: <xml><a href="index.htm">Link</a></xml>
711
PERL: $root->{xml}->{a}->{href}->{value} eq "index.htm";
715
XML: <xml><raw><![CDATA[some raw $~<!bad xml<>]]></raw></xml>
716
PERL: $root->{xml}->{raw}->{value} eq "some raw \$~<!bad xml<>";
718
=item * Multiple nodes; form array
720
XML: <xml><item>1</item><item>2</item></xml>
721
PERL: $root->{xml}->{item}->[0]->{value} eq "1";
723
=item * Forcing array creation
725
XML: <xml><multi_item/><item>1</item></xml>
726
PERL: $root->{xml}->{item}->[0]->{value} eq "1";
728
=item * One comment supported per node
730
XML: <xml><!--test--></xml>
731
PERL: $root->{xml}->{comment} eq 'test';
735
=head2 Schema Checking
737
Schema checking is done by providing the module with an XBS (XML::Bare Schema) to check
738
the XML against. If the XML checks as valid against the schema, parsing will continue as
739
normal. If the XML is invalid, the parse function will die, providing information about
742
The following information is provided in the error message:
746
=item * The type of error
748
=item * Where the error occurred ( line and char )
750
=item * A short snippet of the XML at the point of failure
754
=head2 XBS ( XML::Bare Schema ) Format
758
=item * Required nodes
763
=item * Optional nodes - allow one
767
or XBS: <xml><item?/></xml>
769
=item * Optional nodes - allow 0 or more
771
XML: <xml><item/></xml>
774
=item * Required nodes - allow 1 or more
776
XML: <xml><item/><item/></xml>
779
=item * Nodes - specified minimum and maximum number
781
XML: <xml><item/><item/></xml>
782
XBS: <xml item{1,2}/>
783
or XBS: <xml><item{1,2}/></xml>
784
or XBS: <xml><item{1,2}></item{1,2}></xml>
786
=item * Multiple acceptable node formats
788
XML: <xml><item type=box volume=20/><item type=line length=10/></xml>
789
XBS: <xml><item type=box volume/><item type=line length/></xml>
791
=item * Regular expressions checking for values
793
XML: <xml name=Bob dir=up num=10/>
794
XBS: <xml name=[A-Za-z]+ dir=up|down num=[0-9]+/>
796
=item * Require multi_ tags
798
XML: <xml><multi_item/></xml>
803
=head2 Parsed Hash Structure
805
The hash structure returned from XML parsing is created in a specific format.
806
Besides as described above, the structure contains some additional nodes in
807
order to preserve information that will allow that structure to be correctly
808
converted back to XML.
810
Nodes may contain the following 3 additional subnodes:
816
The character offset within the original parsed XML of where the node
817
begins. This is used to provide line information for errors when XML
818
fails a schema check.
822
This is a number indicating the ordering of nodes. It is used to allow
823
items in a perl hash to be sorted when writing back to xml. Note that
824
items are not sorted after parsing in order to save time if all you
825
are doing is reading and you do not care about the order.
827
In future versions of this module an option will be added to allow
828
you to sort your nodes so that you can read them in order.
829
( note that multiple nodes of the same name are stored in order )
833
This is a boolean value that exists and is 1 iff the node is an
838
=head2 Parsing Limitations / Features
842
=item * CDATA parsed correctly, but stripped if unneeded
844
Currently the contents of a node that are CDATA are read and
845
put into the value hash, but the hash structure does not have
846
a value indicating the node contains CDATA.
848
When converting back to XML, the contents of the value hash
849
are parsed to check for xml incompatible data using a regular
850
expression. If 'CDATA like' stuff is encountered, the node
853
=item * Node position stored, but hash remains unsorted
855
The ordering of nodes is noted using the '_pos' value, but
856
the hash itself is not ordered after parsing. Currently
857
items will be out of order when looking at them in the
860
Note that when converted back to XML, the nodes are then
861
sorted and output in the correct order to XML. Note that
862
nodes of the same name with the same parent will be
863
grouped together; the position of the first item to
864
appear will determine the output position of the group.
866
=item * Comments are parsed but only one is stored per node.
868
For each node, there can be a comment within it, and that
869
comment will be saved and output back when dumping to XML.
871
=item * Comments override output of immediate value
873
If a node contains only a comment node and a text value,
874
only the comment node will be displayed. This is in line
875
with treating a comment node as a node and only displaying
876
immediate values when a node contains no subnodes.
878
=item * PI sections are parsed, but discarded
880
=item * Unknown C<< <! >> sections are parsed, but discarded
882
=item * Attributes may use no quotes, single quotes, or double quotes
884
=item * Quoted attributes cannot contain escaped quotes
886
No escape character is recognized within quotes. As a result,
887
regular quotes cannot be stored to XML, or the written XML
888
will not be correct, due to all attributes always being written
891
=item * Attributes are always written back to XML with quotes
893
=item * Nodes cannot contain subnodes as well as an immediate value
895
Actually nodes can in fact contain a value as well, but that
896
value will be discarded if you write back to XML. That value is
897
equal to the first continuous string of text besides a subnode.
899
<node>text<subnode/>text2</node>
900
( the value of node is text )
902
<node><subnode/>text</node>
903
( the value of node is text )
908
( the value of node is "\n " )
912
=head2 Module Functions
916
=item * C<< $ob = new XML::Bare( text => "[some xml]" ) >>
918
Create a new XML object, with the given text as the xml source.
920
=item * C<< $object = new XML::Bare( file => "[filename]" ) >>
922
Create a new XML object, with the given filename/path as the xml source
924
=item * C<< $object = new XML::Bare( text => "[some xml]", file => "[filename]" ) >>
926
Create a new XML object, with the given text as the xml input, and the given
927
filename/path as the potential output ( used by save() )
929
=item * C<< $tree = $object->parse() >>
931
Parse the xml of the object and return a tree reference
933
=item * C<< $tree = $object->simple() >>
935
Alternate to the parse function which generates a tree similar to that
936
generated by XML::Simple. Note that the sets of nodes are turned into
937
arrays always, regardless of whether they have a 'name' attribute, unlike
940
Note that currently the generated tree cannot be used with any of the
941
functions in this module that operate upon trees. The function is provided
942
purely as a quick and dirty way to read simple XML files.
944
Also note that you cannot rely upon this function being contained in
945
future versions of XML::Bare; the function will likely be split off into
946
an optimized version meant purely to operate in this fashion.
948
=item * C<< $text = $object->xml( [root] ) >>
950
Take the hash tree in [root] and turn it into cleanly indented ( 2 spaces )
953
=item * C<< $object->save() >>
955
Take the current tree in the object, cleanly indent it, and save it
956
to the file parameter specified when creating the object.
958
=item * C<< $text = XML::Bare::clean( text => "[some xml]" ) >>
960
Shortcut to creating an xml object and immediately turning it into clean xml text.
962
=item * C<< $text = XML::Bare::clean( file => "[filename]" ) >>
966
=item * C<< XML::Bare::clean( file => "[filename]", save => 1 ) >>
968
Clean up the xml in the file, saving the results back to the file
970
=item * C<< XML::Bare::clean( text => "[some xml]", save => "[filename]" ) >>
972
Clean up the xml provided, and save it into the specified file.
974
=item * C<< XML::Bare::clean( file => "[filename1]", save => "[filename2]" ) >>
976
Clean up the xml in filename1 and save the results to filename2.
978
=item * C<< $object->add_node( [node], [nodeset name], name => value, name2 => value2, ... ) >>
981
$object->add_node( $root->{xml}, 'item', name => 'Bob' );
990
=item * C<< $object->add_node_after( [node], [node to insert after], [nodeset name], name => value, ... ) >>
992
Same as add_node, but the new node is added immediately after the specified node.
994
=item * C<< $object->del_node( [node], [nodeset name], name => value ) >>
1008
$xml->del_node( $root->{xml}, 'a', b=>'1' );
1017
=item * C<< $object->find_node( [node], [nodeset name], name => value ) >>
1033
$object->find_node( $root->{xml}, 'ob', key => '1' )->{val}->{value} = 'test';
1047
=item * C<< $object->find_by_perl( [nodeset], "[perl code]" ) >>
1049
find_by_perl evaluates some perl code for each node in a set of nodes, and
1050
returns the nodes where the perl code evaluates as true. In order to
1051
easily reference node values, node values can be directly referred
1052
to from within the perl code by the name of the node with a dash(-) in
1053
front of the name. See the example below.
1055
Note that this function returns an array reference as opposed to a single
1056
node unlike the find_node function.
1072
$object->find_by_perl( $root->{xml}->{ob}, "-key eq '1'" )->[0]->{val}->{value} = 'test';
1086
=item * C<< XML::Bare::merge( [nodeset1], [nodeset2], [id node name] ) >>
1088
Merges the nodes from nodeset2 into nodeset1, matching the contents of
1089
each node based upon the content in the id node.
1094
my $ob1 = new XML::Bare( text => "
1103
my $ob2 = new XML::Bare( text => "
1113
my $root1 = $ob1->parse();
1114
my $root2 = $ob2->parse();
1115
merge( $root1->{'xml'}->{'a'}, $root2->{'xml'}->{'a'}, 'id' );
1116
print $ob1->xml( $root1 );
1130
=item * C<< XML::Bare::del_by_perl( ... ) >>
1132
Works exactly like find_by_perl, but deletes whatever matches.
1134
=item * C<< XML::Bare::forcearray( [noderef] ) >>
1136
Turns the node reference into an array reference, whether that
1137
node is just a single node, or is already an array reference.
1139
=item * C<< XML::Bare::new_node( ... ) >>
1141
Creates a new node...
1143
=item * C<< XML::Bare::newhash( ... ) >>
1145
Creates a new hash with the specified value.
1147
=item * C<< XML::Bare::simplify( [noderef] ) >>
1149
Take a node with children that have immediate values and
1150
creates a hashref to reference those values by the name of
1155
=head2 Functions Used Internally
1159
=item * C<< XML::Bare::check() >>
1160
=item * C<< XML::Bare::checkone() >>
1161
=item * C<< XML::Bare::readxbs() >>
1162
=item * C<< XML::Bare::lineinfo() >>
1163
=item * C<< XML::Bare::c_parse() >>
1164
=item * C<< XML::Bare::c_parsefile() >>
1165
=item * C<< XML::Bare::free_tree() >>
1166
=item * C<< XML::Bare::free_tree_c() >>
1167
=item * C<< XML::Bare::xml2obj() >>
1168
=item * C<< XML::Bare::xml2obj_simple() >>
1169
=item * C<< XML::Bare::obj2xml() >>
1170
=item * C<< XML::Bare::get_root() >>
1176
In comparison to other available perl xml parsers that create trees, XML::Bare
1177
is extremely fast. In order to measure the performance of loading and parsing
1178
compared to the alternatives, a templated speed comparison mechanism has been
1179
created and included with XML::Bare.
1181
The included makebench.pl file runs when you make the module and creates perl
1182
files within the bench directory corresponding to the .tmpl contained there.
1184
Currently there are three types of modules that can be tested against,
1185
executable parsers ( exe.tmpl ), tree parsers ( tree.tmpl ), and parsers
1186
that do not generate trees ( notree.tmpl ).
1188
A full list of modules currently tested against is as follows:
1192
=item * Tiny XML (exe)
1198
=item * XML::LibXML (notree)
1200
=item * XML::Parser (notree)
1202
=item * XML::Parser::Expat (notree)
1204
=item * XML::Descent (notree)
1206
=item * XML::Parser::EasyTree
1208
=item * XML::Handler::Trees
1214
=item * XML::Simple using XML::Parser
1216
=item * XML::Simple using XML::SAX::PurePerl
1218
=item * XML::Simple using XML::LibXML::SAX::Parser
1220
=item * XML::Simple using XML::Bare::SAX::Parser
1224
=item * XML::Trivial
1226
=item * XML::SAX::Simple
1228
=item * XML::Grove::Builder
1230
=item * XML::XPath::XMLParser
1236
To run the comparisons, run the appropriate perl file within the
1237
bench directory. (exe.pl, tree.pl, or notree.pl )
1239
The script measures the milliseconds of loading and parsing, and
1240
compares the time against the time of XML::Bare. So a 7 means
1241
it takes 7 times as long as XML::Bare.
1243
Here is a combined table of the script run against each alternative
1244
using the included test.xml:
1246
-Module- load parse total
1248
XML::TreePP 2.3063 33.1776 6.1598
1249
XML::Parser::EasyTree 4.9405 25.7278 7.4571
1250
XML::Handler::Trees 7.2303 26.5688 9.6447
1251
XML::Trivial 5.0636 12.4715 7.3046
1252
XML::Smart 6.8138 78.7939 15.8296
1253
XML::Simple (XML::Parser) 2.3346 50.4772 10.7455
1254
XML::Simple (PurePerl) 2.361 261.4571 33.6524
1255
XML::Simple (LibXML) 2.3187 163.7501 23.1816
1256
XML::Simple (XML::Bare) 2.3252 59.1254 10.9163
1257
XML::SAX::Simple 8.7792 170.7313 28.3634
1258
XML::Twig 27.8266 56.4476 31.3594
1259
XML::Grove::Builder 7.1267 26.1672 9.4064
1260
XML::XPath::XMLParser 9.7783 35.5486 13.0002
1261
XML::LibXML (notree) 11.0038 4.5758 10.6881
1262
XML::Parser (notree) 4.4698 17.6448 5.8609
1263
XML::Parser::Expat(notree) 3.7681 50.0382 6.0069
1264
XML::Descent (notree) 6.0525 37.0265 11.0322
1265
Tiny XML (exe) 1.0095
1269
Here is a combined table of the script run against each alternative
1270
using the included feed2.xml:
1272
-Module- load parse total
1274
XML::TreePP 2.3068 23.7554 7.6921
1275
XML::Parser::EasyTree 4.8799 25.3691 9.6257
1276
XML::Handler::Trees 6.8545 33.1007 13.0575
1277
XML::Trivial 5.0105 32.0043 11.4113
1278
XML::Simple (XML::Parser) 2.3498 41.9007 12.3062
1279
XML::Simple (PurePerl) 2.3551 224.3027 51.7832
1280
XML::Simple (LibXML) 2.3617 88.8741 23.215
1281
XML::Simple (XML::Bare) 2.4319 37.7355 10.2343
1282
XML::Simple 2.7168 90.7203 26.7525
1283
XML::SAX::Simple 8.7386 94.8276 29.2166
1284
XML::Twig 28.3206 48.1014 33.1222
1285
XML::Grove::Builder 7.2021 30.7926 12.9334
1286
XML::XPath::XMLParser 9.6869 43.5032 17.4941
1287
XML::LibXML (notree) 11.0023 5.022 10.5214
1288
XML::Parser (notree) 4.3748 25.0213 5.9803
1289
XML::Parser::Expat(notree) 3.6555 51.6426 7.4316
1290
XML::Descent (notree) 5.9206 155.0289 18.7767
1291
Tiny XML (exe) 1.2212
1295
These results show that XML::Bare is, at least on the
1296
test machine, running all tests within cygwin, faster
1297
at loading and parsing than everything being tested
1300
The following things are shown as well:
1301
- XML::Bare can parse XML and create a hash tree
1302
in less time than it takes LibXML just to parse.
1303
- XML::Bare can parse XML and create a tree
1304
in less time than all three binary parsers take
1307
Note that the executable parsers are not perl modules
1308
and are timed using dummy programs that just use the
1309
library to load and parse the example files. The
1310
executables are not included with this program. Any
1311
source modifications used to generate the shown test
1312
results can be found in the bench/src directory of
1317
Copyright (C) 2008 David Helkowski
1319
This program is free software; you can redistribute it and/or
1320
modify it under the terms of the GNU General Public License as
1321
published by the Free Software Foundation; either version 2 of the
1322
License, or (at your option) any later version. You may also
1323
redistribute it and/or modify it under the terms of the Perl
1326
This program is distributed in the hope that it will be useful,
1327
but WITHOUT ANY WARRANTY; without even the implied warranty of
1328
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1329
GNU General Public License for more details.
1330
____ ________ ________ _________
1331
/_ |\_____ \ \_____ \\______ \
1332
| | _(__ < _(__ < / /
1334
|___|/______ //______ / /____/