3
# ABSTRACT: Minimal XML parser implemented via a C state engine
9
use vars qw( @ISA @EXPORT @EXPORT_OK $VERSION );
13
@ISA = qw(Exporter DynaLoader);
15
our $VERSION = '0.47'; # VERSION
16
our $AUTHORITY = 'cpan:NIGELM'; # AUTHORITY
18
use vars qw($VERSION *AUTOLOAD);
20
*AUTOLOAD = \&XML::Bare::AUTOLOAD;
21
bootstrap XML::Bare $VERSION;
24
@EXPORT_OK = qw( xget merge clean add_node del_node find_node del_node forcearray del_by_perl xmlin xval );
30
if ( $self->{'text'} ) {
31
XML::Bare::c_parse( $self->{'text'} );
32
$self->{'structroot'} = XML::Bare::get_root();
35
my $res = open( my $XML, '<', $self->{'file'} );
42
$self->{'text'} = <$XML>;
45
XML::Bare::c_parse( $self->{'text'} );
46
$self->{'structroot'} = XML::Bare::get_root();
49
return $self if ( !wantarray );
50
return ( $self, $self->parse() );
61
return map $_->{'value'}, @{%$hash}{@_};
66
return [] if ( !$ref );
67
return $ref if ( ref($ref) eq 'ARRAY' );
73
# shift in the two array references as well as the field to merge on
74
my ( $a, $b, $id ) = @_;
75
my %hash = map { $_->{$id} ? ( $_->{$id}->{'value'} => $_ ) : ( 0 => 0 ) } @$a;
77
next if ( !$one->{$id} );
78
my $short = $hash{ $one->{$id}->{'value'} };
80
foreach my $key ( keys %$one ) {
81
next if ( $key eq '_pos' || $key eq 'id' );
82
my $cur = $short->{$key};
83
my $add = $one->{$key};
84
if ( !$cur ) { $short->{$key} = $add; }
87
if ( $type eq 'HASH' ) {
89
$short->{$key} = \@arr;
92
if ( ref($add) eq 'HASH' ) {
93
push( @{ $short->{$key} }, $add );
95
else { # we are merging an array
96
push( @{ $short->{$key} }, @$add );
100
# we need to deal with the case where this node
101
# is already there, either alone or as an array
108
my $ob = new XML::Bare(@_);
109
my $root = $ob->parse();
110
if ( $ob->{'save'} ) {
111
$ob->{'file'} = $ob->{'save'} if ( "$ob->{'save'}" ne "1" );
115
return $ob->xml($root);
121
my $ob = new XML::Bare( text => $text );
122
my $simple = $ob->simple();
123
if ( !$ops{'keeproot'} ) {
124
my @keys = keys %$simple;
125
my $first = $keys[0];
126
$simple = $simple->{$first} if ($first);
133
my $ob = new XML::Bare(%ops);
134
return $ob->html( $ob->parse(), $ops{'root'} || 'xml' );
137
# Load a file using XML::DOM, convert it to a hash, and return the hash
141
my $res = XML::Bare::xml2obj();
142
$self->{'structroot'} = XML::Bare::get_root();
145
if ( defined( $self->{'scheme'} ) ) {
146
$self->{'xbs'} = new XML::Bare( %{ $self->{'scheme'} } );
148
if ( defined( $self->{'xbs'} ) ) {
149
my $xbs = $self->{'xbs'};
150
my $ob = $xbs->parse();
151
$self->{'xbso'} = $ob;
155
if ( $res < 0 ) { croak "Error at " . $self->lineinfo( -$res ); }
156
$self->{'xml'} = $res;
158
if ( defined( $self->{'xbso'} ) ) {
159
my $ob = $self->{'xbso'};
160
my $cres = $self->check( $res, $ob );
161
croak($cres) if ($cres);
164
return $self->{'xml'};
172
for ( my $i = 0; $i < $res; $i++ ) {
173
my $let = substr( $self->{'text'}, $i, 1 );
174
if ( ord($let) == 10 ) {
179
my $part = substr( $self->{'text'}, $res, 10 );
182
if ( $self->{'offset'} ) {
183
my $off = $self->{'offset'};
185
return "$off line $line char $res \"$part\"";
187
return "line $line char $res \"$part\"";
192
my ( $self, $node, $scheme, $parent ) = @_;
195
if ( ref($scheme) eq 'ARRAY' ) {
196
for my $one (@$scheme) {
197
my $res = $self->checkone( $node, $one, $parent );
198
return 0 if ( !$res );
202
else { return $self->checkone( $node, $scheme, $parent ); }
207
my ( $self, $node, $scheme, $parent ) = @_;
209
for my $key ( keys %$node ) {
210
next if ( substr( $key, 0, 1 ) eq '_' || $key eq '_att' || $key eq 'comment' );
211
if ( $key eq 'value' ) {
212
my $val = $node->{'value'};
213
my $regexp = $scheme->{'value'};
215
if ( $val !~ m/^($regexp)$/ ) {
216
my $linfo = $self->lineinfo( $node->{'_i'} );
217
return "Value of '$parent' node ($val) does not match /$regexp/ [$linfo]";
222
my $sub = $node->{$key};
223
my $ssub = $scheme->{$key};
224
if ( !$ssub ) { #&& ref( $schemesub ) ne 'HASH'
225
my $linfo = $self->lineinfo( $sub->{'_i'} );
226
return "Invalid node '$key' in xml [$linfo]";
228
if ( ref($sub) eq 'HASH' ) {
229
my $res = $self->check( $sub, $ssub, $key );
230
return $res if ($res);
232
if ( ref($sub) eq 'ARRAY' ) {
234
if ( ref($asub) eq 'ARRAY' ) {
237
if ( $asub->{'_t'} ) {
238
my $max = $asub->{'_max'} || 0;
239
if ( $#$sub >= $max ) {
240
my $linfo = $self->lineinfo( $sub->[0]->{'_i'} );
241
return "Too many nodes of type '$key'; max $max; [$linfo]";
243
my $min = $asub->{'_min'} || 0;
244
if ( ( $#$sub + 1 ) < $min ) {
245
my $linfo = $self->lineinfo( $sub->[0]->{'_i'} );
246
return "Not enough nodes of type '$key'; min $min [$linfo]";
250
my $res = $self->check( $_, $ssub, $key );
251
return $res if ($res);
255
if ( my $dem = $scheme->{'_demand'} ) {
256
for my $req ( @{ $scheme->{'_demand'} } ) {
257
my $ck = $node->{$req};
259
my $linfo = $self->lineinfo( $node->{'_i'} );
260
return "Required node '$req' does not exist [$linfo]";
262
if ( ref($ck) eq 'ARRAY' ) {
263
my $linfo = $self->lineinfo( $node->{'_i'} );
264
return "Required node '$req' is empty array [$linfo]" if ( $#$ck == -1 );
271
sub readxbs { # xbs = xml bare schema
274
for my $key ( keys %$node ) {
275
next if ( substr( $key, 0, 1 ) eq '_' || $key eq '_att' || $key eq 'comment' );
276
if ( $key eq 'value' ) {
277
my $val = $node->{'value'};
278
delete $node->{'value'} if ( $val =~ m/^\W*$/ );
281
my $sub = $node->{$key};
283
if ( $key =~ m/([a-z_]+)([^a-z_]+)/ ) {
292
elsif ( $t eq '*' ) {
296
elsif ( $t eq '?' ) {
300
elsif ( $t eq '@' ) {
301
$name = 'multi_' . $name;
305
elsif ( $t =~ m/\{([0-9]+),([0-9]+)\}/ ) {
312
if ( ref($sub) eq 'HASH' ) {
313
$res = readxbs($sub);
315
$sub->{'_min'} = $min;
316
$sub->{'_max'} = $max;
318
if ( ref($sub) eq 'ARRAY' ) {
319
for my $item (@$sub) {
320
$res = readxbs($item);
322
$item->{'_min'} = $min;
323
$item->{'_max'} = $max;
327
push( @demand, $name ) if ($min);
328
$node->{$name} = $node->{$key};
329
delete $node->{$key};
332
if ( ref($sub) eq 'HASH' ) {
338
if ( ref($sub) eq 'ARRAY' ) {
339
for my $item (@$sub) {
347
push( @demand, $key );
350
if (@demand) { $node->{'_demand'} = \@demand; }
356
my $res = XML::Bare::xml2obj_simple();
357
$self->{'structroot'} = XML::Bare::get_root();
364
my ( $self, $node, $name ) = @_;
367
$node->{ 'multi_' . $name } = \%blank if ( !$node->{ 'multi_' . $name } );
368
$node->{$name} = \@newar if ( !$node->{$name} );
369
my $newnode = new_node( 0, splice( @_, 3 ) );
370
push( @{ $node->{$name} }, $newnode );
375
my ( $self, $node, $prev, $name ) = @_;
378
$node->{ 'multi_' . $name } = \%blank if ( !$node->{ 'multi_' . $name } );
379
$node->{$name} = \@newar if ( !$node->{$name} );
380
my $newnode = $self->new_node( splice( @_, 4 ) );
383
for my $anode ( @{ $node->{$name} } ) {
384
$anode->{'_pos'} = $cur if ( !$anode->{'_pos'} );
387
my $opos = $prev->{'_pos'};
388
for my $anode ( @{ $node->{$name} } ) {
389
$anode->{'_pos'}++ if ( $anode->{'_pos'} > $opos );
391
$newnode->{'_pos'} = $opos + 1;
393
push( @{ $node->{$name} }, $newnode );
401
$cond =~ s/-([a-z]+)/\$ob->\{'$1'\}->\{'value'\}/g;
404
foreach my $ob (@$arr) { push( @res, $ob ) if ( eval($cond) ); }
415
#croak "Cannot search empty node for $name" if( !$node );
416
#$node = $node->{ $name } or croak "Cannot find $name";
417
$node = $node->{$name} or return 0;
418
return 0 if ( !$node );
419
if ( ref($node) eq 'HASH' ) {
420
foreach my $key ( keys %match ) {
421
my $val = $match{$key};
423
if ( $node->{$key}->{'value'} eq $val ) {
428
if ( ref($node) eq 'ARRAY' ) {
429
for ( my $i = 0; $i <= $#$node; $i++ ) {
430
my $one = $node->[$i];
431
foreach my $key ( keys %match ) {
432
my $val = $match{$key};
433
croak('undefined value in find') unless defined $val;
434
if ( $one->{$key}->{'value'} eq $val ) {
448
$node = $node->{$name};
449
return if ( !$node );
450
for ( my $i = 0; $i <= $#$node; $i++ ) {
451
my $one = $node->[$i];
452
foreach my $key ( keys %match ) {
453
my $val = $match{$key};
454
if ( $one->{$key}->{'value'} eq $val ) {
464
$cond =~ s/-value/\$ob->\{'value'\}/g;
465
$cond =~ s/-([a-z]+)/\$ob->\{'$1'\}->\{'value'\}/g;
467
for ( my $i = 0; $i <= $#$arr; $i++ ) {
470
delete $arr->[$i] if ( eval($cond) );
476
# Created a node of XML hash with the passed in variables already set
482
foreach ( keys %parts ) {
483
my $val = $parts{$_};
484
if ( m/^_/ || ref($val) eq 'HASH' ) {
488
$newnode{$_} = { value => $val };
495
sub newhash { shift; return { value => shift }; }
501
foreach my $name ( keys %$root ) {
502
next if ( $name =~ m|^_| || $name eq 'comment' || $name eq 'value' );
503
my $val = xval $root->{$name};
510
return $_[0] ? $_[0]->{'value'} : ( $_[1] || '' );
513
# Save an XML hash tree into a file
516
return if ( !$self->{'xml'} );
518
my $xml = $self->xml( $self->{'xml'} );
527
open my $F, '>:encoding(UTF-8)', $self->{'file'};
531
my $cursize = tell($F);
532
if ( $cursize != $len ) { # concurrency; we are writing a smaller file
533
warn "Truncating File $self->{'file'}";
534
truncate( $F, $len );
538
if ( $cursize != $len ) { # still not the right size even after truncate??
539
die "Write problem; $cursize != $len";
545
my ( $self, $obj, $name ) = @_;
549
return obj2xml( \%hash, '', 0 );
553
return obj2xml( \%hash, '', 0 );
557
my ( $self, $obj, $name ) = @_;
559
if ( $self->{'style'} ) {
560
$pre = "<style type='text/css'>\@import '$self->{'style'}';</style>";
565
return $pre . obj2html( \%hash, '', 0 );
569
return $pre . obj2html( \%hash, '', 0 );
573
my ( $objs, $name, $pad, $level ) = @_;
574
$level = 0 if ( !$level );
575
$pad = '' if ( $level <= 2 );
579
return '' if ( !$objs );
581
#return $objs->{'_raw'} if( $objs->{'_raw'} );
583
my $oba = $objs->{$a};
584
my $obb = $objs->{$b};
587
$oba = $oba->[0] if ( ref($oba) eq 'ARRAY' );
588
$obb = $obb->[0] if ( ref($obb) eq 'ARRAY' );
589
if ( ref($oba) eq 'HASH' ) { $posa = $oba->{'_pos'} || 0; }
590
if ( ref($obb) eq 'HASH' ) { $posb = $obb->{'_pos'} || 0; }
591
return $posa <=> $posb;
594
my $obj = $objs->{$i} || '';
595
my $type = ref($obj);
596
if ( $type eq 'ARRAY' ) {
600
if ( !$a ) { return 0; }
601
if ( !$b ) { return 0; }
602
if ( ref($a) eq 'HASH' && ref($b) eq 'HASH' ) {
603
my $posa = $a->{'_pos'};
604
my $posb = $b->{'_pos'};
605
if ( !$posa ) { $posa = 0; }
606
if ( !$posb ) { $posb = 0; }
607
return $posa <=> $posb;
613
$xml .= obj2xml( $j, $i, $pad . ' ', $level + 1, $#dex );
616
elsif ( $type eq 'HASH' && $i !~ /^_/ ) {
617
if ( $obj->{'_att'} ) {
618
$att .= ' ' . $i . '="' . $obj->{'value'} . '"' if ( $i !~ /^_/ );
622
$xml .= obj2xml( $obj, $i, $pad . ' ', $level + 1, $#dex );
626
if ( $i eq 'comment' ) { $xml .= '<!--' . $obj . '-->' . "\n"; }
627
elsif ( $i eq 'value' ) {
628
if ( $level > 1 ) { # $#dex < 4 &&
629
if ( $obj && $obj =~ /[<>&;]/ ) { $xml .= '<![CDATA[' . $obj . ']]>'; }
630
else { $xml .= $obj if ( $obj =~ /\S/ ); }
633
elsif ( $i =~ /^_/ ) { }
634
else { $xml .= '<' . $i . '>' . $obj . '</' . $i . '>'; }
637
my $pad2 = $imm ? '' : $pad;
638
my $cr = $imm ? '' : "\n";
639
if ( substr( $name, 0, 1 ) ne '_' ) {
642
$xml = $pad . '<' . $name . $att . '>' . $cr . $xml . $pad2 . '</' . $name . '>';
645
$xml = $pad . '<' . $name . $att . ' />';
648
return $xml . "\n" if ( $level > 1 );
655
my ( $objs, $name, $pad, $level ) = @_;
657
my $less = "<span class='ang'><</span>";
658
my $more = "<span class='ang'>></span>";
659
my $tn0 = "<span class='tname'>";
661
my $eq0 = "<span class='eq'>";
663
my $qo0 = "<span class='qo'>";
665
my $sp0 = "<span class='sp'>";
670
$level = 0 if ( !$level );
671
$pad = '' if ( $level == 1 );
675
return '' if ( !$objs );
677
my $oba = $objs->{$a};
678
my $obb = $objs->{$b};
681
$oba = $oba->[0] if ( ref($oba) eq 'ARRAY' );
682
$obb = $obb->[0] if ( ref($obb) eq 'ARRAY' );
683
if ( ref($oba) eq 'HASH' ) { $posa = $oba->{'_pos'} || 0; }
684
if ( ref($obb) eq 'HASH' ) { $posb = $obb->{'_pos'} || 0; }
685
return $posa <=> $posb;
688
if ( $objs->{'_cdata'} ) {
689
my $val = $objs->{'value'};
690
$val =~ s/^(\s*\n)+//;
694
$objs->{'value'} = $val;
696
#$xml = "$less![CDATA[<div class='node'><div class='cdata'>$val</div></div>]]$more";
697
$cd0 = "$less![CDATA[<div class='node'><div class='cdata'>";
698
$cd1 = "</div></div>]]$more";
701
my $obj = $objs->{$i} || '';
702
my $type = ref($obj);
703
if ( $type eq 'ARRAY' ) {
707
if ( !$a ) { return 0; }
708
if ( !$b ) { return 0; }
709
if ( ref($a) eq 'HASH' && ref($b) eq 'HASH' ) {
710
my $posa = $a->{'_pos'};
711
my $posb = $b->{'_pos'};
712
if ( !$posa ) { $posa = 0; }
713
if ( !$posb ) { $posb = 0; }
714
return $posa <=> $posb;
719
for my $j (@dex2) { $xml .= obj2html( $j, $i, $pad . ' ', $level + 1, $#dex ); }
721
elsif ( $type eq 'HASH' && $i !~ /^_/ ) {
722
if ( $obj->{'_att'} ) {
723
my $val = $obj->{'value'};
726
$att .= " <span class='aname'>$i</span>" if ( $i !~ /^_/ );
729
$att .= " <span class='aname'>$i</span>$eq0=$eq1$qo0\"$qo1$val$qo0\"$qo1" if ( $i !~ /^_/ );
734
$xml .= obj2html( $obj, $i, $pad . ' ', $level + 1, $#dex );
738
if ( $i eq 'comment' ) { $xml .= "$less!--" . $obj . "--$more" . "<br>\n"; }
739
elsif ( $i eq 'value' ) {
741
if ( $obj && $obj =~ /[<>&;]/ && !$objs->{'_cdata'} ) { $xml .= "$less![CDATA[$obj]]$more"; }
742
else { $xml .= $obj if ( $obj =~ /\S/ ); }
745
elsif ( $i =~ /^_/ ) { }
746
else { $xml .= "$less$tn0$i$tn1$more$obj$less/$tn0$i$tn1$more"; }
749
if ( substr( $name, 0, 1 ) ne '_' ) {
752
if ( $xml =~ /\S/ ) {
753
$xml = "$sp0$pad$sp1$less$tn0$name$tn1$att$more$cd0$xml$cd1$less/$tn0$name$tn1$more";
756
$xml = "$sp0$pad$sp1$less$tn0$name$tn1$att/$more";
760
if ( $xml =~ /\S/ ) {
762
"$sp0$pad$sp1$less$tn0$name$tn1$att$more<div class='node'>$xml</div>$sp0$pad$sp1$less/$tn0$name$tn1$more";
764
else { $xml = "$sp0$pad$sp1$less$tn0$name$tn1$att/$more"; }
767
$xml .= "<br>" if ( $objs->{'_br'} );
768
if ( $objs->{'_note'} ) {
770
my $note = $objs->{'_note'}{'value'};
771
my @notes = split( /\|/, $note );
774
.= "<div class='note'>$sp0$pad$sp1<span class='com'><!--</span> $_ <span class='com'>--></span></div>";
777
return $xml . "<br>\n" if ($level);
785
if ( $self->{'structroot'} ) {
786
XML::Bare::free_tree_c( $self->{'structroot'} );
787
delete( $self->{'structroot'} );
797
=for stopwords CDATA GDSL LibXML Sergey Skvortsov XBS dequoting exe
798
executables html iff keeproot makebench nodeset notree recognised
799
subnode templated tmpl xml xmlin
803
XML::Bare - Minimal XML parser implemented via a C state engine
813
my $ob = new XML::Bare( text => '<xml><name>Bob</name></xml>' );
815
# Parse the xml into a hash tree
816
my $root = $ob->parse();
818
# Print the content of the name node
819
print $root->{xml}->{name}->{value};
821
# --------------------------------------------------------------
823
# Load xml from a file ( assume same contents as first example )
824
my $ob2 = new XML::Bare( file => 'test.xml' );
826
my $root2 = $ob2->parse();
828
$root2->{xml}->{name}->{value} = 'Tim';
830
# Save the changes back to the file
833
# --------------------------------------------------------------
835
# Load xml and verify against XBS ( XML Bare Schema )
836
my $xml_text = '<xml><item name=bob/></xml>';
837
my $schema_text = '<xml><item* name=[a-z]+></item*></xml>';
838
my $ob3 = new XML::Bare( text => $xml_text, schema => { text => $schema_text } );
839
$ob3->parse(); # this will error out if schema is invalid
843
This module is a 'Bare' XML parser. It is implemented in C. The parser
844
itself is a simple state engine that is less than 500 lines of C. The
845
parser builds a C struct tree from input text. That C struct tree is
846
converted to a Perl hash by a Perl function that makes basic calls back
847
to the C to go through the nodes sequentially.
849
The parser itself will only cease parsing if it encounters tags that
850
are not closed properly. All other inputs will parse, even invalid
851
inputs. To allowing checking for validity, a schema checker is included
852
in the module as well.
854
The schema format is custom and is meant to be as simple as possible.
855
It is based loosely around the way multiplicity is handled in Perl
860
To demonstrate what sort of XML is supported, consider the following
861
examples. Each of the PERL statements evaluates to true.
865
=item * Node containing just text
868
PERL: $root->{xml}->{value} eq "blah";
872
XML: <xml><name>Bob</name></xml>
873
PERL: $root->{xml}->{name}->{value} eq "Bob";
875
=item * Attributes unquoted
877
XML: <xml><a href=index.htm>Link</a></xml>
878
PERL: $root->{xml}->{a}->{href}->{value} eq "index.htm";
880
=item * Attributes quoted
882
XML: <xml><a href="index.htm">Link</a></xml>
883
PERL: $root->{xml}->{a}->{href}->{value} eq "index.htm";
887
XML: <xml><raw><![CDATA[some raw $~<!bad xml<>]]></raw></xml>
888
PERL: $root->{xml}->{raw}->{value} eq "some raw \$~<!bad xml<>";
890
=item * Multiple nodes; form array
892
XML: <xml><item>1</item><item>2</item></xml>
893
PERL: $root->{xml}->{item}->[0]->{value} eq "1";
895
=item * Forcing array creation
897
XML: <xml><multi_item/><item>1</item></xml>
898
PERL: $root->{xml}->{item}->[0]->{value} eq "1";
900
=item * One comment supported per node
902
XML: <xml><!--test--></xml>
903
PERL: $root->{xml}->{comment} eq 'test';
907
=head2 Schema Checking
909
Schema checking is done by providing the module with an XBS (XML::Bare Schema) to check
910
the XML against. If the XML checks as valid against the schema, parsing will continue as
911
normal. If the XML is invalid, the parse function will die, providing information about
914
The following information is provided in the error message:
918
=item * The type of error
920
=item * Where the error occurred ( line and char )
922
=item * A short snippet of the XML at the point of failure
926
=head2 XBS ( XML::Bare Schema ) Format
930
=item * Required nodes
935
=item * Optional nodes - allow one
939
or XBS: <xml><item?/></xml>
941
=item * Optional nodes - allow 0 or more
943
XML: <xml><item/></xml>
946
=item * Required nodes - allow 1 or more
948
XML: <xml><item/><item/></xml>
951
=item * Nodes - specified minimum and maximum number
953
XML: <xml><item/><item/></xml>
954
XBS: <xml item{1,2}/>
955
or XBS: <xml><item{1,2}/></xml>
956
or XBS: <xml><item{1,2}></item{1,2}></xml>
958
=item * Multiple acceptable node formats
960
XML: <xml><item type=box volume=20/><item type=line length=10/></xml>
961
XBS: <xml><item type=box volume/><item type=line length/></xml>
963
=item * Regular expressions checking for values
965
XML: <xml name=Bob dir=up num=10/>
966
XBS: <xml name=[A-Za-z]+ dir=up|down num=[0-9]+/>
968
=item * Require multi_ tags
970
XML: <xml><multi_item/></xml>
975
=head2 Parsed Hash Structure
977
The hash structure returned from XML parsing is created in a specific format.
978
Besides as described above, the structure contains some additional nodes in
979
order to preserve information that will allow that structure to be correctly
980
converted back to XML.
982
Nodes may contain the following 3 additional subnodes:
988
The character offset within the original parsed XML of where the node
989
begins. This is used to provide line information for errors when XML
990
fails a schema check.
994
This is a number indicating the ordering of nodes. It is used to allow
995
items in a perl hash to be sorted when writing back to xml. Note that
996
items are not sorted after parsing in order to save time if all you
997
are doing is reading and you do not care about the order.
999
In future versions of this module an option will be added to allow
1000
you to sort your nodes so that you can read them in order.
1001
( note that multiple nodes of the same name are stored in order )
1005
This is a boolean value that exists and is 1 iff the node is an
1010
=head2 Parsing Limitations / Features
1014
=item * CDATA parsed correctly, but stripped if unneeded
1016
Currently the contents of a node that are CDATA are read and
1017
put into the value hash, but the hash structure does not have
1018
a value indicating the node contains CDATA.
1020
When converting back to XML, the contents of the value hash
1021
are parsed to check for xml incompatible data using a regular
1022
expression. If 'CDATA like' stuff is encountered, the node
1025
=item * Standard XML quoted characters are decoded
1027
The basic XML quoted characters - C<&> C<>> C<<> C<quot;>
1028
and C<'> - are recognised and decoded when reading values.
1029
However when writing the builder will put any values that need quoting
1030
into a CDATA wrapper as described above.
1032
=item * Node position stored, but hash remains unsorted
1034
The ordering of nodes is noted using the '_pos' value, but
1035
the hash itself is not ordered after parsing. Currently
1036
items will be out of order when looking at them in the
1039
Note that when converted back to XML, the nodes are then
1040
sorted and output in the correct order to XML. Note that
1041
nodes of the same name with the same parent will be
1042
grouped together; the position of the first item to
1043
appear will determine the output position of the group.
1045
=item * Comments are parsed but only one is stored per node.
1047
For each node, there can be a comment within it, and that
1048
comment will be saved and output back when dumping to XML.
1050
=item * Comments override output of immediate value
1052
If a node contains only a comment node and a text value,
1053
only the comment node will be displayed. This is in line
1054
with treating a comment node as a node and only displaying
1055
immediate values when a node contains no subnodes.
1057
=item * PI sections are parsed, but discarded
1059
=item * Unknown C<< <! >> sections are parsed, but discarded
1061
=item * Attributes may use no quotes, single quotes, quotes
1063
=item * Quoted attributes cannot contain escaped quotes
1065
No escape character is recognized within quotes. As a result,
1066
regular quotes cannot be stored to XML, or the written XML
1067
will not be correct, due to all attributes always being written
1070
=item * Attributes are always written back to XML with quotes
1072
=item * Nodes cannot contain subnodes as well as an immediate value
1074
Actually nodes can in fact contain a value as well, but that
1075
value will be discarded if you write back to XML. That value is
1076
equal to the first continuous string of text besides a subnode.
1078
<node>text<subnode/>text2</node>
1079
( the value of node is text )
1081
<node><subnode/>text</node>
1082
( the value of node is text )
1087
( the value of node is "\n " )
1091
=head2 Module Functions
1095
=item * C<< $ob = new XML::Bare( text => "[some xml]" ) >>
1097
Create a new XML object, with the given text as the xml source.
1099
=item * C<< $object = new XML::Bare( file => "[filename]" ) >>
1101
Create a new XML object, with the given filename/path as the xml source
1103
=item * C<< $object = new XML::Bare( text => "[some xml]", file => "[filename]" ) >>
1105
Create a new XML object, with the given text as the xml input, and the given
1106
filename/path as the potential output ( used by save() )
1108
=item * C<< $object = new XML::Bare( file => "data.xml", scheme => { file => "scheme.xbs" } ) >>
1110
Create a new XML object and check to ensure it is valid xml by way of the XBS scheme.
1112
=item * C<< $tree = $object->parse() >>
1114
Parse the xml of the object and return a tree reference
1116
=item * C<< $tree = $object->simple() >>
1118
Alternate to the parse function which generates a tree similar to that
1119
generated by XML::Simple. Note that the sets of nodes are turned into
1120
arrays always, regardless of whether they have a 'name' attribute, unlike
1123
Note that currently the generated tree cannot be used with any of the
1124
functions in this module that operate upon trees. The function is provided
1125
purely as a quick and dirty way to read simple XML files.
1127
=item * C<< $tree = xmlin( $xmlext, keeproot => 1 ) >>
1129
The xmlin function is a shortcut to creating an XML::Bare object and
1130
parsing it using the simple function. It behaves similarly to the
1131
XML::Simple function by the same name. The keeproot option is optional
1132
and if left out the root node will be discarded, same as the function
1135
=item * C<< $text = $object->xml( [root] ) >>
1137
Take the hash tree in [root] and turn it into cleanly indented ( 2 spaces )
1140
=item * C<< $text = $object->html( [root], [root node name] ) >>
1142
Take the hash tree in [root] and turn it into nicely colorized and styled
1143
html. [root node name] is optional.
1145
=item * C<< $object->save() >>
1147
The the current tree in the object, cleanly indent it, and save it
1148
to the file parameter specified when creating the object.
1150
=item * C<< $value = xval $node, $default >>
1152
Returns the value of $node or $default if the node does not exist.
1153
If default is not passed to the function, then '' is returned as
1154
a default value when the node does not exist.
1156
=item * C<< ( $name, $age ) = xget( $personnode, qw/name age/ ) >>
1158
Shortcut function to grab a number of values from a node all at the
1159
same time. Note that this function assumes that all of the subnodes
1160
exist; it will fail if they do not.
1162
=item * C<< $text = XML::Bare::clean( text => "[some xml]" ) >>
1164
Shortcut to creating an xml object and immediately turning it into clean xml text.
1166
=item * C<< $text = XML::Bare::clean( file => "[filename]" ) >>
1168
Similar to previous.
1170
=item * C<< XML::Bare::clean( file => "[filename]", save => 1 ) >>
1172
Clean up the xml in the file, saving the results back to the file
1174
=item * C<< XML::Bare::clean( text => "[some xml]", save => "[filename]" ) >>
1176
Clean up the xml provided, and save it into the specified file.
1178
=item * C<< XML::Bare::clean( file => "[filename1]", save => "[filename2]" ) >>
1180
Clean up the xml in filename1 and save the results to filename2.
1182
=item * C<< $html = XML::Bare::tohtml( text => "[some xml]", root => 'xml' ) >>
1184
Shortcut to creating an xml object and immediately turning it into html.
1185
Root is optional, and specifies the name of the root node for the xml
1186
( which defaults to 'xml' )
1188
=item * C<< $object->add_node( [node], [nodeset name], name => value, name2 => value2, ... ) >>
1191
$object->add_node( $root->{xml}, 'item', name => 'Bob' );
1200
=item * C<< $object->add_node_after( [node], [subnode within node to add after], [nodeset name], ... ) >>
1202
=item * C<< $object->del_node( [node], [nodeset name], name => value ) >>
1216
$xml->del_node( $root->{xml}, 'a', b=>'1' );
1225
=item * C<< $object->find_node( [node], [nodeset name], name => value ) >>
1241
$object->find_node( $root->{xml}, 'ob', key => '1' )->{val}->{value} = 'test';
1255
=item * C<< $object->find_by_perl( [nodeset], "[perl code]" ) >>
1257
find_by_perl evaluates some perl code for each node in a set of nodes, and
1258
returns the nodes where the perl code evaluates as true. In order to
1259
easily reference node values, node values can be directly referred
1260
to from within the perl code by the name of the node with a dash(-) in
1261
front of the name. See the example below.
1263
Note that this function returns an array reference as opposed to a single
1264
node unlike the find_node function.
1280
$object->find_by_perl( $root->{xml}->{ob}, "-key eq '1'" )->[0]->{val}->{value} = 'test';
1294
=item * C<< XML::Bare::merge( [nodeset1], [nodeset2], [id node name] ) >>
1296
Merges the nodes from nodeset2 into nodeset1, matching the contents of
1297
each node based up the content in the id node.
1302
my $ob1 = new XML::Bare( text => "
1311
my $ob2 = new XML::Bare( text => "
1321
my $root1 = $ob1->parse();
1322
my $root2 = $ob2->parse();
1323
merge( $root1->{'xml'}->{'a'}, $root2->{'xml'}->{'a'}, 'id' );
1324
print $ob1->xml( $root1 );
1338
=item * C<< XML::Bare::del_by_perl( ... ) >>
1340
Works exactly like find_by_perl, but deletes whatever matches.
1342
=item * C<< XML::Bare::forcearray( [noderef] ) >>
1344
Turns the node reference into an array reference, whether that
1345
node is just a single node, or is already an array reference.
1347
=item * C<< XML::Bare::new_node( ... ) >>
1349
Creates a new node...
1351
=item * C<< XML::Bare::newhash( ... ) >>
1353
Creates a new hash with the specified value.
1355
=item * C<< XML::Bare::simplify( [noderef] ) >>
1357
Take a node with children that have immediate values and
1358
creates a hashref to reference those values by the name of
1363
=head2 Functions Used Internally
1367
=item * C<< check() checkone() readxbs() free_tree_c() >>
1369
=item * C<< lineinfo() c_parse() c_parsefile() free_tree() xml2obj() >>
1371
=item * C<< obj2xml() get_root() obj2html() xml2obj_simple() >>
1377
In comparison to other available perl xml parsers that create trees, XML::Bare
1378
is extremely fast. In order to measure the performance of loading and parsing
1379
compared to the alternatives, a templated speed comparison mechanism has been
1380
created and included with XML::Bare.
1382
The include makebench.pl file runs when you make the module and creates perl
1383
files within the bench directory corresponding to the .tmpl contained there.
1385
Currently there are three types of modules that can be tested against,
1386
executable parsers ( exe.tmpl ), tree parsers ( tree.tmpl ), and parsers
1387
that do not generated trees ( notree.tmpl ).
1389
A full list of modules currently tested against is as follows:
1394
XML::LibXML (notree)
1395
XML::Parser (notree)
1396
XML::Parser::Expat (notree)
1397
XML::Descent (notree)
1398
XML::Parser::EasyTree
1402
XML::Simple using XML::Parser
1403
XML::Simple using XML::SAX::PurePerl
1404
XML::Simple using XML::LibXML::SAX::Parser
1405
XML::Simple using XML::Bare::SAX::Parser
1410
XML::XPath::XMLParser
1413
To run the comparisons, run the appropriate perl file within the
1414
bench directory. ( exe.pl, tree.pl, or notree.pl )
1416
The script measures the milliseconds of loading and parsing, and
1417
compares the time against the time of XML::Bare. So a 7 means
1418
it takes 7 times as long as XML::Bare.
1420
Here is a combined table of the script run against each alternative
1421
using the included test.xml:
1423
-Module- load parse total
1425
XML::TreePP 2.3063 33.1776 6.1598
1426
XML::Parser::EasyTree 4.9405 25.7278 7.4571
1427
XML::Handler::Trees 7.2303 26.5688 9.6447
1428
XML::Trivial 5.0636 12.4715 7.3046
1429
XML::Smart 6.8138 78.7939 15.8296
1430
XML::Simple (XML::Parser) 2.3346 50.4772 10.7455
1431
XML::Simple (PurePerl) 2.361 261.4571 33.6524
1432
XML::Simple (LibXML) 2.3187 163.7501 23.1816
1433
XML::Simple (XML::Bare) 2.3252 59.1254 10.9163
1434
XML::SAX::Simple 8.7792 170.7313 28.3634
1435
XML::Twig 27.8266 56.4476 31.3594
1436
XML::Grove::Builder 7.1267 26.1672 9.4064
1437
XML::XPath::XMLParser 9.7783 35.5486 13.0002
1438
XML::LibXML (notree) 11.0038 4.5758 10.6881
1439
XML::Parser (notree) 4.4698 17.6448 5.8609
1440
XML::Parser::Expat(notree) 3.7681 50.0382 6.0069
1441
XML::Descent (notree) 6.0525 37.0265 11.0322
1442
Tiny XML (exe) 1.0095
1446
Here is a combined table of the script run against each alternative
1447
using the included feed2.xml:
1449
-Module- load parse total
1451
XML::TreePP 2.3068 23.7554 7.6921
1452
XML::Parser::EasyTree 4.8799 25.3691 9.6257
1453
XML::Handler::Trees 6.8545 33.1007 13.0575
1454
XML::Trivial 5.0105 32.0043 11.4113
1455
XML::Simple (XML::Parser) 2.3498 41.9007 12.3062
1456
XML::Simple (PurePerl) 2.3551 224.3027 51.7832
1457
XML::Simple (LibXML) 2.3617 88.8741 23.215
1458
XML::Simple (XML::Bare) 2.4319 37.7355 10.2343
1459
XML::Simple 2.7168 90.7203 26.7525
1460
XML::SAX::Simple 8.7386 94.8276 29.2166
1461
XML::Twig 28.3206 48.1014 33.1222
1462
XML::Grove::Builder 7.2021 30.7926 12.9334
1463
XML::XPath::XMLParser 9.6869 43.5032 17.4941
1464
XML::LibXML (notree) 11.0023 5.022 10.5214
1465
XML::Parser (notree) 4.3748 25.0213 5.9803
1466
XML::Parser::Expat(notree) 3.6555 51.6426 7.4316
1467
XML::Descent (notree) 5.9206 155.0289 18.7767
1468
Tiny XML (exe) 1.2212
1472
These results show that XML::Bare is, at least on the
1473
test machine, running all tests within cygwin, faster
1474
at loading and parsing than everything being tested
1477
The following things are shown as well:
1478
- XML::Bare can parse XML and create a hash tree
1479
in less time than it takes LibXML just to parse.
1480
- XML::Bare can parse XML and create a tree
1481
in less time than all three binary parsers take
1484
Note that the executable parsers are not perl modules
1485
and are timed using dummy programs that just uses the
1486
library to load and parse the example files. The
1487
executables are not included with this program. Any
1488
source modifications used to generate the shown test
1489
results can be found in the bench/src directory of
1492
=head1 CONTRIBUTED CODE
1494
The XML dequoting code used is taken from L<XML::Quote> by I<Sergey
1495
Skvortsov> (I<GDSL> on CPAN) with very minor modifications.
1499
See perlmodinstall for information and options on installing Perl modules.
1501
=head1 BUGS AND LIMITATIONS
1503
No bugs have been reported.
1505
Please report any bugs or feature requests through the web interface at
1506
L<http://rt.cpan.org/Public/Dist/Display.html?Name=XML-Bare>.
1510
The project homepage is L<https://metacpan.org/release/XML-Bare>.
1512
The latest version of this module is available from the Comprehensive Perl
1513
Archive Network (CPAN). Visit L<http://www.perl.com/CPAN/> to find a CPAN
1514
site near you, or see L<http://search.cpan.org/dist/XML-Bare/>.
1516
The development version lives at L<http://github.com/nigelm/xml-bare>
1517
and may be cloned from L<git://github.com/nigelm/xml-bare.git>.
1518
Instead of sending patches, please fork this project using the standard
1519
git and github infrastructure.
1527
David Helkowski <cpan@codechild.com>
1531
Nigel Metheringham <nigelm@cpan.org>
1535
=head1 COPYRIGHT AND LICENSE
1537
This software is Copyright (c) 2012 by David Helkowski.
1539
This is free software, licensed under:
1541
The GNU General Public License, Version 2, June 1991