~ubuntu-branches/ubuntu/utopic/libxml-bare-perl/utopic-proposed

Viewing changes to Bare.pm

Committer: Package Import Robot
Author(s): Nuno Carvalho, gregor herrmann, Salvatore Bonaccorso, Axel Beckert, Nuno Carvalho
Date: 2013-09-17 15:54:28 UTC
mfrom: (1.1.4)
Revision ID: package-import@ubuntu.com-20130917155428-4d0xb5cissw2323f

Tags: 0.53-1

* Team upload.

[ gregor herrmann ]
* debian/control: update {versioned,alternative} (build) dependencies.

[ Salvatore Bonaccorso ]
* Change Vcs-Git to canonical URI (git://anonscm.debian.org)
* Change search.cpan.org based URIs to metacpan.org based URIs

[ Axel Beckert ]
* debian/copyright: migrate pre-1.0 format to 1.0 using "cme fix dpkg-
  copyright"

[ Nuno Carvalho ]
* New upstream release.
* debian/copyright: update copyright years.
* debian/control: update standards version.
* debian/control: update debhelper required version, in order to pass all
  the hardening flags to EUMM.
* Add lintian override to apparently false-positive warning.
* Add set of patches accepted upstream but still not included in this
  release, visit https://rt.cpan.org/Public/Bug/Display.html?id=88155
  for details.

files added:
.pc

.pc/.quilt_patches

.pc/.quilt_series

.pc/.version

.pc/applied-patches

.pc/implicit_declaration_of_free.patch

.pc/implicit_declaration_of_free.patch/parser.c

.pc/libm.patch

.pc/libm.patch/Makefile.PL

.pc/pointer_from_integer.patch

.pc/pointer_from_integer.patch/Bare.xs

.pc/pointer_from_integer.patch/parser.c

Bare.pm

debian/libxml-bare-perl.lintian-overrides

debian/patches

debian/patches/implicit_declaration_of_free.patch

debian/patches/libm.patch

debian/patches/pointer_from_integer.patch

debian/patches/series

t/Bug_node_named_value.t

t/Bug_simple_empty_is_one.t

t/Bug_simple_with_cdata.t

t/Func_find_by_perl.t

t/Func_find_node.t

t/Multithreading.t

t/Pod.t

t/Pod_Coverage.t

files removed:
INSTALL

LICENSE

MANIFEST.skip

bench/exe.pl

bench/notree.pl

bench/oneexe.pl

bench/onenotree.pl

bench/onetree.pl

bench/tree.pl

inc/XMLBareMakeMaker.pm

lib/XML

lib/XML/Bare.pm

t/00-compile.t

t/000-report-versions.t

t/author-critic.t

t/escaped_xml.t

t/memory_leak.t

t/release-dist-manifest.t

t/release-distmeta.t

t/release-eol.t

t/release-has-version.t

t/release-kwalitee.t

t/release-minimum-version.t

t/release-no-tabs.t

t/release-pod-coverage.t

t/release-pod-spell.t

t/release-pod-syntax.t

t/release-portability.t

t/release-synopsis.t

t/release-unused-vars.t

files modified:
Bare.xs *

Changes *

MANIFEST

META.json

META.yml

Makefile.PL *

README

bench/feed2.xml

bench/large.xml

bench/license.txt

bench/src/barexml.c *

bench/src/ezxml_main.c *

bench/src/tinyxml.cpp *

bench/src/xmlio_testread.cpp *

bench/test.xml

bench/tree.tmpl

debian/changelog

debian/control

debian/copyright

debian/watch

makebench.PL

parser.c *

parser.h *

t/Basic.t

t/UTF8_Attributes.t *

t/UTF8_Values.t *

t/test.xml *

Show diffs side-by-side

added added

removed removed

Bare.pm

#!/usr/bin/perl -w

package XML::Bare;

use Carp;

use strict;

use vars qw( @ISA @EXPORT @EXPORT_OK $VERSION );

use utf8;

require Exporter;

require DynaLoader;

@ISA = qw(Exporter DynaLoader);

$VERSION = "0.53";

use vars qw($VERSION *AUTOLOAD);

*AUTOLOAD = \&XML::Bare::AUTOLOAD;

bootstrap XML::Bare $VERSION;

@EXPORT = qw( );

@EXPORT_OK = qw( xget merge clean add_node del_node find_node del_node forcearray del_by_perl xmlin xval );

=head1 NAME

XML::Bare - Minimal XML parser implemented via a C state engine

=head1 VERSION

0.53

=cut

sub new {

my $class = shift;

my $self = { @_ };

$self->{'i'} = 0;

if( $self->{ 'text' } ) {

if( $self->{'unsafe'} ) {

$self->{'parser'} = XML::Bare::c_parse_unsafely( $self->{'text'} );

}

else {

$self->{'parser'} = XML::Bare::c_parse( $self->{'text'} );

}

else {

my $res = open( my $XML, $self->{ 'file' } );

if( !$res ) {

$self->{ 'xml' } = 0;

return 0;

}

{

local $/ = undef;

$self->{'text'} = <$XML>;

}

close( $XML );

$self->{'parser'} = XML::Bare::c_parse( $self->{'text'} );

}

bless $self, "XML::Bare::Object";

return $self if( !wantarray );

return ( $self, ( $self->{'simple'} ? $self->simple() : $self->parse() ) );

}

sub simple {

return new( @_, simple => 1 );

}

package XML::Bare::Object;

use Carp;

use strict;

# Stubs ( to allow these functions to be used via an object as well, not just via import or namespace )

sub find_by_perl { shift; return XML::Bare::find_by_perl( @_ ); }

sub find_node { shift; return XML::Bare::find_node( @_ ); }

sub DESTROY {

my $self = shift;

use Data::Dumper;

#print Dumper( $self );

undef $self->{'text'};

undef $self->{'i'};

$self->free_tree();

undef $self->{'parser'};

}

sub read_more {

my $self = shift;

my %p = ( @_ );

my $i = $self->{'i'}++;

if( $p{'text'} ) {

$self->{"text$i"} = $p{'text'};

XML::Bare::c_parse_more( $self->{"text$i"}, $self->{'parser'} );

}

sub parse {

my $self = shift;

my $res = XML::Bare::xml2obj( $self->{'parser'} );

if( defined( $self->{'scheme'} ) ) {

100

$self->{'xbs'} = new XML::Bare( %{ $self->{'scheme'} } );

101

}

102

if( defined( $self->{'xbs'} ) ) {

103

my $xbs = $self->{'xbs'};

104

my $ob = $xbs->parse();

105

$self->{'xbso'} = $ob;

106

readxbs( $ob );

107

}

108

109

if( !ref( $res ) && $res < 0 ) { croak "Error at ".$self->lineinfo( -$res ); }

110

$self->{ 'xml' } = $res;

111

112

if( defined( $self->{'xbso'} ) ) {

113

my $ob = $self->{'xbso'};

114

my $cres = $self->check( $res, $ob );

115

croak( $cres ) if( $cres );

116

}

117

118

return $self->{ 'xml' };

119

}

120

121

# xml bare schema

122

sub check {

123

my ( $self, $node, $scheme, $parent ) = @_;

124

125

my $fail = '';

126

if( ref( $scheme ) eq 'ARRAY' ) {

127

for my $one ( @$scheme ) {

128

my $res = $self->checkone( $node, $one, $parent );

129

return 0 if( !$res );

130

$fail .= "$res\n";

131

}

132

}

133

else { return $self->checkone( $node, $scheme, $parent ); }

134

return $fail;

135

}

136

137

sub checkone {

138

my ( $self, $node, $scheme, $parent ) = @_;

139

140

for my $key ( keys %$node ) {

141

next if( substr( $key, 0, 1 ) eq '_' || $key eq '_att' || $key eq 'comment' );

142

if( $key eq 'value' ) {

143

my $val = $node->{ 'value' };

144

my $regexp = $scheme->{'value'};

145

if( $regexp ) {

146

if( $val !~ m/^($regexp)$/ ) {

147

my $linfo = $self->lineinfo( $node->{'_i'} );

148

return "Value of '$parent' node ($val) does not match /$regexp/ [$linfo]";

149

}

150

}

151

next;

152

}

153

my $sub = $node->{ $key };

154

my $ssub = $scheme->{ $key };

155

if( !$ssub ) { #&& ref( $schemesub ) ne 'HASH'

156

my $linfo = $self->lineinfo( $sub->{'_i'} );

157

return "Invalid node '$key' in xml [$linfo]";

158

}

159

if( ref( $sub ) eq 'HASH' ) {

160

my $res = $self->check( $sub, $ssub, $key );

161

return $res if( $res );

162

}

163

if( ref( $sub ) eq 'ARRAY' ) {

164

my $asub = $ssub;

165

if( ref( $asub ) eq 'ARRAY' ) {

166

$asub = $asub->[0];

167

}

168

if( $asub->{'_t'} ) {

169

my $max = $asub->{'_max'} || 0;

170

if( $#$sub >= $max ) {

171

my $linfo = $self->lineinfo( $sub->[0]->{'_i'} );

172

return "Too many nodes of type '$key'; max $max; [$linfo]"

173

}

174

my $min = $asub->{'_min'} || 0;

175

if( ($#$sub+1)<$min ) {

176

my $linfo = $self->lineinfo( $sub->[0]->{'_i'} );

177

return "Not enough nodes of type '$key'; min $min [$linfo]"

178

}

179

}

180

for( @$sub ) {

181

my $res = $self->check( $_, $ssub, $key );

182

return $res if( $res );

183

}

184

}

185

}

186

if( my $dem = $scheme->{'_demand'} ) {

187

for my $req ( @{$scheme->{'_demand'}} ) {

188

my $ck = $node->{ $req };

189

if( !$ck ) {

190

my $linfo = $self->lineinfo( $node->{'_i'} );

191

return "Required node '$req' does not exist [$linfo]"

192

}

193

if( ref( $ck ) eq 'ARRAY' ) {

194

my $linfo = $self->lineinfo( $node->{'_i'} );

195

return "Required node '$req' is empty array [$linfo]" if( $#$ck == -1 );

196

}

197

}

198

}

199

return 0;

200

}

201

202

sub simple {

203

my $self = shift;

204

205

my $res = XML::Bare::xml2obj_simple( $self->{'parser'} );#$self->xml2obj();

206

207

if( !ref( $res ) && $res < 0 ) { croak "Error at ".$self->lineinfo( -$res ); }

208

$self->{ 'xml' } = $res;

209

210

return $res;

211

}

212

213

sub add_node {

214

my ( $self, $node, $name ) = @_;

215

my @newar;

216

my %blank;

217

$node->{ 'multi_'.$name } = \%blank if( ! $node->{ 'multi_'.$name } );

218

$node->{ $name } = \@newar if( ! $node->{ $name } );

219

my $newnode = new_node( 0, splice( @_, 3 ) );

220

push( @{ $node->{ $name } }, $newnode );

221

return $newnode;

222

}

223

224

sub add_node_after {

225

my ( $self, $node, $prev, $name ) = @_;

226

my @newar;

227

my %blank;

228

$node->{ 'multi_'.$name } = \%blank if( ! $node->{ 'multi_'.$name } );

229

$node->{ $name } = \@newar if( ! $node->{ $name } );

230

my $newnode = $self->new_node( splice( @_, 4 ) );

231

232

my $cur = 0;

233

for my $anode ( @{ $node->{ $name } } ) {

234

$anode->{'_pos'} = $cur if( !$anode->{'_pos'} );

235

$cur++;

236

}

237

my $opos = $prev->{'_pos'};

238

for my $anode ( @{ $node->{ $name } } ) {

239

$anode->{'_pos'}++ if( $anode->{'_pos'} > $opos );

240

}

241

$newnode->{'_pos'} = $opos + 1;

242

243

push( @{ $node->{ $name } }, $newnode );

244

245

return $newnode;

246

}

247

248

sub del_node {

249

my $self = shift;

250

my $node = shift;

251

my $name = shift;

252

my %match = @_;

253

$node = $node->{ $name };

254

return if( !$node );

255

for( my $i = 0; $i <= $#$node; $i++ ) {

256

my $one = $node->[ $i ];

257

foreach my $key ( keys %match ) {

258

my $val = $match{ $key };

259

if( $one->{ $key }->{'value'} eq $val ) {

260

delete $node->[ $i ];

261

}

262

}

263

}

264

}

265

266

# Created a node of XML hash with the passed in variables already set

267

sub new_node {

268

my $self = shift;

269

my %parts = @_;

270

271

my %newnode;

272

foreach( keys %parts ) {

273

my $val = $parts{$_};

274

if( m/^_/ || ref( $val ) eq 'HASH' ) {

275

$newnode{ $_ } = $val;

276

}

277

else {

278

$newnode{ $_ } = { value => $val };

279

}

280

}

281

282

return \%newnode;

283

}

284

285

sub simplify {

286

my $node = CORE::shift;

287

my $ref = ref( $node );

288

if( $ref eq 'ARRAY' ) {

289

my @ret;

290

for my $sub ( @$node ) {

291

CORE::push( @ret, simplify( $sub ) );

292

}

293

return \@ret;

294

}

295

if( $ref eq 'HASH' ) {

296

my %ret;

297

my $cnt = 0;

298

for my $key ( keys %$node ) {

299

next if( $key eq 'comment' || $key eq 'value' || $key =~ m/^_/ );

300

$cnt++;

301

$ret{ $key } = simplify( $node->{ $key } );

302

}

303

if( $cnt == 0 ) {

304

return $node->{'value'};

305

}

306

return \%ret;

307

}

308

return $node;

309

}

310

311

sub hash2xml {

312

my ( $node, $name ) = @_;

313

my $ref = ref( $node );

314

return '' if( $name && $name =~ m/^\_/ );

315

my $txt = $name ? "<$name>" : '';

316

if( $ref eq 'ARRAY' ) {

317

$txt = '';

318

for my $sub ( @$node ) {

319

$txt .= hash2xml( $sub, $name );

320

}

321

return $txt;

322

}

323

elsif( $ref eq 'HASH' ) {

324

for my $key ( keys %$node ) {

325

$txt .= hash2xml( $node->{ $key }, $key );

326

}

327

}

328

else {

329

$node ||= '';

330

if( $node =~ /[<]/ ) { $txt .= '<![CDATA[' . $node . ']]>'; }

331

else { $txt .= $node; }

332

}

333

if( $name ) {

334

$txt .= "</$name>";

335

}

336

337

return $txt;

338

}

339

340

# Save an XML hash tree into a file

341

sub save {

342

my $self = shift;

343

return if( ! $self->{ 'xml' } );

344

345

my $xml = $self->xml( $self->{'xml'} );

346

347

my $len;

348

{

349

use bytes;

350

$len = length( $xml );

351

}

352

return if( !$len );

353

354

# This is intentionally just :utf8 and not :encoding(UTF-8)

355

# :encoding(UTF-8) checks the data for actually being valid UTF-8, and doing so would slow down the file write

356

# See http://perldoc.perl.org/functions/binmode.html

357

358

my $os = $^O;

359

my $F;

360

361

# Note on the following conditional OS check... WTF? This is total bullshit.

362

if( $os eq 'MSWin32' ) {

363

open( $F, '>:utf8', $self->{ 'file' } );

364

binmode $F;

365

}

366

else {

367

open( $F, '>', $self->{ 'file' } );

368

binmode $F, ':utf8';

369

}

370

print $F $xml;

371

372

seek( $F, 0, 2 );

373

my $cursize = tell( $F );

374

if( $cursize != $len ) { # concurrency; we are writing a smaller file

375

warn "Truncating File $self->{'file'}";

376

`cp $self->{'file'} $self->{'file'}.bad`;

377

truncate( F, $len );

378

}

379

seek( $F, 0, 2 );

380

$cursize = tell( $F );

381

if( $cursize != $len ) { # still not the right size even after truncate??

382

die "Write problem; $cursize != $len";

383

}

384

close $F;

385

}

386

387

sub xml {

388

my ( $self, $obj, $name ) = @_;

389

if( !$name ) {

390

my %hash;

391

$hash{0} = $obj;

392

return XML::Bare::obj2xml( \%hash, '', 0 );

393

}

394

my %hash;

395

$hash{$name} = $obj;

396

return XML::Bare::obj2xml( \%hash, '', 0 );

397

}

398

399

sub html {

400

my ( $self, $obj, $name ) = @_;

401

my $pre = '';

402

if( $self->{'style'} ) {

403

$pre = "<style type='text/css'>\@import '$self->{'style'}';</style>";

404

}

405

if( !$name ) {

406

my %hash;

407

$hash{0} = $obj;

408

return $pre.obj2html( \%hash, '', 0 );

409

}

410

my %hash;

411

$hash{$name} = $obj;

412

return $pre.obj2html( \%hash, '', 0 );

413

}

414

415

sub lineinfo {

416

my $self = shift;

417

my $res = shift;

418

my $line = 1;

419

my $j = 0;

420

for( my $i=0;$i<$res;$i++ ) {

421

my $let = substr( $self->{'text'}, $i, 1 );

422

if( ord($let) == 10 ) {

423

$line++;

424

$j = $i;

425

}

426

}

427

my $part = substr( $self->{'text'}, $res, 10 );

428

$part =~ s/\n//g;

429

$res -= $j;

430

if( $self->{'offset'} ) {

431

my $off = $self->{'offset'};

432

$line += $off;

433

return "$off line $line char $res \"$part\"";

434

}

435

return "line $line char $res \"$part\"";

436

}

437

438

sub free_tree { my $self = shift; XML::Bare::free_tree_c( $self->{'parser'} ); }

439

440

package XML::Bare;

441

442

sub find_node {

443

my $node = shift;

444

my $name = shift;

445

my %match = @_;

446

return 0 if( ! defined $node );

447

$node = $node->{ $name } or return 0;

448

$node = [ $node ] if( ref( $node ) eq 'HASH' );

449

if( ref( $node ) eq 'ARRAY' ) {

450

for( my $i = 0; $i <= $#$node; $i++ ) {

451

my $one = $node->[ $i ];

452

for my $key ( keys %match ) {

453

my $val = $match{ $key };

454

croak('undefined value in find') unless defined $val;

455

if( $one->{ $key }{'value'} eq $val ) {

456

return $node->[ $i ];

457

}

458

}

459

}

460

}

461

return 0;

462

}

463

464

sub xget {

465

my $hash = shift;

466

return map $_->{'value'}, @{$hash}{@_};

467

}

468

469

sub forcearray {

470

my $ref = shift;

471

return [] if( !$ref );

472

return $ref if( ref( $ref ) eq 'ARRAY' );

473

return [ $ref ];

474

}

475

476

sub merge {

477

# shift in the two array references as well as the field to merge on

478

my ( $a, $b, $id ) = @_;

479

my %hash = map { $_->{ $id } ? ( $_->{ $id }->{ 'value' } => $_ ) : ( 0 => 0 ) } @$a;

480

for my $one ( @$b ) {

481

next if( !$one->{ $id } );

482

my $short = $hash{ $one->{ $id }->{ 'value' } };

483

next if( !$short );

484

foreach my $key ( keys %$one ) {

485

next if( $key eq '_pos' || $key eq 'id' );

486

my $cur = $short->{ $key };

487

my $add = $one->{ $key };

488

if( !$cur ) { $short->{ $key } = $add; }

489

else {

490

my $type = ref( $cur );

491

if( $type eq 'HASH' ) {

492

my @arr;

493

$short->{ $key } = \@arr;

494

push( @arr, $cur );

495

}

496

if( ref( $add ) eq 'HASH' ) {

497

push( @{$short->{ $key }}, $add );

498

}

499

else { # we are merging an array

500

push( @{$short->{ $key }}, @$add );

501

}

502

}

503

# we need to deal with the case where this node

504

# is already there, either alone or as an array

505

}

506

}

507

return $a;

508

}

509

510

sub clean {

511

my $ob = new XML::Bare( @_ );

512

my $root = $ob->parse();

513

if( $ob->{'save'} ) {

514

$ob->{'file'} = $ob->{'save'} if( "$ob->{'save'}" ne "1" );

515

$ob->save();

516

return;

517

}

518

return $ob->xml( $root );

519

}

520

521

sub xmlin {

522

my $text = shift;

523

my %ops = ( @_ );

524

my $ob = new XML::Bare( text => $text );

525

my $simple = $ob->simple();

526

if( !$ops{'keeproot'} ) {

527

my @keys = keys %$simple;

528

my $first = $keys[0];

529

$simple = $simple->{ $first } if( $first );

530

}

531

return $simple;

532

}

533

534

sub tohtml {

535

my %ops = ( @_ );

536

my $ob = new XML::Bare( %ops );

537

return $ob->html( $ob->parse(), $ops{'root'} || 'xml' );

538

}

539

540

sub readxbs { # xbs = xml bare schema

541

my $node = shift;

542

my @demand;

543

for my $key ( keys %$node ) {

544

next if( substr( $key, 0, 1 ) eq '_' || $key eq '_att' || $key eq 'comment' );

545

if( $key eq 'value' ) {

546

my $val = $node->{'value'};

547

delete $node->{'value'} if( $val =~ m/^\W*$/ );

548

next;

549

}

550

my $sub = $node->{ $key };

551

552

if( $key =~ m/([a-z_]+)([^a-z_]+)/ ) {

553

my $name = $1;

554

my $t = $2;

555

my $min;

556

my $max;

557

if( $t eq '+' ) {

558

$min = 1;

559

$max = 1000;

560

}

561

elsif( $t eq '*' ) {

562

$min = 0;

563

$max = 1000;

564

}

565

elsif( $t eq '?' ) {

566

$min = 0;

567

$max = 1;

568

}

569

elsif( $t eq '@' ) {

570

$name = 'multi_'.$name;

571

$min = 1;

572

$max = 1;

573

}

574

elsif( $t =~ m/\{([0-9]+),([0-9]+)\}/ ) {

575

$min = $1;

576

$max = $2;

577

$t = 'r'; # range

578

}

579

580

if( ref( $sub ) eq 'HASH' ) {

581

my $res = readxbs( $sub );

582

$sub->{'_t'} = $t;

583

$sub->{'_min'} = $min;

584

$sub->{'_max'} = $max;

585

}

586

if( ref( $sub ) eq 'ARRAY' ) {

587

for my $item ( @$sub ) {

588

my $res = readxbs( $item );

589

$item->{'_t'} = $t;

590

$item->{'_min'} = $min;

591

$item->{'_max'} = $max;

592

}

593

}

594

595

push( @demand, $name ) if( $min );

596

$node->{$name} = $node->{$key};

597

delete $node->{$key};

598

}

599

else {

600

if( ref( $sub ) eq 'HASH' ) {

601

readxbs( $sub );

602

$sub->{'_t'} = 'r';

603

$sub->{'_min'} = 1;

604

$sub->{'_max'} = 1;

605

}

606

if( ref( $sub ) eq 'ARRAY' ) {

607

for my $item ( @$sub ) {

608

readxbs( $item );

609

$item->{'_t'} = 'r';

610

$item->{'_min'} = 1;

611

$item->{'_max'} = 1;

612

}

613

}

614

615

push( @demand, $key );

616

}

617

}

618

if( @demand ) { $node->{'_demand'} = \@demand; }

619

}

620

621

sub find_by_perl {

622

my $arr = shift;

623

my $cond = shift;

624

625

my @res;

626

if( ref( $arr ) eq 'ARRAY' ) {

627

$cond =~ s/-([a-z_]+)/\$ob->\{'$1'\}->\{'value'\}/gi;

628

foreach my $ob ( @$arr ) { push( @res, $ob ) if( eval( $cond ) ); }

629

}

630

else {

631

$cond =~ s/-([a-z_]+)/\$arr->\{'$1'\}->\{'value'\}/gi;

632

push( @res, $arr ) if( eval( $cond ) );

633

}

634

return \@res;

635

}

636

637

sub del_by_perl {

638

my $arr = shift;

639

my $cond = shift;

640

$cond =~ s/-value/\$ob->\{'value'\}/g;

641

$cond =~ s/-([a-z]+)/\$ob->\{'$1'\}->\{'value'\}/g;

642

my @res;

643

for( my $i = 0; $i <= $#$arr; $i++ ) {

644

my $ob = $arr->[ $i ];

645

delete $arr->[ $i ] if( eval( $cond ) );

646

}

647

return \@res;

648

}

649

650

sub newhash { shift; return { value => shift }; }

651

652

sub xval {

653

return $_[0] ? $_[0]->{'value'} : ( $_[1] || '' );

654

}

655

656

sub obj2xml {

657

my ( $objs, $name, $pad, $level, $pdex ) = @_;

658

$level = 0 if( !$level );

659

$pad = '' if( $level <= 2 );

660

my $xml = '';

661

my $att = '';

662

my $imm = 1;

663

return '' if( !$objs );

664

#return $objs->{'_raw'} if( $objs->{'_raw'} );

665

my @dex = sort {

666

my $oba = $objs->{ $a };

667

my $obb = $objs->{ $b };

668

my $posa = 0;

669

my $posb = 0;

670

$oba = $oba->[0] if( ref( $oba ) eq 'ARRAY' );

671

$obb = $obb->[0] if( ref( $obb ) eq 'ARRAY' );

672

if( ref( $oba ) eq 'HASH' ) { $posa = $oba->{'_pos'} || 0; }

673

if( ref( $obb ) eq 'HASH' ) { $posb = $obb->{'_pos'} || 0; }

674

return $posa <=> $posb;

675

} keys %$objs;

676

for my $i ( @dex ) {

677

my $obj = $objs->{ $i } || '';

678

my $type = ref( $obj );

679

if( $type eq 'ARRAY' ) {

680

$imm = 0;

681

682

my @dex2 = sort {

683

if( !$a ) { return 0; }

684

if( !$b ) { return 0; }

685

if( ref( $a ) eq 'HASH' && ref( $b ) eq 'HASH' ) {

686

my $posa = $a->{'_pos'};

687

my $posb = $b->{'_pos'};

688

if( !$posa ) { $posa = 0; }

689

if( !$posb ) { $posb = 0; }

690

return $posa <=> $posb;

691

}

692

return 0;

693

} @$obj;

694

695

for my $j ( @dex2 ) {

696

$xml .= obj2xml( $j, $i, $pad.' ', $level+1, $#dex );

697

}

698

}

699

elsif( $type eq 'HASH' && $i !~ /^_/ ) {

700

if( $obj->{ '_att' } ) {

701

$att .= ' ' . $i . '="' . $obj->{ 'value' } . '"' if( $i !~ /^_/ );;

702

}

703

else {

704

$imm = 0;

705

$xml .= obj2xml( $obj , $i, $pad.' ', $level+1, $#dex );

706

}

707

}

708

else {

709

if( $i eq 'comment' ) { $xml .= '' . "\n"; }

710

elsif( $i eq 'value' ) {

711

if( $level > 1 ) { # $#dex < 4 &&

712

if( $obj && $obj =~ /[<>&;]/ ) { $xml .= '<![CDATA[' . $obj . ']]>'; }

713

else { $xml .= $obj if( $obj =~ /\S/ ); }

714

}

715

}

716

elsif( $i =~ /^_/ ) {}

717

else { $xml .= '<' . $i . '>' . $obj . '</' . $i . '>'; }

718

}

719

}

720

my $pad2 = $imm ? '' : $pad;

721

my $cr = $imm ? '' : "\n";

722

if( substr( $name, 0, 1 ) ne '_' ) {

723

if( $name ) {

724

if( $xml ) {

725

$xml = $pad . '<' . $name . $att . '>' . $cr . $xml . $pad2 . '</' . $name . '>';

726

}

727

else {

728

$xml = $pad . '<' . $name . $att . ' />';

729

}

730

}

731

return $xml."\n" if( $level > 1 );

732

return $xml;

733

}

734

return '';

735

}

736

737

sub obj2html {

738

my ( $objs, $name, $pad, $level, $pdex ) = @_;

739

740

my $less = "<span class='ang'><</span>";

741

my $more = "<span class='ang'>></span>";

742

my $tn0 = "<span class='tname'>";

743

my $tn1 = "</span>";

744

my $eq0 = "<span class='eq'>";

745

my $eq1 = "</span>";

746

my $qo0 = "<span class='qo'>";

747

my $qo1 = "</span>";

748

my $sp0 = "<span class='sp'>";

749

my $sp1 = "</span>";

750

my $cd0 = "";

751

my $cd1 = "";

752

753

$level = 0 if( !$level );

754

$pad = '' if( $level == 1 );

755

my $xml = '';

756

my $att = '';

757

my $imm = 1;

758

return '' if( !$objs );

759

my @dex = sort {

760

my $oba = $objs->{ $a };

761

my $obb = $objs->{ $b };

762

my $posa = 0;

763

my $posb = 0;

764

$oba = $oba->[0] if( ref( $oba ) eq 'ARRAY' );

765

$obb = $obb->[0] if( ref( $obb ) eq 'ARRAY' );

766

if( ref( $oba ) eq 'HASH' ) { $posa = $oba->{'_pos'} || 0; }

767

if( ref( $obb ) eq 'HASH' ) { $posb = $obb->{'_pos'} || 0; }

768

return $posa <=> $posb;

769

} keys %$objs;

770

771

if( $objs->{'_cdata'} ) {

772

my $val = $objs->{'value'};

773

$val =~ s/^(\s*\n)+//;

774

$val =~ s/\s+$//;

775

$val =~ s/&/&/g;

776

$val =~ s/</</g;

777

$objs->{'value'} = $val;

778

#$xml = "$less![CDATA[<div class='node'><div class='cdata'>$val</div></div>]]$more";

779

$cd0 = "$less![CDATA[<div class='node'><div class='cdata'>";

780

$cd1 = "</div></div>]]$more";

781

}

782

for my $i ( @dex ) {

783

my $obj = $objs->{ $i } || '';

784

my $type = ref( $obj );

785

if( $type eq 'ARRAY' ) {

786

$imm = 0;

787

788

my @dex2 = sort {

789

if( !$a ) { return 0; }

790

if( !$b ) { return 0; }

791

if( ref( $a ) eq 'HASH' && ref( $b ) eq 'HASH' ) {

792

my $posa = $a->{'_pos'};

793

my $posb = $b->{'_pos'};

794

if( !$posa ) { $posa = 0; }

795

if( !$posb ) { $posb = 0; }

796

return $posa <=> $posb;

797

}

798

return 0;

799

} @$obj;

800

801

for my $j ( @dex2 ) { $xml .= obj2html( $j, $i, $pad.'  ', $level+1, $#dex ); }

802

}

803

elsif( $type eq 'HASH' && $i !~ /^_/ ) {

804

if( $obj->{ '_att' } ) {

805

my $val = $obj->{ 'value' };

806

$val =~ s/</</g;

807

if( $val eq '' ) {

808

$att .= " <span class='aname'>$i</span>" if( $i !~ /^_/ );

809

}

810

else {

811

$att .= " <span class='aname'>$i</span>$eq0=$eq1$qo0\"$qo1$val$qo0\"$qo1" if( $i !~ /^_/ );

812

}

813

}

814

else {

815

$imm = 0;

816

$xml .= obj2html( $obj , $i, $pad.'  ', $level+1, $#dex );

817

}

818

}

819

else {

820

if( $i eq 'comment' ) { $xml .= "$less!--" . $obj . "--$more" . "<br>\n"; }

821

elsif( $i eq 'value' ) {

822

if( $level > 1 ) {

823

if( $obj && $obj =~ /[<>&;]/ && ! $objs->{'_cdata'} ) { $xml .= "$less![CDATA[$obj]]$more"; }

824

else { $xml .= $obj if( $obj =~ /\S/ ); }

825

}

826

}

827

elsif( $i =~ /^_/ ) {}

828

else { $xml .= "$less$tn0$i$tn1$more$obj$less/$tn0$i$tn1$more"; }

829

}

830

}

831

my $pad2 = $imm ? '' : $pad;

832

if( substr( $name, 0, 1 ) ne '_' ) {

833

if( $name ) {

834

if( $imm ) {

835

if( $xml =~ /\S/ ) {

836

$xml = "$sp0$pad$sp1$less$tn0$name$tn1$att$more$cd0$xml$cd1$less/$tn0$name$tn1$more";

837

}

838

else {

839

$xml = "$sp0$pad$sp1$less$tn0$name$tn1$att/$more";

840

}

841

}

842

else {

843

if( $xml =~ /\S/ ) {

844

$xml = "$sp0$pad$sp1$less$tn0$name$tn1$att$more<div class='node'>$xml</div>$sp0$pad$sp1$less/$tn0$name$tn1$more";

845

}

846

else { $xml = "$sp0$pad$sp1$less$tn0$name$tn1$att/$more"; }

847

}

848

}

849

$xml .= "<br>" if( $objs->{'_br'} );

850

if( $objs->{'_note'} ) {

851

$xml .= "<br>";

852

my $note = $objs->{'_note'}{'value'};

853

my @notes = split( /\|/, $note );

854

for( @notes ) {

855

$xml .= "<div class='note'>$sp0$pad$sp1<span class='com'></span></div>";

856

}

857

}

858

return $xml."<br>\n" if( $level );

859

return $xml;

860

}

861

return '';

862

}

863

864

865

866

__END__

867

868

=head1 SYNOPSIS

869

870

use XML::Bare;

871

872

my $ob = new XML::Bare( text => '<xml><name>Bob</name></xml>' );

873

874

# Parse the xml into a hash tree

875

my $root = $ob->parse();

876

877

# Print the content of the name node

878

print $root->{xml}->{name}->{value};

879

880

---

881

882

# Load xml from a file ( assume same contents as first example )

883

my $ob2 = new XML::Bare( file => 'test.xml' );

884

885

my $root2 = $ob2->parse();

886

887

$root2->{xml}->{name}->{value} = 'Tim';

888

889

# Save the changes back to the file

890

$ob2->save();

891

892

---

893

894

# Load xml and verify against XBS ( XML Bare Schema )

895

my $xml_text = '<xml><item name=bob/></xml>''

896

my $schema_text = '<xml><item* name=[a-z]+></item*></xml>'

897

my $ob = new XML::Bare( text => $xml_text, schema => { text => $schema_text } );

898

$ob->parse(); # this will error out if schema is invalid

899

900

=head1 DESCRIPTION

901

902

This module is a 'Bare' XML parser. It is implemented in C. The parser

903

itself is a simple state engine that is less than 500 lines of C. The

904

parser builds a C struct tree from input text. That C struct tree is

905

converted to a Perl hash by a Perl function that makes basic calls back

906

to the C to go through the nodes sequentially.

907

908

The parser itself will only cease parsing if it encounters tags that

909

are not closed properly. All other inputs will parse, even invalid

910

inputs. To allowing checking for validity, a schema checker is included

911

in the module as well.

912

913

The schema format is custom and is meant to be as simple as possible.

914

It is based loosely around the way multiplicity is handled in Perl

915

regular expressions.

916

917

=head2 Supported XML

918

919

To demonstrate what sort of XML is supported, consider the following

920

examples. Each of the PERL statements evaluates to true.

921

922

=over 2

923

924

=item * Node containing just text

925

926

XML: <xml>blah</xml>

927

PERL: $root->{xml}->{value} eq "blah";

928

929

=item * Subset nodes

930

931

XML: <xml><name>Bob</name></xml>

932

PERL: $root->{xml}->{name}->{value} eq "Bob";

933

934

=item * Attributes unquoted

935

936

XML: <xml><a href=index.htm>Link</a></xml>

937

PERL: $root->{xml}->{a}->{href}->{value} eq "index.htm";

938

939

=item * Attributes quoted

940

941

XML: <xml><a href="index.htm">Link</a></xml>

942

PERL: $root->{xml}->{a}->{href}->{value} eq "index.htm";

943

944

=item * CDATA nodes

945

946

XML: <xml><raw><![CDATA[some raw $~<!bad xml<>]]></raw></xml>

947

PERL: $root->{xml}->{raw}->{value} eq "some raw \$~<!bad xml<>";

948

949

=item * Multiple nodes; form array

950

951

XML: <xml><item>1</item><item>2</item></xml>

952

PERL: $root->{xml}->{item}->[0]->{value} eq "1";

953

954

=item * Forcing array creation

955

956

XML: <xml><multi_item/><item>1</item></xml>

957

PERL: $root->{xml}->{item}->[0]->{value} eq "1";

958

959

=item * One comment supported per node

960

961

XML: <xml></xml>

962

PERL: $root->{xml}->{comment} eq 'test';

963

964

=back

965

966

=head2 Schema Checking

967

968

Schema checking is done by providing the module with an XBS (XML::Bare Schema) to check

969

the XML against. If the XML checks as valid against the schema, parsing will continue as

970

normal. If the XML is invalid, the parse function will die, providing information about

971

the failure.

972

973

The following information is provided in the error message:

974

975

=over 2

976

977

=item * The type of error

978

979

=item * Where the error occured ( line and char )

980

981

=item * A short snippet of the XML at the point of failure

982

983

=back

984

985

=head2 XBS ( XML::Bare Schema ) Format

986

987

=over 2

988

989

=item * Required nodes

990

991

XML: <xml></xml>

992

XBS: <xml/>

993

994

=item * Optional nodes - allow one

995

996

XML: <xml></xml>

997

XBS: <xml item?/>

998

or XBS: <xml><item?/></xml>

999

1000

=item * Optional nodes - allow 0 or more

1001

1002

XML: <xml><item/></xml>

1003

XBS: <xml item*/>

1004

1005

=item * Required nodes - allow 1 or more

1006

1007

XML: <xml><item/><item/></xml>

1008

XBS: <xml item+/>

1009

1010

=item * Nodes - specified minimum and maximum number

1011

1012

XML: <xml><item/><item/></xml>

1013

XBS: <xml item{1,2}/>

1014

or XBS: <xml><item{1,2}/></xml>

1015

or XBS: <xml><item{1,2}></item{1,2}></xml>

1016

1017

=item * Multiple acceptable node formats

1018

1019

XML: <xml><item type=box volume=20/><item type=line length=10/></xml>

1020

XBS: <xml><item type=box volume/><item type=line length/></xml>

1021

1022

=item * Regular expressions checking for values

1023

1024

XML: <xml name=Bob dir=up num=10/>

1025

XBS: <xml name=[A-Za-z]+ dir=up|down num=[0-9]+/>

1026

1027

=item * Require multi_ tags

1028

1029

XML: <xml><multi_item/></xml>

1030

XBS: <xml item@/>

1031

1032

=back

1033

1034

=head2 Parsed Hash Structure

1035

1036

The hash structure returned from XML parsing is created in a specific format.

1037

Besides as described above, the structure contains some additional nodes in

1038

order to preserve information that will allow that structure to be correctly

1039

converted back to XML.

1040

1041

Nodes may contain the following 3 additional subnodes:

1042

1043

=over 2

1044

1045

=item * _i

1046

1047

The character offset within the original parsed XML of where the node

1048

begins. This is used to provide line information for errors when XML

1049

fails a schema check.

1050

1051

=item * _pos

1052

1053

This is a number indicating the ordering of nodes. It is used to allow

1054

items in a perl hash to be sorted when writing back to xml. Note that

1055

items are not sorted after parsing in order to save time if all you

1056

are doing is reading and you do not care about the order.

1057

1058

In future versions of this module an option will be added to allow

1059

you to sort your nodes so that you can read them in order.

1060

( note that multiple nodes of the same name are stored in order )

1061

1062

=item * _att

1063

1064

This is a boolean value that exists and is 1 iff the node is an

1065

attribute.

1066

1067

=back

1068

1069

=head2 Parsing Limitations / Features

1070

1071

=over 2

1072

1073

=item * CDATA parsed correctly, but stripped if unneeded

1074

1075

Currently the contents of a node that are CDATA are read and

1076

put into the value hash, but the hash structure does not have

1077

a value indicating the node contains CDATA.

1078

1079

When converting back to XML, the contents of the value hash

1080

are parsed to check for xml incompatible data using a regular

1081

expression. If 'CDATA like' stuff is encountered, the node

1082

is output as CDATA.

1083

1084

=item * Node position stored, but hash remains unsorted

1085

1086

The ordering of nodes is noted using the '_pos' value, but

1087

the hash itself is not ordered after parsing. Currently

1088

items will be out of order when looking at them in the

1089

hash.

1090

1091

Note that when converted back to XML, the nodes are then

1092

sorted and output in the correct order to XML. Note that

1093

nodes of the same name with the same parent will be

1094

grouped together; the position of the first item to

1095

appear will determine the output position of the group.

1096

1097

=item * Comments are parsed but only one is stored per node.

1098

1099

For each node, there can be a comment within it, and that

1100

comment will be saved and output back when dumping to XML.

1101

1102

=item * Comments override output of immediate value

1103

1104

If a node contains only a comment node and a text value,

1105

only the comment node will be displayed. This is in line

1106

with treating a comment node as a node and only displaying

1107

immediate values when a node contains no subnodes.

1108

1109

=item * PI sections are parsed, but discarded

1110

1111

=item * Unknown C<< <! >> sections are parsed, but discarded

1112

1113

=item * Attributes may use no quotes, single quotes, quotes, or backtics

1114

1115

=item * Quoted attributes cannot contain escaped quotes

1116

1117

No escape character is recognized within quotes. As a result,

1118

regular quotes cannot be stored to XML, or the written XML

1119

will not be correct, due to all attributes always being written

1120

using quotes.

1121

1122

=item * Attributes are always written back to XML with quotes

1123

1124

=item * Nodes cannot contain subnodes as well as an immediate value

1125

1126

Actually nodes can in fact contain a value as well, but that

1127

value will be discarded if you write back to XML. That value is

1128

equal to the first continuous string of text besides a subnode.

1129

1130

1131

( the value of node is text )

1132

1133

1134

( the value of node is text )

1135

1136

<node>

1137

<subnode/>text

1138

</node>

1139

( the value of node is "\n " )

1140

1141

=item * Entities are not parsed

1142

1143

No entity parsing is done. This is intentional. Future versions of the module

1144

may include a feature to automatically parse entities, but by default any such

1145

feature will be disabled in order to keep from slowing down the parser.

1146

1147

Also, this is done so that round trip ( read and then write back out ) behavior

1148

is consistent.

1149

1150

=item * Nodes named value

1151

1152

Previously iterations of this module had problems with nodes named 'value',

1153

due to the fact that node contents are stored under the 'value' key already.

1154

The current version should parse such files without any problem, although it

1155

may be confusing to see a parsed tree with 'value' pointing to another hash

1156

containing 'value' as well.

1157

1158

In a future version of the module it will be possible to alter the name that

1159

values are stored under.

1160

1161

Note that node values are stored under the key 'content' when the "simple"

1162

parsing mode is used, so as to be consistent with XML::Simple.

1163

1164

=back

1165

1166

=head2 Module Functions

1167

1168

=over 2

1169

1170

=item * C<< $ob = XML::Bare->new( text => "[some xml]" ) >>

1171

1172

Create a new XML object, with the given text as the xml source.

1173

1174

=item * C<< $object = XML::Bare->new( file => "[filename]" ) >>

1175

1176

Create a new XML object, with the given filename/path as the xml source

1177

1178

=item * C<< $object = XML::Bare->new( text => "[some xml]", file => "[filename]" ) >>

1179

1180

Create a new XML object, with the given text as the xml input, and the given

1181

filename/path as the potential output ( used by save() )

1182

1183

=item * C<< $object = XML::Bare->new( file => "data.xml", scheme => { file => "scheme.xbs" } ) >>

1184

1185

Create a new XML object and check to ensure it is valid xml by way of the XBS scheme.

1186

1187

=item * C<< $tree = $object->parse() >>

1188

1189

Parse the xml of the object and return a tree reference

1190

1191

=item * C<< $tree = $object->simple() >>

1192

1193

Alternate to the parse function which generates a tree similar to that

1194

generated by XML::Simple. Note that the sets of nodes are turned into

1195

arrays always, regardless of whether they have a 'name' attribute, unlike

1196

XML::Simple.

1197

1198

Note that currently the generated tree cannot be used with any of the

1199

functions in this module that operate upon trees. The function is provided

1200

purely as a quick and dirty way to read simple XML files.

1201

1202

=item * C<< $tree = xmlin( $xmlext, keeproot => 1 ) >>

1203

1204

The xmlin function is a shortcut to creating an XML::Bare object and

1205

parsing it using the simple function. It behaves similarly to the

1206

XML::Simple function by the same name. The keeproot option is optional

1207

and if left out the root node will be discarded, same as the function

1208

in XML::Simple.

1209

1210

=item * C<< $text = $object->xml( [root] ) >>

1211

1212

Take the hash tree in [root] and turn it into cleanly indented ( 2 spaces )

1213

XML text.

1214

1215

=item * C<< $text = $object->html( [root], [root node name] ) >>

1216

1217

Take the hash tree in [root] and turn it into nicely colorized and styled

1218

html. [root node name] is optional.

1219

1220

=item * C<< $object->save() >>

1221

1222

The the current tree in the object, cleanly indent it, and save it

1223

to the file parameter specified when creating the object.

1224

1225

=item * C<< $value = xval $node, $default >>

1226

1227

Returns the value of $node or $default if the node does not exist.

1228

If default is not passed to the function, then '' is returned as

1229

a default value when the node does not exist.

1230

1231

=item * C<< ( $name, $age ) = xget( $personnode, qw/name age/ ) >>

1232

1233

Shortcut function to grab a number of values from a node all at the

1234

same time. Note that this function assumes that all of the subnodes

1235

exist; it will fail if they do not.

1236

1237

=item * C<< $text = XML::Bare::clean( text => "[some xml]" ) >>

1238

1239

Shortcut to creating an xml object and immediately turning it into clean xml text.

1240

1241

=item * C<< $text = XML::Bare::clean( file => "[filename]" ) >>

1242

1243

Similar to previous.

1244

1245

=item * C<< XML::Bare::clean( file => "[filename]", save => 1 ) >>

1246

1247

Clean up the xml in the file, saving the results back to the file

1248

1249

=item * C<< XML::Bare::clean( text => "[some xml]", save => "[filename]" ) >>

1250

1251

Clean up the xml provided, and save it into the specified file.

1252

1253

=item * C<< XML::Bare::clean( file => "[filename1]", save => "[filename2]" ) >>

1254

1255

Clean up the xml in filename1 and save the results to filename2.

1256

1257

=item * C<< $html = XML::Bare::tohtml( text => "[some xml]", root => 'xml' ) >>

1258

1259

Shortcut to creating an xml object and immediately turning it into html.

1260

Root is optional, and specifies the name of the root node for the xml

1261

( which defaults to 'xml' )

1262

1263

=item * C<< $object->add_node( [node], [nodeset name], name => value, name2 => value2, ... ) >>

1264

1265

Example:

1266

$object->add_node( $root->{xml}, 'item', name => 'Bob' );

1267

1268

Result:

1269

<xml>

1270

<item>

1271

1272

</item>

1273

</xml>

1274

1275

=item * C<< $object->add_node_after( [node], [subnode within node to add after], [nodeset name], ... ) >>

1276

1277

=item * C<< $object->del_node( [node], [nodeset name], name => value ) >>

1278

1279

Example:

1280

Starting XML:

1281

<xml>

1282

<a>

1283

1284

</a>

1285

<a>

1286

1287

</a>

1288

</xml>

1289

1290

Code:

1291

$xml->del_node( $root->{xml}, 'a', b=>'1' );

1292

1293

Ending XML:

1294

<xml>

1295

<a>

1296

1297

</a>

1298

</xml>

1299

1300

=item * C<< $object->find_node( [node], [nodeset name], name => value ) >>

1301

1302

Example:

1303

Starting XML:

1304

<xml>

1305

<ob>

1306

1307

1308

</ob>

1309

<ob>

1310

1311

1312

</ob>

1313

</xml>

1314

1315

Code:

1316

$object->find_node( $root->{xml}, 'ob', key => '1' )->{val}->{value} = 'test';

1317

1318

Ending XML:

1319

<xml>

1320

<ob>

1321

1322

1323

</ob>

1324

<ob>

1325

1326

1327

</ob>

1328

</xml>

1329

1330

=item * C<< $object->find_by_perl( [nodeset], "[perl code]" ) >>

1331

1332

find_by_perl evaluates some perl code for each node in a set of nodes, and

1333

returns the nodes where the perl code evaluates as true. In order to

1334

easily reference node values, node values can be directly referred

1335

to from within the perl code by the name of the node with a dash(-) in

1336

front of the name. See the example below.

1337

1338

Note that this function returns an array reference as opposed to a single

1339

node unlike the find_node function.

1340

1341

Example:

1342

Starting XML:

1343

<xml>

1344

<ob>

1345

1346

1347

</ob>

1348

<ob>

1349

1350

1351

</ob>

1352

</xml>

1353

1354

Code:

1355

$object->find_by_perl( $root->{xml}->{ob}, "-key eq '1'" )->[0]->{val}->{value} = 'test';

1356

1357

Ending XML:

1358

<xml>

1359

<ob>

1360

1361

1362

</ob>

1363

<ob>

1364

1365

1366

</ob>

1367

</xml>

1368

1369

=item * C<< XML::Bare::merge( [nodeset1], [nodeset2], [id node name] ) >>

1370

1371

Merges the nodes from nodeset2 into nodeset1, matching the contents of

1372

each node based up the content in the id node.

1373

1374

Example:

1375

1376

Code:

1377

my $ob1 = new XML::Bare( text => "

1378

<xml>

1379

<multi_a/>

1380

1381

<a>

1382

1383

1384

</a>

1385

</xml>" );

1386

my $ob2 = new XML::Bare( text => "

1387

<xml>

1388

<multi_a/>

1389

1390

<a>

1391

1392

1393

1394

</a>

1395

</xml>" );

1396

my $root1 = $ob1->parse();

1397

my $root2 = $ob2->parse();

1398

merge( $root1->{'xml'}->{'a'}, $root2->{'xml'}->{'a'}, 'id' );

1399

print $ob1->xml( $root1 );

1400

1401

Output:

1402

<xml>

1403

<multi_a></multi_a>

1404

1405

<a>

1406

1407

1408

1409

1410

</a>

1411

</xml>

1412

1413

=item * C<< XML::Bare::del_by_perl( ... ) >>

1414

1415

Works exactly like find_by_perl, but deletes whatever matches.

1416

1417

=item * C<< XML::Bare::forcearray( [noderef] ) >>

1418

1419

Turns the node reference into an array reference, whether that

1420

node is just a single node, or is already an array reference.

1421

1422

=item * C<< XML::Bare::new_node( ... ) >>

1423

1424

Creates a new node...

1425

1426

=item * C<< XML::Bare::newhash( ... ) >>

1427

1428

Creates a new hash with the specified value.

1429

1430

=item * C<< XML::Bare::simplify( [noderef] ) >>

1431

1432

Take a node with children that have immediate values and

1433

creates a hashref to reference those values by the name of

1434

each child.

1435

1436

=item * C<< XML::Bare::hash2xml( [hashref] ) >>

1437

1438

Take a recursive hash tree ( perhaps generated by the simplify function ) and turn it

1439

into a raw XML string. Note that this function does not indent nicely. You will need

1440

to feed this string back into the parser and output it again if you want it to look

1441

nice. ( or you could use the 'clean' function to do it in one go )

1442

1443

=item * C<< XML::Bare->new( text => "[xml]", unsafe => 1 ) >>

1444

1445

An extra speedy way to parse XML. It is unsafe; may harm pets and children. Don't

1446

say you weren't warned. 30% speed boost compared to the normal parsing. You -must-

1447

use $ob->simple() in combination with this for it to work properly.

1448

1449

The speed boost is gained by skipping checks for the end of the string when in the

1450

middle of properly formatted XML. The only time the check is done is within "values"

1451

( which includes the space after the final closing </xml> )

1452

1453

Also, in the unsafe mode, tags, complete with their attributes, must be on one line.

1454

Node contents of course, can still have carriage returns...

1455

1456

=item * C<< $object->read_more( text => "[xml fragment]" ) >>

1457

1458

Add more XML text to be handled. Note that this function must be called before

1459

calling the parse function.

1460

1461

Example:

1462

1463

Code:

1464

my $ob = XML::Bare->new( text => "

1465

<xml>

1466

<node>a</node>" );

1467

$ob->read_more( text => "<node>b</node>" );

1468

$ob->read_more( text => "</xml>" );

1469

my $root = $ob->parse();

1470

print $ob->xml( $root );

1471

1472

Output:

1473

<xml>

1474

1475

1476

</xml>

1477

1478

Warning! Reading in additional XML fragments only works properly at proper "division points".

1479

Currently the parser will -not- work properly if you split in the middle of a node value, or

1480

in the middle of a node name. A future version of the module will be properly updated to handle

1481

these cases.

1482

1483

Currently there is little to no benefit to parsing this way, rather than simple concatenating

1484

the two strings together and then reading all the XML in at once.

1485

1486

=back

1487

1488

=head2 Functions Used Internally

1489

1490

=over 2

1491

1492

=item * C<< check() checkone() readxbs() free_tree_c() >>

1493

1494

=item * C<< lineinfo() c_parse() c_parse_unsafely() c_parse_more() c_parsefile() free_tree() xml2obj() >>

1495

1496

=item * C<< obj2xml() get_root() obj2html() xml2obj_simple() >>

1497

1498

=back

1499

1500

=head2 Controversy

1501

1502

Since the creation of this module there has been a fair amount of controvesy surrounding

1503

it. A number of authors of other XML parsers have gone so far as to publicly attack this

1504

module and claim that it 'does not parse XML', and 'it is not XML compliant'. Some of the

1505

same people seem to be angered by the inclusion of a benchmark, claiming that it is an

1506

unfair comparison, and that if the proper options and setup are used, that other XML

1507

parsers are better.

1508

1509

The module should parse any XML document that conforms to the standardized

1510

XML specifications, there is no need for alarm and fear that the module will corrupt

1511

your XML documents on reading.

1512

1513

To be blunt about how the parser works, very little has been done to make the parser

1514

follow the specification known as 'XML'. The parser is meant to be flexibile and somewhat

1515

resilient, and will parse XML like garbage that would cause other parsers to error out.

1516

1517

As far as I am concerned, as the author of the module, the 'XML' in 'XML::Bare' should

1518

be thought of to mean 'eXtremely Mad Language', because the module was written from

1519

scratch without referring to the specification known as 'XML'.

1520

1521

In regard to the complaints about the unfairness of the included benchmarks, please

1522

make your own intelligent decision as to what module you like by trying multiple

1523

modules and/or running the performance tests yourself. If you like some other module,

1524

use that module. If you like XML::Bare and think it is the fastest thing on the planet,

1525

that is cool too.

1526

1527

If you hate XML::Bare and want to go around on the internet trashing it and telling

1528

people to use something else, I think perhaps you may want to seek counseling.

1529

1530

=head2 Performance

1531

1532

In comparison to other available perl xml parsers that create trees, XML::Bare

1533

is extremely fast. In order to measure the performance of loading and parsing

1534

compared to the alternatives, a templated speed comparison mechanism has been

1535

created and included with XML::Bare.

1536

1537

The include makebench.pl file runs when you make the module and creates perl

1538

files within the bench directory corresponding to the .tmpl contained there.

1539

1540

Currently there are three types of modules that can be tested against,

1541

executable parsers ( exe.tmpl ), tree parsers ( tree.tmpl ), and parsers

1542

that do not generated trees ( notree.tmpl ).

1543

1544

A full list of modules currently tested against is as follows:

1545

1546

EzXML (exe)

1547

Tiny XML (exe)

1548

XML::Descent (notree)

1549

XML::DOM

1550

XML::Fast

1551

XML::Grove::Builder

1552

XML::Handler::Trees

1553

XMLIO (exe)

1554

XML::LibXML (notree)

1555

XML::LibXML::Simple

1556

XML::Parser (notree)

1557

XML::Parser::EasyTree

1558

XML::Parser::Expat (notree)

1559

XML::SAX::Simple

1560

XML::Simple using XML::Parser

1561

XML::Simple using XML::SAX::PurePerl

1562

XML::Simple using XML::LibXML::SAX::Parser

1563

XML::Simple using XML::Bare::SAX::Parser

1564

XML::Smart

1565

XML::Twig

1566

XML::TreePP

1567

XML::Trivial

1568

XML::XPath::XMLParser

1569

1570

To run the comparisons, run the appropriate perl file within the

1571

bench directory. ( exe.pl, tree.pl, or notree.pl )

1572

1573

The script measures the milliseconds of loading and parsing, and

1574

compares the time against the time of XML::Bare. So a 7 means

1575

it takes 7 times as long as XML::Bare.

1576

1577

Here is a combined table of the script run against each alternative

1578

using the included test.xml:

1579

1580

-Module- load parse total

1581

XML::Bare 1 1 1

1582

XML::TreePP 2.3063 33.1776 6.1598

1583

XML::Parser::EasyTree 4.9405 25.7278 7.4571

1584

XML::Handler::Trees 7.2303 26.5688 9.6447

1585

XML::Trivial 5.0636 12.4715 7.3046

1586

XML::Smart 6.8138 78.7939 15.8296

1587

XML::Simple (XML::Parser) 2.3346 50.4772 10.7455

1588

XML::Simple (PurePerl) 2.361 261.4571 33.6524

1589

XML::Simple (LibXML) 2.3187 163.7501 23.1816

1590

XML::Simple (XML::Bare) 2.3252 59.1254 10.9163

1591

XML::SAX::Simple 8.7792 170.7313 28.3634

1592

XML::Twig 27.8266 56.4476 31.3594

1593

XML::Grove::Builder 7.1267 26.1672 9.4064

1594

XML::XPath::XMLParser 9.7783 35.5486 13.0002

1595

XML::LibXML (notree) 11.0038 4.5758 10.6881

1596

XML::Parser (notree) 4.4698 17.6448 5.8609

1597

XML::Parser::Expat(notree) 3.7681 50.0382 6.0069

1598

XML::Descent (notree) 6.0525 37.0265 11.0322

1599

Tiny XML (exe) 1.0095

1600

EzXML (exe) 1.1284

1601

XMLIO (exe) 1.0165

1602

1603

Here is a combined table of the script run against each alternative

1604

using the included feed2.xml:

1605

1606

-Module- load parse total

1607

XML::Bare 1 1 1

1608

XML::Bare (simple) 1 0.7238 ?

1609

XML::Bare (unsafe simple) 1 ~0.5538 ?

1610

XML::Fast 1.516 0.9733 1.4783

1611

XML::TreePP 0.6393 30.5951 2.6874

1612

XML::MyXML 1.8266 14.2571 2.7113

1613

XML::Parser::EasyTree 1.5208 22.8283 2.9748

1614

XML::Trivial 2.007 25.742 3.615

1615

XML::Tiny 0.1665 61.4918 4.3234

1616

XML::XPath::XMLParser 2.5762 33.2567 4.6742

1617

XML::Smart 1.702 59.4907 5.7566

1618

XML::Simple (XML::Parser) 0.5838 64.7243 5.0006

1619

XML::DOM::Lite 4.5207 17.4617 5.4033

1620

XML::Simple (LibXML) 0.5904 161.7544 11.5731

1621

XML::Twig 8.553 56.9034 11.8805

1622

XML::Grove::Builder 7.2021 30.7926 12.9334

1623

XML::Handler::Trees 6.8545 33.1007 13.0575

1624

XML::LibXML::Simple 14.0204 11.8482 13.8707

1625

XML::Simple (PurePerl) 0.6176 321.3422 23.0465

1626

XML::Simple 2.7168 90.7203 26.7525

1627

XML::SAX::Simple 8.7386 94.8276 29.2166

1628

XML::LibXML (notree) 11.0023 5.022 10.5214

1629

XML::Parser (notree) 4.3748 25.0213 5.9803

1630

XML::Parser::Expat(notree) 3.6555 51.6426 7.4316

1631

XML::Descent (notree) 5.9206 155.0289 18.7767

1632

Tiny XML (exe) 1.2212

1633

EzXML (exe) 1.3618

1634

XMLIO (exe) 1.0145

1635

1636

These results show that XML::Bare is, at least on the

1637

test machine, running all tests within cygwin, faster

1638

at loading and parsing than everything being tested

1639

against.

1640

1641

The following things are shown as well:

1642

- XML::Bare can parse XML and create a hash tree

1643

in less time than it takes LibXML just to parse.

1644

- XML::Bare can parse XML and create a tree

1645

in less time than all three binary parsers take

1646

just to parse.

1647

- XML::Fast is theoretically faster at parsing than

1648

the default 'full' mode of XML::Bare. Despite that,

1649

the 'simple' mode of XML::Bare is even faster.

1650

1651

Note that the executable parsers are not perl modules

1652

and are timed using dummy programs that just uses the

1653

library to load and parse the example files. The

1654

executables are not included with this program. Any

1655

source modifications used to generate the shown test

1656

results can be found in the bench/src directory of

1657

the distribution

1658

1659

=head1 LICENSE

1660

1661

1662

1663

This program is free software; you can redistribute it and/or

1664

modify it under the terms of the GNU General Public License as

1665

published by the Free Software Foundation; either version 2 of the

1666

License, or (at your option) any later version. You may also can

1667

redistribute it and/or modify it under the terms of the Perl

1668

Artistic License.

1669

1670

This program is distributed in the hope that it will be useful,

1671

but WITHOUT ANY WARRANTY; without even the implied warranty of

1672

MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

1673

GNU General Public License for more details.

1674

1675

=cut

Older »