1
package MARC::File::SAX;
5
MARC::File::SAX - SAX handler for parsing MARCXML
11
use base qw( XML::SAX::Base );
14
use MARC::Charset qw(utf8_to_marc8);
25
return bless {records => []}, ref($class) || $class;
30
Get all the MARC::Records that were parsed out of the XML.
35
return shift->{records};
40
In some contexts you might only expect there to be one record parsed. This
41
is a shorthand for getting it.
46
return shift->{records}[0];
50
my ( $self, $element ) = @_;
51
my $name = $element->{ LocalName };
52
if ( $name eq 'record' ) {
53
$self->{ record } = MARC::Record->new();
54
} elsif ( $name eq 'collection' ) {
55
# ignore collection wrappers
56
} elsif ( defined $self->{ record } ) {
57
if ( $name eq 'leader' ) {
58
$self->{ tag } = 'LDR';
59
} elsif ( $name eq 'controlfield' ) {
60
$self->{ tag } = $element->{ Attributes }{ '{}tag' }{ Value };
61
} elsif ( $name eq 'datafield' ) {
62
$self->{ tag } = $element->{ Attributes }{ '{}tag' }{ Value };
63
$self->{ i1 } = $element->{ Attributes }{ '{}ind1' }{ Value };
64
$self->{ i2 } = $element->{ Attributes }{ '{}ind2' }{ Value };
65
} elsif ( $name eq 'subfield' ) {
66
$self->{ subcode } = $element->{ Attributes }{ '{}code' }{ Value };
69
# we've reached a new element but haven't started populating
70
# a MARC::Record yet. This either means that we've encountered
71
# some non-MARC21slim stuff or the caller's given us an invalid
72
# doc that doesn't include a <record> element.
73
# In the first case, we'll just ignore the element; in the second
74
# case, we'll throw an exception with a better description.
76
# TODO: to be more consistent with how MARC::File::USMARC handles
77
# parse errors, rather than throwing an exception we could
78
# instantiate an empty MARC::Record and set its warnings
81
if ( $name eq 'leader' || $name eq 'controlfield' || $name eq 'datafield' || $name eq 'subfield' ) {
82
croak("found MARCXML element $name, but the <record> wrapper is missing");
88
my ( $self, $element ) = @_;
89
my $name = $element->{ LocalName };
90
if ( $name eq 'subfield' ) {
91
push @{ $self->{ subfields } }, $self->{ subcode };
93
if ($self->{ transcode }) {
94
push @{ $self->{ subfields } }, utf8_to_marc8($self->{ chars });
96
push @{ $self->{ subfields } }, $self->{ chars } ;
99
$self->{ chars } = '';
100
$self->{ subcode } = '';
101
} elsif ( $name eq 'controlfield' ) {
102
$self->{ record }->append_fields(
103
MARC::Field->new( $self->{ tag }, $self->{ chars } )
105
$self->{ chars } = '';
107
} elsif ( $name eq 'datafield' ) {
108
$self->{ record }->append_fields(
113
@{ $self->{ subfields } }
119
$self->{ subfields } = [];
120
$self->{ chars } = '';
121
} elsif ( $name eq 'leader' ) {
122
my $ldr = $self->{ chars };
124
$self->{ transcode }++
125
if (substr($ldr,9,1) eq 'a' and $self->{toMARC8});
127
substr($ldr,9,1,' ') if ($self->{ transcode });
129
$self->{ record }->leader( $ldr );
130
$self->{ chars } = '';
132
} elsif ( $name eq 'record' ) {
133
push(@{ $self->{ records } }, $self->{ record });
134
undef $self->{ record };
139
my ( $self, $chars ) = @_;
141
( exists $self->{ subcode } && $self->{ subcode } ne '')
142
|| ( $self->{ tag } && ( $self->{ tag } eq 'LDR' || $self->{ tag } < 10 ))
144
$self->{ chars } .= $chars->{ Data };