| File | /usr/share/perl5/MARC/File/XML.pm |
| Statements Executed | 52 |
| Total Time | 0.0028299 seconds |
| Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
|---|---|---|---|---|---|
| 1 | 1 | 1 | 20µs | 119µs | MARC::File::XML::import |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::BEGIN |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::DESTROY |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::_next |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::_unimarc_encoding |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::close |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::decideMARC8Binary |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::decode |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::default_record_format |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::encode |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::escape |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::footer |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::header |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::out |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::record |
| 0 | 0 | 0 | 0s | 0s | MARC::File::XML::write |
| 0 | 0 | 0 | 0s | 0s | MARC::Record::as_xml |
| 0 | 0 | 0 | 0s | 0s | MARC::Record::as_xml_record |
| 0 | 0 | 0 | 0s | 0s | MARC::Record::new_from_xml |
| Line | Stmts. | Exclusive Time | Avg. | Code |
|---|---|---|---|---|
| 1 | package MARC::File::XML; | |||
| 2 | ||||
| 3 | 3 | 30µs | 10µs | use warnings; # spent 28µs making 1 call to warnings::import |
| 4 | 3 | 41µs | 14µs | use strict; # spent 9µs making 1 call to strict::import |
| 5 | 3 | 39µs | 13µs | use vars qw( $VERSION %_load_args ); # spent 47µs making 1 call to vars::import |
| 6 | 3 | 36µs | 12µs | use base qw( MARC::File ); # spent 1.19ms making 1 call to base::import |
| 7 | 3 | 140µs | 47µs | use MARC::Record; # spent 48µs making 1 call to Exporter::import |
| 8 | 3 | 28µs | 9µs | use MARC::Field; # spent 4µs making 1 call to import |
| 9 | 3 | 147µs | 49µs | use MARC::File::SAX; # spent 4µs making 1 call to import |
| 10 | 3 | 34µs | 11µs | use XML::SAX qw(Namespaces Validation); # spent 47µs making 1 call to Exporter::import |
| 11 | ||||
| 12 | 3 | 25µs | 8µs | use MARC::Charset qw( marc8_to_utf8 utf8_to_marc8 ); # spent 37µs making 1 call to Exporter::import |
| 13 | 3 | 38µs | 13µs | use IO::File; # spent 175µs making 1 call to Exporter::import |
| 14 | 3 | 32µs | 11µs | use Carp qw( croak ); # spent 34µs making 1 call to Exporter::import |
| 15 | 3 | 2.09ms | 696µs | use Encode (); |
| 16 | ||||
| 17 | 1 | 900ns | 900ns | $VERSION = '0.88'; |
| 18 | ||||
| 19 | 1 | 16µs | 16µs | my $handler = MARC::File::SAX->new(); # spent 121µs making 1 call to XML::SAX::Base::new |
| 20 | ||||
| 21 | 1 | 9µs | 9µs | my $factory = XML::SAX::ParserFactory->new(); # spent 591µs making 1 call to XML::SAX::ParserFactory::new |
| 22 | 1 | 8µs | 8µs | $factory->require_feature(Namespaces); # spent 12µs making 1 call to XML::SAX::ParserFactory::require_feature |
| 23 | ||||
| 24 | 1 | 9µs | 9µs | my $parser = $factory->parser( Handler => $handler, ProtocolEncoding => 'UTF-8' ); # spent 25.2ms making 1 call to XML::SAX::ParserFactory::parser |
| 25 | ||||
| 26 | # spent 119µs (20+99) within MARC::File::XML::import which was called
# once (20µs+99µs) at line 9 of /home/tamil/util/marc-moose/t/test-parsing | |||
| 27 | 5 | 19µs | 4µs | my $class = shift; |
| 28 | %_load_args = @_; | |||
| 29 | $_load_args{ DefaultEncoding } ||= 'UTF-8'; | |||
| 30 | $_load_args{ RecordFormat } ||= 'USMARC'; | |||
| 31 | ||||
| 32 | $parser = $factory->parser( Handler => $handler, ProtocolEncoding => $_load_args{DefaultEncoding} ); # spent 99µs making 1 call to XML::SAX::ParserFactory::parser | |||
| 33 | } | |||
| 34 | ||||
| 35 | =head1 NAME | |||
| 36 | ||||
| 37 | MARC::File::XML - Work with MARC data encoded as XML | |||
| 38 | ||||
| 39 | =head1 SYNOPSIS | |||
| 40 | ||||
| 41 | ## Loading with USE options | |||
| 42 | use MARC::File::XML ( BinaryEncoding => 'utf8', RecordFormat => 'UNIMARC' ); | |||
| 43 | ||||
| 44 | ## Setting the record format without USE options | |||
| 45 | MARC::File::XML->default_record_format('USMARC'); | |||
| 46 | ||||
| 47 | ## reading with MARC::Batch | |||
| 48 | my $batch = MARC::Batch->new( 'XML', $filename ); | |||
| 49 | my $record = $batch->next(); | |||
| 50 | ||||
| 51 | ## or reading with MARC::File::XML explicitly | |||
| 52 | my $file = MARC::File::XML->in( $filename ); | |||
| 53 | my $record = $file->next(); | |||
| 54 | ||||
| 55 | ## serialize a single MARC::Record object as XML | |||
| 56 | print $record->as_xml(); | |||
| 57 | ||||
| 58 | ## write a bunch of records to a file | |||
| 59 | my $file = MARC::File::XML->out( 'myfile.xml' ); | |||
| 60 | $file->write( $record1 ); | |||
| 61 | $file->write( $record2 ); | |||
| 62 | $file->write( $record3 ); | |||
| 63 | $file->close(); | |||
| 64 | ||||
| 65 | ## instead of writing to disk, get the xml directly | |||
| 66 | my $xml = join( "\n", | |||
| 67 | MARC::File::XML::header(), | |||
| 68 | MARC::File::XML::record( $record1 ), | |||
| 69 | MARC::File::XML::record( $record2 ), | |||
| 70 | MARC::File::XML::footer() | |||
| 71 | ); | |||
| 72 | ||||
| 73 | =head1 DESCRIPTION | |||
| 74 | ||||
| 75 | The MARC-XML distribution is an extension to the MARC-Record distribution for | |||
| 76 | working with MARC21 data that is encoded as XML. The XML encoding used is the | |||
| 77 | MARC21slim schema supplied by the Library of Congress. More information may | |||
| 78 | be obtained here: http://www.loc.gov/standards/marcxml/ | |||
| 79 | ||||
| 80 | You must have MARC::Record installed to use MARC::File::XML. In fact | |||
| 81 | once you install the MARC-XML distribution you will most likely not use it | |||
| 82 | directly, but will have an additional file format available to you when you | |||
| 83 | use MARC::Batch. | |||
| 84 | ||||
| 85 | This version of MARC-XML supersedes an the versions ending with 0.25 which | |||
| 86 | were used with the MARC.pm framework. MARC-XML now uses MARC::Record | |||
| 87 | exclusively. | |||
| 88 | ||||
| 89 | If you have any questions or would like to contribute to this module please | |||
| 90 | sign on to the perl4lib list. More information about perl4lib is available | |||
| 91 | at L<http://perl4lib.perl.org>. | |||
| 92 | ||||
| 93 | =head1 METHODS | |||
| 94 | ||||
| 95 | When you use MARC::File::XML your MARC::Record objects will have two new | |||
| 96 | additional methods available to them: | |||
| 97 | ||||
| 98 | =head2 MARC::File::XML->default_record_format([$format]) | |||
| 99 | ||||
| 100 | Sets or returns the default record format used by MARC::File::XML. Valid | |||
| 101 | formats are B<MARC21>, B<USMARC>, B<UNIMARC> and B<UNIMARCAUTH>. | |||
| 102 | ||||
| 103 | MARC::File::XML->default_record_format('UNIMARC'); | |||
| 104 | ||||
| 105 | =cut | |||
| 106 | ||||
| 107 | sub default_record_format { | |||
| 108 | my $self = shift; | |||
| 109 | my $format = shift; | |||
| 110 | ||||
| 111 | $_load_args{RecordFormat} = $format if ($format); | |||
| 112 | ||||
| 113 | return $_load_args{RecordFormat}; | |||
| 114 | } | |||
| 115 | ||||
| 116 | ||||
| 117 | =head2 as_xml() | |||
| 118 | ||||
| 119 | Returns a MARC::Record object serialized in XML. You can pass an optional format | |||
| 120 | parameter to tell MARC::File::XML what type of record (USMARC, UNIMARC, UNIMARCAUTH) you are | |||
| 121 | serializing. | |||
| 122 | ||||
| 123 | print $record->as_xml([$format]); | |||
| 124 | ||||
| 125 | =cut | |||
| 126 | ||||
| 127 | sub MARC::Record::as_xml { | |||
| 128 | my $record = shift; | |||
| 129 | my $format = shift || $_load_args{RecordFormat}; | |||
| 130 | return( MARC::File::XML::encode( $record, $format ) ); | |||
| 131 | } | |||
| 132 | ||||
| 133 | =head2 as_xml_record([$format]) | |||
| 134 | ||||
| 135 | Returns a MARC::Record object serialized in XML without a collection wrapper. | |||
| 136 | You can pass an optional format parameter to tell MARC::File::XML what type of | |||
| 137 | record (USMARC, UNIMARC, UNIMARCAUTH) you are serializing. | |||
| 138 | ||||
| 139 | print $record->as_xml_record('UNIMARC'); | |||
| 140 | ||||
| 141 | =cut | |||
| 142 | ||||
| 143 | sub MARC::Record::as_xml_record { | |||
| 144 | my $record = shift; | |||
| 145 | my $format = shift || $_load_args{RecordFormat}; | |||
| 146 | return( MARC::File::XML::encode( $record, $format, 1 ) ); | |||
| 147 | } | |||
| 148 | ||||
| 149 | =head2 new_from_xml([$encoding, $format]) | |||
| 150 | ||||
| 151 | If you have a chunk of XML and you want a record object for it you can use | |||
| 152 | this method to generate a MARC::Record object. You can pass an optional | |||
| 153 | encoding parameter to specify which encoding (UTF-8 or MARC-8) you would like | |||
| 154 | the resulting record to be in. You can also pass a format parameter to specify | |||
| 155 | the source record type, such as UNIMARC, UNIMARCAUTH, USMARC or MARC21. | |||
| 156 | ||||
| 157 | my $record = MARC::Record->new_from_xml( $xml, $encoding, $format ); | |||
| 158 | ||||
| 159 | Note: only works for single record XML chunks. | |||
| 160 | ||||
| 161 | =cut | |||
| 162 | ||||
| 163 | sub MARC::Record::new_from_xml { | |||
| 164 | my $xml = shift; | |||
| 165 | ## to allow calling as MARC::Record::new_from_xml() | |||
| 166 | ## or MARC::Record->new_from_xml() | |||
| 167 | $xml = shift if ( ref($xml) || ($xml eq "MARC::Record") ); | |||
| 168 | ||||
| 169 | my $enc = shift || $_load_args{BinaryEncoding}; | |||
| 170 | my $format = shift || $_load_args{RecordFormat}; | |||
| 171 | return( MARC::File::XML::decode( $xml, $enc, $format ) ); | |||
| 172 | } | |||
| 173 | ||||
| 174 | =pod | |||
| 175 | ||||
| 176 | If you want to write records as XML to a file you can use out() with write() | |||
| 177 | to serialize more than one record as XML. | |||
| 178 | ||||
| 179 | =head2 out() | |||
| 180 | ||||
| 181 | A constructor for creating a MARC::File::XML object that can write XML to a | |||
| 182 | file. You must pass in the name of a file to write XML to. If the $encoding | |||
| 183 | parameter or the DefaultEncoding (see above) is set to UTF-8 then the binmode | |||
| 184 | of the output file will be set appropriately. | |||
| 185 | ||||
| 186 | my $file = MARC::File::XML->out( $filename [, $encoding] ); | |||
| 187 | ||||
| 188 | =cut | |||
| 189 | ||||
| 190 | sub out { | |||
| 191 | my ( $class, $filename, $enc ) = @_; | |||
| 192 | my $fh = IO::File->new( ">$filename" ) or croak( $! ); | |||
| 193 | $enc ||= $_load_args{DefaultEncoding}; | |||
| 194 | ||||
| 195 | if ($enc =~ /^utf-?8$/oi) { | |||
| 196 | $fh->binmode(':utf8'); | |||
| 197 | } else { | |||
| 198 | $fh->binmode(':raw'); | |||
| 199 | } | |||
| 200 | ||||
| 201 | my %self = ( | |||
| 202 | filename => $filename, | |||
| 203 | fh => $fh, | |||
| 204 | header => 0, | |||
| 205 | encoding => $enc | |||
| 206 | ); | |||
| 207 | return( bless \%self, ref( $class ) || $class ); | |||
| 208 | } | |||
| 209 | ||||
| 210 | =head2 write() | |||
| 211 | ||||
| 212 | Used in tandem with out() to write records to a file. | |||
| 213 | ||||
| 214 | my $file = MARC::File::XML->out( $filename ); | |||
| 215 | $file->write( $record1 ); | |||
| 216 | $file->write( $record2 ); | |||
| 217 | ||||
| 218 | =cut | |||
| 219 | ||||
| 220 | sub write { | |||
| 221 | my ( $self, $record, $enc ) = @_; | |||
| 222 | if ( ! $self->{ fh } ) { | |||
| 223 | croak( "MARC::File::XML object not open for writing" ); | |||
| 224 | } | |||
| 225 | if ( ! $record ) { | |||
| 226 | croak( "must pass write() a MARC::Record object" ); | |||
| 227 | } | |||
| 228 | ## print the XML header if we haven't already | |||
| 229 | if ( ! $self->{ header } ) { | |||
| 230 | $enc ||= $self->{ encoding } || $_load_args{DefaultEncoding}; | |||
| 231 | $self->{ fh }->print( header( $enc ) ); | |||
| 232 | $self->{ header } = 1; | |||
| 233 | } | |||
| 234 | ## print out the record | |||
| 235 | $self->{ fh }->print( record( $record ) ) || croak( $! ); | |||
| 236 | return( 1 ); | |||
| 237 | } | |||
| 238 | ||||
| 239 | =head2 close() | |||
| 240 | ||||
| 241 | When writing records to disk the filehandle is automatically closed when you | |||
| 242 | the MARC::File::XML object goes out of scope. If you want to close it explicitly | |||
| 243 | use the close() method. | |||
| 244 | ||||
| 245 | =cut | |||
| 246 | ||||
| 247 | sub close { | |||
| 248 | my $self = shift; | |||
| 249 | if ( $self->{ fh } ) { | |||
| 250 | $self->{ fh }->print( footer() ) if $self->{ header }; | |||
| 251 | $self->{ fh } = undef; | |||
| 252 | $self->{ filename } = undef; | |||
| 253 | $self->{ header } = undef; | |||
| 254 | } | |||
| 255 | return( 1 ); | |||
| 256 | } | |||
| 257 | ||||
| 258 | ## makes sure that the XML file is closed off | |||
| 259 | ||||
| 260 | sub DESTROY { | |||
| 261 | shift->close(); | |||
| 262 | } | |||
| 263 | ||||
| 264 | =pod | |||
| 265 | ||||
| 266 | If you want to generate batches of records as XML, but don't want to write to | |||
| 267 | disk you'll have to use header(), record() and footer() to generate the | |||
| 268 | different portions. | |||
| 269 | ||||
| 270 | $xml = join( "\n", | |||
| 271 | MARC::File::XML::header(), | |||
| 272 | MARC::File::XML::record( $record1 ), | |||
| 273 | MARC::File::XML::record( $record2 ), | |||
| 274 | MARC::File::XML::record( $record3 ), | |||
| 275 | MARC::File::XML::footer() | |||
| 276 | ); | |||
| 277 | ||||
| 278 | =head2 header() | |||
| 279 | ||||
| 280 | Returns a string of XML to use as the header to your XML file. | |||
| 281 | ||||
| 282 | =cut | |||
| 283 | ||||
| 284 | sub header { | |||
| 285 | my $enc = shift; | |||
| 286 | $enc = shift if ( $enc && (ref($enc) || ($enc eq "MARC::File::XML")) ); | |||
| 287 | $enc ||= 'UTF-8'; | |||
| 288 | return( <<MARC_XML_HEADER ); | |||
| 289 | <?xml version="1.0" encoding="$enc"?> | |||
| 290 | <collection | |||
| 291 | xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | |||
| 292 | xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd" | |||
| 293 | xmlns="http://www.loc.gov/MARC21/slim"> | |||
| 294 | MARC_XML_HEADER | |||
| 295 | } | |||
| 296 | ||||
| 297 | =head2 footer() | |||
| 298 | ||||
| 299 | Returns a string of XML to use at the end of your XML file. | |||
| 300 | ||||
| 301 | =cut | |||
| 302 | ||||
| 303 | sub footer { | |||
| 304 | return( "</collection>" ); | |||
| 305 | } | |||
| 306 | ||||
| 307 | =head2 record() | |||
| 308 | ||||
| 309 | Returns a chunk of XML suitable for placement between the header and the footer. | |||
| 310 | ||||
| 311 | =cut | |||
| 312 | ||||
| 313 | sub record { | |||
| 314 | my $record = shift; | |||
| 315 | my $format = shift; | |||
| 316 | my $without_header = shift; | |||
| 317 | my $enc = shift; | |||
| 318 | ||||
| 319 | $format ||= $_load_args{RecordFormat}; | |||
| 320 | ||||
| 321 | my $_transcode = 0; | |||
| 322 | my $ldr = $record->leader; | |||
| 323 | my $original_encoding = substr($ldr,9,1); | |||
| 324 | ||||
| 325 | # Does the record think it is already Unicode? | |||
| 326 | if ($original_encoding ne 'a' && lc($format) !~ /^unimarc/o) { | |||
| 327 | # If not, we'll make it so | |||
| 328 | $_transcode++; | |||
| 329 | } | |||
| 330 | ||||
| 331 | my @xml = (); | |||
| 332 | ||||
| 333 | if ($without_header) { | |||
| 334 | push @xml, <<HEADER | |||
| 335 | <?xml version="1.0" encoding="$enc"?> | |||
| 336 | <record | |||
| 337 | xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | |||
| 338 | xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/ standards/marcxml/schema/MARC21slim.xsd" | |||
| 339 | xmlns="http://www.loc.gov/MARC21/slim"> | |||
| 340 | HEADER | |||
| 341 | ||||
| 342 | } else { | |||
| 343 | push( @xml, "<record>" ); | |||
| 344 | } | |||
| 345 | ||||
| 346 | push( @xml, " <leader>" . escape( $record->leader ) . "</leader>" ); | |||
| 347 | ||||
| 348 | foreach my $field ( $record->fields() ) { | |||
| 349 | my $tag = $field->tag(); | |||
| 350 | if ( $field->is_control_field() ) { | |||
| 351 | my $data = $field->data; | |||
| 352 | push( @xml, qq( <controlfield tag="$tag">) . | |||
| 353 | escape( ($_transcode ? marc8_to_utf8($data) : $data) ). qq(</controlfield>) ); | |||
| 354 | } else { | |||
| 355 | my $i1 = $field->indicator( 1 ); | |||
| 356 | my $i2 = $field->indicator( 2 ); | |||
| 357 | push( @xml, qq( <datafield tag="$tag" ind1="$i1" ind2="$i2">) ); | |||
| 358 | foreach my $subfield ( $field->subfields() ) { | |||
| 359 | my ( $code, $data ) = @$subfield; | |||
| 360 | push( @xml, qq( <subfield code="$code">). | |||
| 361 | escape( ($_transcode ? marc8_to_utf8($data) : $data) ).qq(</subfield>) ); | |||
| 362 | } | |||
| 363 | push( @xml, " </datafield>" ); | |||
| 364 | } | |||
| 365 | } | |||
| 366 | push( @xml, "</record>\n" ); | |||
| 367 | ||||
| 368 | if ($_transcode) { | |||
| 369 | substr($ldr,9,1,$original_encoding); | |||
| 370 | $record->leader( $ldr ); | |||
| 371 | } | |||
| 372 | ||||
| 373 | return( join( "\n", @xml ) ); | |||
| 374 | } | |||
| 375 | ||||
| 376 | 1 | 5µs | 5µs | my %ESCAPES = ( |
| 377 | '&' => '&', | |||
| 378 | '<' => '<', | |||
| 379 | '>' => '>', | |||
| 380 | ); | |||
| 381 | 3 | 5µs | 2µs | my $ESCAPE_REGEX = |
| 382 | eval 'qr/' . | |||
| 383 | 1 | 58µs | 58µs | join( '|', map { $_ = "\Q$_\E" } keys %ESCAPES ) . |
| 384 | '/;' | |||
| 385 | ; | |||
| 386 | ||||
| 387 | sub escape { | |||
| 388 | my $string = shift; | |||
| 389 | return '' if ! defined $string or $string eq ''; | |||
| 390 | $string =~ s/($ESCAPE_REGEX)/$ESCAPES{$1}/oge; | |||
| 391 | return( $string ); | |||
| 392 | } | |||
| 393 | ||||
| 394 | sub _next { | |||
| 395 | my $self = shift; | |||
| 396 | my $fh = $self->{ fh }; | |||
| 397 | ||||
| 398 | ## return undef at the end of the file | |||
| 399 | return if eof($fh); | |||
| 400 | ||||
| 401 | ## get a chunk of xml for a record | |||
| 402 | local $/ = '</record>'; | |||
| 403 | my $xml = <$fh>; | |||
| 404 | ||||
| 405 | ## trim stuff before the start record element | |||
| 406 | $xml =~ s/.*<record.*?>/<record>/s; | |||
| 407 | ||||
| 408 | ## return undef if there isn't a good chunk of xml | |||
| 409 | return if ( $xml !~ m|<record>.*</record>|s ); | |||
| 410 | ||||
| 411 | ## return the chunk of xml | |||
| 412 | return( $xml ); | |||
| 413 | } | |||
| 414 | ||||
| 415 | =head2 decode() | |||
| 416 | ||||
| 417 | You probably don't ever want to call this method directly. If you do | |||
| 418 | you should pass in a chunk of XML as the argument. | |||
| 419 | ||||
| 420 | It is normally invoked by a call to next(), see L<MARC::Batch> or L<MARC::File>. | |||
| 421 | ||||
| 422 | =cut | |||
| 423 | ||||
| 424 | sub decode { | |||
| 425 | ||||
| 426 | my $text; | |||
| 427 | my $location = ''; | |||
| 428 | my $self = shift; | |||
| 429 | ||||
| 430 | ## see MARC::File::USMARC::decode for explanation of what's going on | |||
| 431 | ## here | |||
| 432 | if ( ref($self) =~ /^MARC::File/ ) { | |||
| 433 | $location = 'in record '.$self->{recnum}; | |||
| 434 | $text = shift; | |||
| 435 | } else { | |||
| 436 | $location = 'in record 1'; | |||
| 437 | $text = $self=~/MARC::File/ ? shift : $self; | |||
| 438 | } | |||
| 439 | ||||
| 440 | my $enc = shift || $_load_args{BinaryEncoding}; | |||
| 441 | my $format = shift || $_load_args{RecordFormat}; | |||
| 442 | ||||
| 443 | $parser->{ tagStack } = []; | |||
| 444 | $parser->{ subfields } = []; | |||
| 445 | $parser->{ Handler }{ record } = MARC::Record->new(); | |||
| 446 | $parser->{ Handler }{ toMARC8 } = decideMARC8Binary($format,$enc); | |||
| 447 | ||||
| 448 | $parser->parse_string( $text ); | |||
| 449 | ||||
| 450 | return( $parser->{ Handler }{ record } ); | |||
| 451 | ||||
| 452 | } | |||
| 453 | ||||
| 454 | sub decideMARC8Binary { | |||
| 455 | my $format = shift; | |||
| 456 | my $enc = shift; | |||
| 457 | ||||
| 458 | return 0 if (defined($format) && lc($format) =~ /^unimarc/o); | |||
| 459 | return 0 if (defined($enc) && lc($enc) =~ /^utf-?8/o); | |||
| 460 | return 1; | |||
| 461 | } | |||
| 462 | ||||
| 463 | ||||
| 464 | =head2 encode() | |||
| 465 | ||||
| 466 | You probably want to use the as_xml() method on your MARC::Record object | |||
| 467 | instead of calling this directly. But if you want to you just need to | |||
| 468 | pass in the MARC::Record object you wish to encode as XML, and you will be | |||
| 469 | returned the XML as a scalar. | |||
| 470 | ||||
| 471 | =cut | |||
| 472 | ||||
| 473 | sub encode { | |||
| 474 | my $record = shift; | |||
| 475 | my $format = shift || $_load_args{RecordFormat}; | |||
| 476 | my $without_header = shift; | |||
| 477 | my $enc = shift || $_load_args{DefaultEncoding}; | |||
| 478 | ||||
| 479 | if (lc($format) =~ /^unimarc/o) { | |||
| 480 | $enc = _unimarc_encoding( $format => $record ); | |||
| 481 | } | |||
| 482 | ||||
| 483 | my @xml = (); | |||
| 484 | push( @xml, header( $enc ) ) unless ($without_header); | |||
| 485 | push( @xml, record( $record, $format, $without_header, $enc ) ); | |||
| 486 | push( @xml, footer() ) unless ($without_header); | |||
| 487 | ||||
| 488 | return( join( "\n", @xml ) ); | |||
| 489 | } | |||
| 490 | ||||
| 491 | sub _unimarc_encoding { | |||
| 492 | my $f = shift; | |||
| 493 | my $r = shift; | |||
| 494 | ||||
| 495 | my $pos = 26; | |||
| 496 | $pos = 13 if (lc($f) eq 'unimarcauth'); | |||
| 497 | ||||
| 498 | my $enc = substr( $r->subfield(100 => 'a'), $pos, 2 ); | |||
| 499 | ||||
| 500 | if ($enc eq '01' || $enc eq '03') { | |||
| 501 | return 'ISO-8859-1'; | |||
| 502 | } elsif ($enc eq '50') { | |||
| 503 | return 'UTF-8'; | |||
| 504 | } else { | |||
| 505 | die "Unsupported UNIMARC character encoding [$enc] for XML output for $f; 100$a -> " . $r->subfield(100 => 'a'); | |||
| 506 | } | |||
| 507 | } | |||
| 508 | ||||
| 509 | =head1 TODO | |||
| 510 | ||||
| 511 | =over 4 | |||
| 512 | ||||
| 513 | =item * Support for callback filters in decode(). | |||
| 514 | ||||
| 515 | =item * Command line utilities marc2xml, etc. | |||
| 516 | ||||
| 517 | =back | |||
| 518 | ||||
| 519 | =head1 SEE ALSO | |||
| 520 | ||||
| 521 | =over 4 | |||
| 522 | ||||
| 523 | =item L<http://www.loc.gov/standards/marcxml/> | |||
| 524 | ||||
| 525 | =item L<MARC::File::USMARC> | |||
| 526 | ||||
| 527 | =item L<MARC::Batch> | |||
| 528 | ||||
| 529 | =item L<MARC::Record> | |||
| 530 | ||||
| 531 | =back | |||
| 532 | ||||
| 533 | =head1 AUTHORS | |||
| 534 | ||||
| 535 | =over 4 | |||
| 536 | ||||
| 537 | =item * Ed Summers <ehs@pobox.com> | |||
| 538 | ||||
| 539 | =back | |||
| 540 | ||||
| 541 | =cut | |||
| 542 | ||||
| 543 | 1 | 20µs | 20µs | 1; |