| File | /usr/share/perl5/MARC/Charset/Table.pm |
| Statements Executed | 32 |
| Total Time | 0.0012879 seconds |
| Calls | P | F | Exclusive Time |
Inclusive Time |
Subroutine |
|---|---|---|---|---|---|
| 1 | 1 | 1 | 81µs | 95µs | MARC::Charset::Table::_init |
| 1 | 1 | 1 | 22µs | 117µs | MARC::Charset::Table::new |
| 1 | 1 | 1 | 14µs | 14µs | MARC::Charset::Table::db_path |
| 0 | 0 | 0 | 0s | 0s | MARC::Charset::Table::BEGIN |
| 0 | 0 | 0 | 0s | 0s | MARC::Charset::Table::add_code |
| 0 | 0 | 0 | 0s | 0s | MARC::Charset::Table::brand_new |
| 0 | 0 | 0 | 0s | 0s | MARC::Charset::Table::db |
| 0 | 0 | 0 | 0s | 0s | MARC::Charset::Table::get_code |
| 0 | 0 | 0 | 0s | 0s | MARC::Charset::Table::lookup_by_marc8 |
| 0 | 0 | 0 | 0s | 0s | MARC::Charset::Table::lookup_by_utf8 |
| Line | Stmts. | Exclusive Time | Avg. | Code |
|---|---|---|---|---|
| 1 | package MARC::Charset::Table; | |||
| 2 | ||||
| 3 | =head1 NAME | |||
| 4 | ||||
| 5 | MARC::Charset::Table - character mapping db | |||
| 6 | ||||
| 7 | =head1 SYNOPSIS | |||
| 8 | ||||
| 9 | use MARC::Charset::Table; | |||
| 10 | use MARC::Charset::Constants qw(:all); | |||
| 11 | ||||
| 12 | # create the table object | |||
| 13 | my $table = MARC::Charset::Table->new(); | |||
| 14 | ||||
| 15 | # get a code using the marc8 character set code and the character | |||
| 16 | my $code = $table->lookup_by_marc8(CYRILLIC_BASIC, 'K'); | |||
| 17 | ||||
| 18 | # get a code using the utf8 value | |||
| 19 | $code = $table->lookup_by_utf8(chr(0x043A)); | |||
| 20 | ||||
| 21 | =head1 DESCRIPTION | |||
| 22 | ||||
| 23 | MARC::Charset::Table is a wrapper around the character mapping database, | |||
| 24 | which is implemented as a tied hash on disk. This database gets generated | |||
| 25 | by Makefile.PL on installation of MARC::Charset using | |||
| 26 | MARC::Charset::Compiler. | |||
| 27 | ||||
| 28 | The database is essentially a key/value mapping where a key is a | |||
| 29 | MARC-8 character set code + a MARC-8 character, or an integer representing the | |||
| 30 | UCS code point. These keys map to a serialized MARC::Charset::Code object. | |||
| 31 | ||||
| 32 | =cut | |||
| 33 | ||||
| 34 | 3 | 33µs | 11µs | use strict; # spent 11µs making 1 call to strict::import |
| 35 | 3 | 30µs | 10µs | use warnings; # spent 25µs making 1 call to warnings::import |
| 36 | 3 | 132µs | 44µs | use POSIX; # spent 7.29ms making 1 call to POSIX::import |
| 37 | 3 | 144µs | 48µs | use SDBM_File; # spent 6µs making 1 call to import |
| 38 | 3 | 138µs | 46µs | use MARC::Charset::Code; # spent 7µs making 1 call to import |
| 39 | 3 | 34µs | 11µs | use MARC::Charset::Constants qw(:all); # spent 244µs making 1 call to Exporter::import |
| 40 | 3 | 660µs | 220µs | use Storable qw(freeze thaw); # spent 88µs making 1 call to Exporter::import |
| 41 | ||||
| 42 | =head2 new() | |||
| 43 | ||||
| 44 | The consturctor. | |||
| 45 | ||||
| 46 | =cut | |||
| 47 | ||||
| 48 | sub new | |||
| 49 | # spent 117µs (22+95) within MARC::Charset::Table::new which was called
# once (22µs+95µs) at line 44 of /usr/share/perl5/MARC/Charset.pm | |||
| 50 | 1 | 1µs | 1µs | my $class = shift; |
| 51 | 1 | 10µs | 10µs | my $self = bless {}, ref($class) || $class; |
| 52 | 1 | 7µs | 7µs | $self->_init(O_RDONLY); # spent 95µs making 1 call to MARC::Charset::Table::_init |
| 53 | 1 | 1µs | 1µs | return $self; |
| 54 | } | |||
| 55 | ||||
| 56 | ||||
| 57 | =head2 add_code() | |||
| 58 | ||||
| 59 | Add a MARC::Charset::Code to the table. | |||
| 60 | ||||
| 61 | =cut | |||
| 62 | ||||
| 63 | ||||
| 64 | sub add_code | |||
| 65 | { | |||
| 66 | my ($self, $code) = @_; | |||
| 67 | ||||
| 68 | # the Code object is serialized | |||
| 69 | my $frozen = freeze($code); | |||
| 70 | ||||
| 71 | # to support lookup by marc8 and utf8 values we | |||
| 72 | # stash away the rule in the db using two keys | |||
| 73 | my $marc8_key = $code->marc8_hash_code(); | |||
| 74 | my $utf8_key = $code->utf8_hash_code(); | |||
| 75 | ||||
| 76 | # stash away the marc8 lookup key | |||
| 77 | $self->{db}->{$marc8_key} = $frozen; | |||
| 78 | ||||
| 79 | # stash away the utf8 lookup key (only if it's not already there!) | |||
| 80 | # this means that the sets that appear in the xml file will have | |||
| 81 | # precedence ascii/ansel | |||
| 82 | $self->{db}->{$utf8_key} = $frozen unless exists $self->{db}->{$utf8_key}; | |||
| 83 | } | |||
| 84 | ||||
| 85 | ||||
| 86 | =head2 get_code() | |||
| 87 | ||||
| 88 | Retrieve a code using a hash key. | |||
| 89 | ||||
| 90 | =cut | |||
| 91 | ||||
| 92 | sub get_code | |||
| 93 | { | |||
| 94 | my ($self, $key) = @_; | |||
| 95 | my $db = $self->db(); | |||
| 96 | my $frozen = $db->{$key}; | |||
| 97 | return thaw($frozen) if $frozen; | |||
| 98 | return undef; | |||
| 99 | } | |||
| 100 | ||||
| 101 | ||||
| 102 | =head2 lookup_by_marc8() | |||
| 103 | ||||
| 104 | Looks up MARC::Charset::Code entry using a character set code and a MARC-8 | |||
| 105 | value. | |||
| 106 | ||||
| 107 | use MARC::Charset::Constants qw(HEBREW); | |||
| 108 | $code = $table->lookup_by_marc8(HEBREW, chr(0x60)); | |||
| 109 | ||||
| 110 | =cut | |||
| 111 | ||||
| 112 | sub lookup_by_marc8 | |||
| 113 | { | |||
| 114 | my ($self, $charset, $marc8) = @_; | |||
| 115 | $charset = BASIC_LATIN if $charset eq ASCII_DEFAULT; | |||
| 116 | return $self->get_code(sprintf('%s:%s', $charset, $marc8)); | |||
| 117 | } | |||
| 118 | ||||
| 119 | ||||
| 120 | =head2 lookup_by_utf8() | |||
| 121 | ||||
| 122 | Looks up a MARC::Charset::Code object using a utf8 value. | |||
| 123 | ||||
| 124 | =cut | |||
| 125 | ||||
| 126 | sub lookup_by_utf8 | |||
| 127 | { | |||
| 128 | my ($self, $value) = @_; | |||
| 129 | return $self->get_code(ord($value)); | |||
| 130 | } | |||
| 131 | ||||
| 132 | ||||
| 133 | ||||
| 134 | ||||
| 135 | =head2 db() | |||
| 136 | ||||
| 137 | Returns a reference to a tied character database. MARC::Charset::Table | |||
| 138 | wraps access to the db, but you can get at it if you want. | |||
| 139 | ||||
| 140 | =cut | |||
| 141 | ||||
| 142 | sub db | |||
| 143 | { | |||
| 144 | return shift->{db}; | |||
| 145 | } | |||
| 146 | ||||
| 147 | ||||
| 148 | =head2 db_path() | |||
| 149 | ||||
| 150 | Returns the path to the character encoding database. Can be called | |||
| 151 | statically too: | |||
| 152 | ||||
| 153 | print MARC::Charset::Table->db_path(); | |||
| 154 | ||||
| 155 | =cut | |||
| 156 | ||||
| 157 | sub db_path | |||
| 158 | # spent 14µs within MARC::Charset::Table::db_path which was called
# once (14µs+0s) by MARC::Charset::Table::_init at line 187 | |||
| 159 | 1 | 1µs | 1µs | my $path = $INC{'MARC/Charset/Table.pm'}; |
| 160 | 1 | 4µs | 4µs | $path =~ s/\.pm$//; |
| 161 | 1 | 2µs | 2µs | return $path; |
| 162 | } | |||
| 163 | ||||
| 164 | ||||
| 165 | =head2 brand_new() | |||
| 166 | ||||
| 167 | An alternate constructor which removes the existing database and starts | |||
| 168 | afresh. Be careful with this one, it's really only used on MARC::Charset | |||
| 169 | installation. | |||
| 170 | ||||
| 171 | =cut | |||
| 172 | ||||
| 173 | sub brand_new | |||
| 174 | { | |||
| 175 | my $class = shift; | |||
| 176 | my $self = bless {}, ref($class) || $class; | |||
| 177 | $self->_init(O_CREAT|O_RDWR); | |||
| 178 | return $self; | |||
| 179 | } | |||
| 180 | ||||
| 181 | ||||
| 182 | # helper function for initializing table internals | |||
| 183 | ||||
| 184 | sub _init | |||
| 185 | # spent 95µs (81+14) within MARC::Charset::Table::_init which was called
# once (81µs+14µs) by MARC::Charset::Table::new at line 52 | |||
| 186 | 1 | 1µs | 1µs | my ($self,$opts) = @_; |
| 187 | 1 | 73µs | 73µs | tie my %db, 'SDBM_File', db_path(), $opts, 0644; # spent 14µs making 1 call to MARC::Charset::Table::db_path |
| 188 | 1 | 9µs | 9µs | $self->{db} = \%db; |
| 189 | } | |||
| 190 | ||||
| 191 | ||||
| 192 | ||||
| 193 | ||||
| 194 | ||||
| 195 | 1 | 8µs | 8µs | 1; |