| File | /usr/lib/perl/5.10/Unicode/Normalize.pm |
| Statements Executed | 29 |
| Total Time | 0.0009176 seconds |
| Calls | P | F | Exclusive Time | Inclusive Time | Subroutine |
|---|---|---|---|---|---|
| 2 | 1 | 2 | 114µs | 114µs | Unicode::Normalize::bootstrap(xsub) |
| 0 | 0 | 0 | 0s | 0s | Unicode::Normalize::BEGIN |
| 0 | 0 | 0 | 0s | 0s | Unicode::Normalize::FCD |
| 0 | 0 | 0 | 0s | 0s | Unicode::Normalize::check |
| 0 | 0 | 0 | 0s | 0s | Unicode::Normalize::normalize |
| 0 | 0 | 0 | 0s | 0s | Unicode::Normalize::pack_U |
| 0 | 0 | 0 | 0s | 0s | Unicode::Normalize::unpack_U |
| Line | Stmts. | Exclusive Time | Avg. | Code |
|---|---|---|---|---|
| 1 | package Unicode::Normalize; | |||
| 2 | ||||
| 3 | BEGIN { | |||
| 4 | 1 | 18µs | 18µs | unless ("A" eq pack('U', 0x41)) { |
| 5 | die "Unicode::Normalize cannot stringify a Unicode code point\n"; | |||
| 6 | } | |||
| 7 | 1 | 24µs | 24µs | } |
| 8 | ||||
| 9 | 3 | 85µs | 28µs | use 5.006; |
| 10 | 3 | 22µs | 7µs | use strict; # spent 7µs making 1 call to strict::import |
| 11 | 3 | 25µs | 8µs | use warnings; # spent 23µs making 1 call to warnings::import |
| 12 | 3 | 37µs | 12µs | use Carp; # spent 58µs making 1 call to Exporter::import |
| 13 | ||||
| 14 | 3 | 575µs | 192µs | no warnings 'utf8'; # spent 24µs making 1 call to warnings::unimport |
| 15 | ||||
| 16 | 1 | 900ns | 900ns | our $VERSION = '1.02'; |
| 17 | 1 | 400ns | 400ns | our $PACKAGE = __PACKAGE__; |
| 18 | ||||
| 19 | 1 | 800ns | 800ns | require Exporter; |
| 20 | 1 | 600ns | 600ns | require DynaLoader; |
| 21 | ||||
| 22 | 1 | 12µs | 12µs | our @ISA = qw(Exporter DynaLoader); |
| 23 | 1 | 2µs | 2µs | our @EXPORT = qw( NFC NFD NFKC NFKD ); |
| 24 | 1 | 8µs | 8µs | our @EXPORT_OK = qw( |
| 25 | normalize decompose reorder compose | |||
| 26 | checkNFD checkNFKD checkNFC checkNFKC check | |||
| 27 | getCanon getCompat getComposite getCombinClass | |||
| 28 | isExclusion isSingleton isNonStDecomp isComp2nd isComp_Ex | |||
| 29 | isNFD_NO isNFC_NO isNFC_MAYBE isNFKD_NO isNFKC_NO isNFKC_MAYBE | |||
| 30 | FCD checkFCD FCC checkFCC composeContiguous | |||
| 31 | splitOnLastStarter | |||
| 32 | ); | |||
| 33 | 1 | 17µs | 17µs | our %EXPORT_TAGS = ( |
| 34 | all => [ @EXPORT, @EXPORT_OK ], | |||
| 35 | normalize => [ @EXPORT, qw/normalize decompose reorder compose/ ], | |||
| 36 | check => [ qw/checkNFD checkNFKD checkNFC checkNFKC check/ ], | |||
| 37 | fast => [ qw/FCD checkFCD FCC checkFCC composeContiguous/ ], | |||
| 38 | ); | |||
| 39 | ||||
| 40 | ###### | |||
| 41 | ||||
| 42 | 1 | 16µs | 16µs | bootstrap Unicode::Normalize $VERSION; # spent 934µs making 1 call to DynaLoader::bootstrap |
| 43 | ||||
| 44 | ###### | |||
| 45 | ||||
| 46 | ## | |||
| 47 | ## utilities for tests | |||
| 48 | ## | |||
| 49 | ||||
| 50 | sub pack_U { | |||
| 51 | return pack('U*', @_); | |||
| 52 | } | |||
| 53 | ||||
| 54 | sub unpack_U { | |||
| 55 | return unpack('U*', shift(@_).pack('U*')); | |||
| 56 | } | |||
| 57 | ||||
| 58 | ||||
| 59 | ## | |||
| 60 | ## normalization forms | |||
| 61 | ## | |||
| 62 | ||||
| 63 | sub FCD ($) { | |||
| 64 | my $str = shift; | |||
| 65 | return checkFCD($str) ? $str : NFD($str); | |||
| 66 | } | |||
| 67 | ||||
| 68 | 1 | 18µs | 18µs | our %formNorm = ( |
| 69 | NFC => \&NFC, C => \&NFC, | |||
| 70 | NFD => \&NFD, D => \&NFD, | |||
| 71 | NFKC => \&NFKC, KC => \&NFKC, | |||
| 72 | NFKD => \&NFKD, KD => \&NFKD, | |||
| 73 | FCD => \&FCD, FCC => \&FCC, | |||
| 74 | ); | |||
| 75 | ||||
| 76 | sub normalize($$) | |||
| 77 | { | |||
| 78 | my $form = shift; | |||
| 79 | my $str = shift; | |||
| 80 | if (exists $formNorm{$form}) { | |||
| 81 | return $formNorm{$form}->($str); | |||
| 82 | } | |||
| 83 | croak($PACKAGE."::normalize: invalid form name: $form"); | |||
| 84 | } | |||
| 85 | ||||
| 86 | ||||
| 87 | ## | |||
| 88 | ## quick check | |||
| 89 | ## | |||
| 90 | ||||
| 91 | 1 | 10µs | 10µs | our %formCheck = ( |
| 92 | NFC => \&checkNFC, C => \&checkNFC, | |||
| 93 | NFD => \&checkNFD, D => \&checkNFD, | |||
| 94 | NFKC => \&checkNFKC, KC => \&checkNFKC, | |||
| 95 | NFKD => \&checkNFKD, KD => \&checkNFKD, | |||
| 96 | FCD => \&checkFCD, FCC => \&checkFCC, | |||
| 97 | ); | |||
| 98 | ||||
| 99 | sub check($$) | |||
| 100 | { | |||
| 101 | my $form = shift; | |||
| 102 | my $str = shift; | |||
| 103 | if (exists $formCheck{$form}) { | |||
| 104 | return $formCheck{$form}->($str); | |||
| 105 | } | |||
| 106 | croak($PACKAGE."::check: invalid form name: $form"); | |||
| 107 | } | |||
| 108 | ||||
| 109 | 1 | 45µs | 45µs | 1; |
| 110 | __END__ | |||
| 111 | ||||
| 112 | =head1 NAME | |||
| 113 | ||||
| 114 | Unicode::Normalize - Unicode Normalization Forms | |||
| 115 | ||||
| 116 | =head1 SYNOPSIS | |||
| 117 | ||||
| 118 | (1) using function names exported by default: | |||
| 119 | ||||
| 120 | use Unicode::Normalize; | |||
| 121 | ||||
| 122 | $NFD_string = NFD($string); # Normalization Form D | |||
| 123 | $NFC_string = NFC($string); # Normalization Form C | |||
| 124 | $NFKD_string = NFKD($string); # Normalization Form KD | |||
| 125 | $NFKC_string = NFKC($string); # Normalization Form KC | |||
| 126 | ||||
| 127 | (2) using function names exported on request: | |||
| 128 | ||||
| 129 | use Unicode::Normalize 'normalize'; | |||
| 130 | ||||
| 131 | $NFD_string = normalize('D', $string); # Normalization Form D | |||
| 132 | $NFC_string = normalize('C', $string); # Normalization Form C | |||
| 133 | $NFKD_string = normalize('KD', $string); # Normalization Form KD | |||
| 134 | $NFKC_string = normalize('KC', $string); # Normalization Form KC | |||
| 135 | ||||
| 136 | =head1 DESCRIPTION | |||
| 137 | ||||
| 138 | Parameters: | |||
| 139 | ||||
| 140 | C<$string> is used as a string under character semantics (see F<perlunicode>). | |||
| 141 | ||||
| 142 | C<$code_point> should be an unsigned integer representing a Unicode code point. | |||
| 143 | ||||
| 144 | Note: Between XSUB and pure Perl, there is an incompatibility | |||
| 145 | about the interpretation of C<$code_point> as a decimal number. | |||
| 146 | XSUB converts C<$code_point> to an unsigned integer, but pure Perl does not. | |||
| 147 | Do not use a floating-point value or a negative sign in C<$code_point>. | |||
| 148 | ||||
| 149 | =head2 Normalization Forms | |||
| 150 | ||||
| 151 | =over 4 | |||
| 152 | ||||
| 153 | =item C<$NFD_string = NFD($string)> | |||
| 154 | ||||
| 155 | It returns the Normalization Form D (formed by canonical decomposition). | |||
| 156 | ||||
| 157 | =item C<$NFC_string = NFC($string)> | |||
| 158 | ||||
| 159 | It returns the Normalization Form C (formed by canonical decomposition | |||
| 160 | followed by canonical composition). | |||
| 161 | ||||
| 162 | =item C<$NFKD_string = NFKD($string)> | |||
| 163 | ||||
| 164 | It returns the Normalization Form KD (formed by compatibility decomposition). | |||
| 165 | ||||
| 166 | =item C<$NFKC_string = NFKC($string)> | |||
| 167 | ||||
| 168 | It returns the Normalization Form KC (formed by compatibility decomposition | |||
| 169 | followed by B<canonical> composition). | |||
| 170 | ||||
| 171 | =item C<$FCD_string = FCD($string)> | |||
| 172 | ||||
| 173 | If the given string is in FCD ("Fast C or D" form; cf. UTN #5), | |||
| 174 | it returns the string without modification; otherwise it returns an FCD string. | |||
| 175 | ||||
| 176 | Note: FCD is not always unique, so multiple forms may be equivalent | |||
| 177 | to each other. C<FCD()> will return one of these equivalent forms. | |||
| 178 | ||||
| 179 | =item C<$FCC_string = FCC($string)> | |||
| 180 | ||||
| 181 | It returns the FCC form ("Fast C Contiguous"; cf. UTN #5). | |||
| 182 | ||||
| 183 | Note: FCC is unique, as are the four normalization forms (NF*). | |||
| 184 | ||||
| 185 | =item C<$normalized_string = normalize($form_name, $string)> | |||
| 186 | ||||
| 187 | It returns the normalization form of C<$form_name>. | |||
| 188 | ||||
| 189 | As C<$form_name>, one of the following names must be given. | |||
| 190 | ||||
| 191 | 'C' or 'NFC' for Normalization Form C (UAX #15) | |||
| 192 | 'D' or 'NFD' for Normalization Form D (UAX #15) | |||
| 193 | 'KC' or 'NFKC' for Normalization Form KC (UAX #15) | |||
| 194 | 'KD' or 'NFKD' for Normalization Form KD (UAX #15) | |||
| 195 | ||||
| 196 | 'FCD' for "Fast C or D" Form (UTN #5) | |||
| 197 | 'FCC' for "Fast C Contiguous" (UTN #5) | |||
| 198 | ||||
| 199 | =back | |||
| 200 | ||||
| 201 | =head2 Decomposition and Composition | |||
| 202 | ||||
| 203 | =over 4 | |||
| 204 | ||||
| 205 | =item C<$decomposed_string = decompose($string [, $useCompatMapping])> | |||
| 206 | ||||
| 207 | It returns the concatenation of the decomposition of each character | |||
| 208 | in the string. | |||
| 209 | ||||
| 210 | If the second parameter (a boolean) is omitted or false, | |||
| 211 | the decomposition is canonical decomposition; | |||
| 212 | if the second parameter (a boolean) is true, | |||
| 213 | the decomposition is compatibility decomposition. | |||
| 214 | ||||
| 215 | The string returned is not always in NFD/NFKD. Reordering may be required. | |||
| 216 | ||||
| 217 | $NFD_string = reorder(decompose($string)); # eq. to NFD() | |||
| 218 | $NFKD_string = reorder(decompose($string, TRUE)); # eq. to NFKD() | |||
| 219 | ||||
| 220 | =item C<$reordered_string = reorder($string)> | |||
| 221 | ||||
| 222 | It returns the result of reordering the combining characters | |||
| 223 | according to Canonical Ordering Behavior. | |||
| 224 | ||||
| 225 | For example, when you have a list of NFD/NFKD strings, | |||
| 226 | you can get the concatenated NFD/NFKD string from them, by saying | |||
| 227 | ||||
| 228 | $concat_NFD = reorder(join '', @NFD_strings); | |||
| 229 | $concat_NFKD = reorder(join '', @NFKD_strings); | |||
| 230 | ||||
| 231 | =item C<$composed_string = compose($string)> | |||
| 232 | ||||
| 233 | It returns the result of canonical composition | |||
| 234 | without applying any decomposition. | |||
| 235 | ||||
| 236 | For example, when you have a NFD/NFKD string, | |||
| 237 | you can get its NFC/NFKC string, by saying | |||
| 238 | ||||
| 239 | $NFC_string = compose($NFD_string); | |||
| 240 | $NFKC_string = compose($NFKD_string); | |||
| 241 | ||||
| 242 | =back | |||
| 243 | ||||
| 244 | =head2 Quick Check | |||
| 245 | ||||
| 246 | (see Annex 8, UAX #15; and F<DerivedNormalizationProps.txt>) | |||
| 247 | ||||
| 248 | The following functions check whether the string is in that normalization form. | |||
| 249 | ||||
| 250 | The result returned will be one of the following: | |||
| 251 | ||||
| 252 | YES The string is in that normalization form. | |||
| 253 | NO The string is not in that normalization form. | |||
| 254 | MAYBE Dubious. Maybe yes, maybe no. | |||
| 255 | ||||
| 256 | =over 4 | |||
| 257 | ||||
| 258 | =item C<$result = checkNFD($string)> | |||
| 259 | ||||
| 260 | It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>. | |||
| 261 | ||||
| 262 | =item C<$result = checkNFC($string)> | |||
| 263 | ||||
| 264 | It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>; | |||
| 265 | C<undef> if C<MAYBE>. | |||
| 266 | ||||
| 267 | =item C<$result = checkNFKD($string)> | |||
| 268 | ||||
| 269 | It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>. | |||
| 270 | ||||
| 271 | =item C<$result = checkNFKC($string)> | |||
| 272 | ||||
| 273 | It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>; | |||
| 274 | C<undef> if C<MAYBE>. | |||
| 275 | ||||
| 276 | =item C<$result = checkFCD($string)> | |||
| 277 | ||||
| 278 | It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>. | |||
| 279 | ||||
| 280 | =item C<$result = checkFCC($string)> | |||
| 281 | ||||
| 282 | It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>; | |||
| 283 | C<undef> if C<MAYBE>. | |||
| 284 | ||||
| 285 | Note: If a string is not in FCD, it must not be in FCC. | |||
| 286 | So C<checkFCC($not_FCD_string)> should return C<NO>. | |||
| 287 | ||||
| 288 | =item C<$result = check($form_name, $string)> | |||
| 289 | ||||
| 290 | It returns true (C<1>) if C<YES>; false (C<empty string>) if C<NO>; | |||
| 291 | C<undef> if C<MAYBE>. | |||
| 292 | ||||
| 293 | As C<$form_name>, one of the following names must be given. | |||
| 294 | ||||
| 295 | 'C' or 'NFC' for Normalization Form C (UAX #15) | |||
| 296 | 'D' or 'NFD' for Normalization Form D (UAX #15) | |||
| 297 | 'KC' or 'NFKC' for Normalization Form KC (UAX #15) | |||
| 298 | 'KD' or 'NFKD' for Normalization Form KD (UAX #15) | |||
| 299 | ||||
| 300 | 'FCD' for "Fast C or D" Form (UTN #5) | |||
| 301 | 'FCC' for "Fast C Contiguous" (UTN #5) | |||
| 302 | ||||
| 303 | =back | |||
| 304 | ||||
| 305 | B<Note> | |||
| 306 | ||||
| 307 | In the cases of NFD, NFKD, and FCD, the answer must be | |||
| 308 | either C<YES> or C<NO>. The answer C<MAYBE> may be returned | |||
| 309 | in the cases of NFC, NFKC, and FCC. | |||
| 310 | ||||
| 311 | A C<MAYBE> string should contain at least one combining character | |||
| 312 | or the like. For example, C<COMBINING ACUTE ACCENT> has | |||
| 313 | the MAYBE_NFC/MAYBE_NFKC property. | |||
| 314 | ||||
| 315 | Both C<checkNFC("A\N{COMBINING ACUTE ACCENT}")> | |||
| 316 | and C<checkNFC("B\N{COMBINING ACUTE ACCENT}")> will return C<MAYBE>. | |||
| 317 | C<"A\N{COMBINING ACUTE ACCENT}"> is not in NFC | |||
| 318 | (its NFC is C<"\N{LATIN CAPITAL LETTER A WITH ACUTE}">), | |||
| 319 | while C<"B\N{COMBINING ACUTE ACCENT}"> is in NFC. | |||
| 320 | ||||
| 321 | If you want to check exactly, compare the string with its NFC/NFKC/FCC. | |||
| 322 | ||||
| 323 | if ($string eq NFC($string)) { | |||
| 324 | # $string is exactly normalized in NFC; | |||
| 325 | } else { | |||
| 326 | # $string is not normalized in NFC; | |||
| 327 | } | |||
| 328 | ||||
| 329 | if ($string eq NFKC($string)) { | |||
| 330 | # $string is exactly normalized in NFKC; | |||
| 331 | } else { | |||
| 332 | # $string is not normalized in NFKC; | |||
| 333 | } | |||
| 334 | ||||
| 335 | =head2 Character Data | |||
| 336 | ||||
| 337 | These functions are an interface to the character data used internally. | |||
| 338 | If you only want to get Unicode normalization forms, you don't need | |||
| 339 | to call them yourself. | |||
| 340 | ||||
| 341 | =over 4 | |||
| 342 | ||||
| 343 | =item C<$canonical_decomposition = getCanon($code_point)> | |||
| 344 | ||||
| 345 | If the character is canonically decomposable (including Hangul Syllables), | |||
| 346 | it returns the (full) canonical decomposition as a string. | |||
| 347 | Otherwise it returns C<undef>. | |||
| 348 | ||||
| 349 | B<Note:> According to the Unicode standard, the canonical decomposition | |||
| 350 | of a character that is not canonically decomposable is the same as | |||
| 351 | the character itself. | |||
| 352 | ||||
| 353 | =item C<$compatibility_decomposition = getCompat($code_point)> | |||
| 354 | ||||
| 355 | If the character is compatibility decomposable (including Hangul Syllables), | |||
| 356 | it returns the (full) compatibility decomposition as a string. | |||
| 357 | Otherwise it returns C<undef>. | |||
| 358 | ||||
| 359 | B<Note:> According to the Unicode standard, the compatibility decomposition | |||
| 360 | of a character that is not compatibility decomposable is the same as | |||
| 361 | the character itself. | |||
| 362 | ||||
| 363 | =item C<$code_point_composite = getComposite($code_point_here, $code_point_next)> | |||
| 364 | ||||
| 365 | If two characters here and next (as code points) are composable | |||
| 366 | (including Hangul Jamo/Syllables and Composition Exclusions), | |||
| 367 | it returns the code point of the composite. | |||
| 368 | ||||
| 369 | If they are not composable, it returns C<undef>. | |||
| 370 | ||||
| 371 | =item C<$combining_class = getCombinClass($code_point)> | |||
| 372 | ||||
| 373 | It returns the combining class (as an integer) of the character. | |||
| 374 | ||||
| 375 | =item C<$may_be_composed_with_prev_char = isComp2nd($code_point)> | |||
| 376 | ||||
| 377 | It returns a boolean whether the character of the specified codepoint | |||
| 378 | may be composed with the previous one in a certain composition | |||
| 379 | (including Hangul Compositions, but excluding | |||
| 380 | Composition Exclusions and Non-Starter Decompositions). | |||
| 381 | ||||
| 382 | =item C<$is_exclusion = isExclusion($code_point)> | |||
| 383 | ||||
| 384 | It returns a boolean whether the code point is a composition exclusion. | |||
| 385 | ||||
| 386 | =item C<$is_singleton = isSingleton($code_point)> | |||
| 387 | ||||
| 388 | It returns a boolean whether the code point is a singleton. | |||
| 389 | ||||
| 390 | =item C<$is_non_starter_decomposition = isNonStDecomp($code_point)> | |||
| 391 | ||||
| 392 | It returns a boolean whether the code point has Non-Starter Decomposition. | |||
| 393 | ||||
| 394 | =item C<$is_Full_Composition_Exclusion = isComp_Ex($code_point)> | |||
| 395 | ||||
| 396 | It returns a boolean of the derived property Comp_Ex | |||
| 397 | (Full_Composition_Exclusion). This property is generated from | |||
| 398 | Composition Exclusions + Singletons + Non-Starter Decompositions. | |||
| 399 | ||||
| 400 | =item C<$NFD_is_NO = isNFD_NO($code_point)> | |||
| 401 | ||||
| 402 | It returns a boolean of the derived property NFD_NO | |||
| 403 | (NFD_Quick_Check=No). | |||
| 404 | ||||
| 405 | =item C<$NFC_is_NO = isNFC_NO($code_point)> | |||
| 406 | ||||
| 407 | It returns a boolean of the derived property NFC_NO | |||
| 408 | (NFC_Quick_Check=No). | |||
| 409 | ||||
| 410 | =item C<$NFC_is_MAYBE = isNFC_MAYBE($code_point)> | |||
| 411 | ||||
| 412 | It returns a boolean of the derived property NFC_MAYBE | |||
| 413 | (NFC_Quick_Check=Maybe). | |||
| 414 | ||||
| 415 | =item C<$NFKD_is_NO = isNFKD_NO($code_point)> | |||
| 416 | ||||
| 417 | It returns a boolean of the derived property NFKD_NO | |||
| 418 | (NFKD_Quick_Check=No). | |||
| 419 | ||||
| 420 | =item C<$NFKC_is_NO = isNFKC_NO($code_point)> | |||
| 421 | ||||
| 422 | It returns a boolean of the derived property NFKC_NO | |||
| 423 | (NFKC_Quick_Check=No). | |||
| 424 | ||||
| 425 | =item C<$NFKC_is_MAYBE = isNFKC_MAYBE($code_point)> | |||
| 426 | ||||
| 427 | It returns a boolean of the derived property NFKC_MAYBE | |||
| 428 | (NFKC_Quick_Check=Maybe). | |||
| 429 | ||||
| 430 | =back | |||
| 431 | ||||
| 432 | =head1 EXPORT | |||
| 433 | ||||
| 434 | C<NFC>, C<NFD>, C<NFKC>, C<NFKD>: by default. | |||
| 435 | ||||
| 436 | C<normalize> and some other functions: on request. | |||
| 437 | ||||
| 438 | =head1 CAVEATS | |||
| 439 | ||||
| 440 | =over 4 | |||
| 441 | ||||
| 442 | =item Perl's version vs. Unicode version | |||
| 443 | ||||
| 444 | Since this module refers to perl core's Unicode database in the directory | |||
| 445 | F</lib/unicore> (or formerly F</lib/unicode>), the Unicode version of | |||
| 446 | normalization implemented by this module depends on your perl's version. | |||
| 447 | ||||
| 448 | perl's version implemented Unicode version | |||
| 449 | 5.6.1 3.0.1 | |||
| 450 | 5.7.2 3.1.0 | |||
| 451 | 5.7.3 3.1.1 (normalization is same as 3.1.0) | |||
| 452 | 5.8.0 3.2.0 | |||
| 453 | 5.8.1-5.8.3 4.0.0 | |||
| 454 | 5.8.4-5.8.6 4.0.1 (normalization is same as 4.0.0) | |||
| 455 | 5.8.7-5.8.8 4.1.0 | |||
| 456 | ||||
| 457 | =item Correction of decomposition mapping | |||
| 458 | ||||
| 459 | In older Unicode versions, a small number of characters (all of which are | |||
| 460 | CJK compatibility ideographs as far as they have been found) may have | |||
| 461 | an erroneous decomposition mapping (see F<NormalizationCorrections.txt>). | |||
| 462 | Anyhow, this module will neither refer to F<NormalizationCorrections.txt> | |||
| 463 | nor provide any specific version of normalization. Therefore this module | |||
| 464 | running on an older perl with an older Unicode database may use | |||
| 465 | the erroneous decomposition mapping blindly conforming to the Unicode database. | |||
| 466 | ||||
| 467 | =item Revised definition of canonical composition | |||
| 468 | ||||
| 469 | In Unicode 4.1.0, the definition D2 of canonical composition (which | |||
| 470 | affects NFC and NFKC) has been changed (see Public Review Issue #29 | |||
| 471 | and recent UAX #15). This module has used the newer definition | |||
| 472 | since the version 0.07 (Oct 31, 2001). | |||
| 473 | This module will not support the normalization according to the older | |||
| 474 | definition, even if the Unicode version implemented by perl is | |||
| 475 | lower than 4.1.0. | |||
| 476 | ||||
| 477 | =back | |||
| 478 | ||||
| 479 | =head1 AUTHOR | |||
| 480 | ||||
| 481 | SADAHIRO Tomoyuki <SADAHIRO@cpan.org> | |||
| 482 | ||||
| 483 | Copyright(C) 2001-2007, SADAHIRO Tomoyuki. Japan. All rights reserved. | |||
| 484 | ||||
| 485 | This module is free software; you can redistribute it | |||
| 486 | and/or modify it under the same terms as Perl itself. | |||
| 487 | ||||
| 488 | =head1 SEE ALSO | |||
| 489 | ||||
| 490 | =over 4 | |||
| 491 | ||||
| 492 | =item http://www.unicode.org/reports/tr15/ | |||
| 493 | ||||
| 494 | Unicode Normalization Forms - UAX #15 | |||
| 495 | ||||
| 496 | =item http://www.unicode.org/Public/UNIDATA/CompositionExclusions.txt | |||
| 497 | ||||
| 498 | Composition Exclusion Table | |||
| 499 | ||||
| 500 | =item http://www.unicode.org/Public/UNIDATA/DerivedNormalizationProps.txt | |||
| 501 | ||||
| 502 | Derived Normalization Properties | |||
| 503 | ||||
| 504 | =item http://www.unicode.org/Public/UNIDATA/NormalizationCorrections.txt | |||
| 505 | ||||
| 506 | Normalization Corrections | |||
| 507 | ||||
| 508 | =item http://www.unicode.org/review/pr-29.html | |||
| 509 | ||||
| 510 | Public Review Issue #29: Normalization Issue | |||
| 511 | ||||
| 512 | =item http://www.unicode.org/notes/tn5/ | |||
| 513 | ||||
| 514 | Canonical Equivalence in Applications - UTN #5 | |||
| 515 | ||||
| 516 | =back | |||
| 517 | ||||
| 518 | =cut | |||
# spent 114µs within Unicode::Normalize::bootstrap which was called
# once (114µs+0s) by DynaLoader::bootstrap at line 219 of /usr/lib/perl/5.10/DynaLoader.pm |