
=head1 名称

ShiftJIS::Collate - Shift-JIS 文字列の照合

=head1 概要

  use ShiftJIS::Collate;

  @sorted = ShiftJIS::Collate->new(%tailoring)->sort(@source);

=head1 詳細

このモジュールは、B<JIS X 4061:1996> （日本語文字列照合順番）
に基づいて、Shift-JIS 文字列を照合し、整列するための関数を提供します。

このモジュールは JIS X 4061:1996 の実装の一つであり、
照合規則は、上記規格に基づいています。
処理系定義項目を参照のこと。

=head2 コンストラクタとテーラリング

C<new> メソッドは、コレータ・オブジェクトを返却します。

   $Collator = ShiftJIS::Collate->new(
      ignoreChar => $regexIgnoredChar,
      kanji => $kanji_class,
      katakana_before_hiragana => $bool,
      level => $collationLevel,
      position_in_bytes => $bool,
      tounicode  => \&sjis_to_unicode,
      preprocess => \&preprocess,
      upper_before_lower => $bool,
   );
   #  %tailoring Ui󃊃XgjȂA
   # $Collator ̓ftHg̏ƍ܂B

=over 4

=item ignoreChar

与えられた正規表現にマッチする文字を、照合に際して無視します。

例えば、長音記号とその半角形を無視するなら、以下のようにします。

   ignoreChar => '^(?:\x81\x5B|\xB0)',

=item katakana_before_hiragana

ftHgł́A͑ΉЉȌɂȂ܂B

̃p[^^ȂȀtɂāAЉ𕽉̑Oɒu܂B

=item kanji

gp銿NXw肵܂BNXɂāAڍׂ͌qB

  Class 1: ŏNX
  Class 2: {NX
  Class 3: gNX

NX C<1>, C<2>, C<3> ̂ꂩ̐lŎw肵܂B
ȗꂽꍇAC<2> p܂B

̕NXł́AgNX̓T|[g܂B
ȂȂAShift-JIS ̃p[gɂ́AׂĂ CJK 
`ȂłB

ANXƂ C<3> w肵ꍇɂ́A
PёQ̊ JIS X 0221 ɕׂ邱Ƃ\ɂȂ܂B
̏ꍇAL<tounicode> R[ht@XƂāA
Shift-JIS  UCS ̕ʒuɑΉtTu[`^
Kv܂B

=item level

Kpő̏ƍxw肵܂BƍxɂāAڍׂ͌qB

  Level 1: {ƍKɂƍ
  Level 2: Pƍ܂ōlƍi̐j
  Level 3: Qƍ܂ōlƍȋ召j
  Level 4: Rƍ܂ōlƍiƕЉj
  Level 5: Sƍ܂ōlƍiSpƔpj

ƍx́AC<1>  C<5> ܂ł̂ꂩ̐lŎw肵܂B
ȗɂ́Alevel 4 Ƃ܂B

 ƍx C<0>  C<4> łȂāAP傫̂́A
JIS X 4061 猩ƁAIł͂܂B
UTS #10, ISO/IEC DIS 14651 QlɂĂ܂B
܂AJIS X 4061 ɂ́ASƍ̋K͂܂B
Sƍ́ÃW[ɂǉłB

=item tounicode

 UCS ɕׂƂAC<kanji>  C<3> ɂŁA
C<tounicode> p[^̒lƂāAShift-JIS ̊
UCS ̕ʒuԂTu[`̃t@Xw肵ĂB

̂悤ȃTu[`́AShift-JIS ŕꂽ
PEQ̊ij UCS ̕ʒu
iC<0x4E00>`C<0x9FFF> ͈̔͂̐jɕϊ̂łȂ΂Ȃ܂B

=item position_in_bytes

ftHgł́AC<index> \bh́Aʂ𕶎PʂŕԂ܂B

̃p[^^ɂƁAʂoCgPʂŕԂ悤ɂȂ܂B

=item preprocess

ɃR[ht@X^ƁA\[gL[쐬OɁA
ƍΏەA^ꂽR[ht@Xpĕϊ܂B
܂AR[ht@X̕Ԃl\[gL[쐬܂B

=item upper_before_lower

ftHgł́A͑啶ȌɂȂ܂B

̃p[^^̏ꍇȀtɂȂ܂B

=back

=head2 比較

=over 4

=item C<$result = $Collator-E<gt>cmp($a, $b)>

Ԃl͈ȉ̂悤ɂȂ܂B

C<$a>  C<$b> 傫Ƃ C<1>A
C<$a>  C<$b> ɏƂ C<0>A
C<$a>  C<$b> 菬Ƃ C<-1>B

=item C<$result = $Collator-E<gt>eq($a, $b)>

=item C<$result = $Collator-E<gt>ne($a, $b)>

=item C<$result = $Collator-E<gt>gt($a, $b)>

=item C<$result = $Collator-E<gt>ge($a, $b)>

=item C<$result = $Collator-E<gt>lt($a, $b)>

=item C<$result = $Collator-E<gt>le($a, $b)>

́AƍKKpĔr邱ƂA
̉ZqƓlȋ@\܂B

=back

=head2 ソート

=over 4

=item C<$sortKey = $Collator-E<gt>getSortKey($string)>

\[gL[Ԃ܂B

\[gL[oCir邱ƂŁA
̔rʂ𓾂邱Ƃł܂B

   $Collator->getSortKey($a) cmp $Collator->getSortKey($b)

      ́AȉƓłB

   $Collator->cmp($a, $b)

=item C<@sorted = $Collator-E<gt>sort(@source)>

uPƍvɂĕ\[g܂B

=item C<@sorted = $Collator-E<gt>sortYomi(@source)>

(ǂݗ, \L) Ȃz񃊃t@X
uǂ݁E\Lƍvɂă\[g܂B

Ⴆ΁AC<@source> ̗vf́AC<['{', 'ɂق']> łB
̔΂ C<['ɂق', '{']> g܂A
(ǂݗ, \L) Ƃ͕ς܂B

uǂ݁E\Lƍv͂QiKŔr܂B

Ⴆ΁Aȉ̃Xguǂ݁E\LƍvŃ\[gƂ܂傤B

C<['ic', 'Ȃ']>, C<['R', '']>, C<['c', '']>,
C<['c', 'Ȃ']>, C<['R', '']>.

܂Aǂݗŏt܂B
(C<''> E<lt> C<''> E<lt> C<''> E<lt> C<'Ȃ'>);

ɁAǂݗ񂪓񓯎mŁA\Lt܂B
(ǂ݂ C<'Ȃ'> ̂̂ł́AC<'ic'> E<lt> C<'c'> ł)

]āAʂ́Aȉ̂悤ɂȂ܂F C<['c', '']> E<lt>
C<['R', '']> E<lt> C<['R', '']> E<lt>
C<['ic', 'Ȃ']> E<lt> C<['c', 'Ȃ']>.

F<sample/yomi.txt> を参照のこと。

=item C<@sorted = $Collator-E<gt>sortDaihyo(@source)>

(ǂݗ, \L) Ȃz񃊃t@X
uȈՑ\ǂݏƍvɂă\[g܂B

F̃W[ł́u{\ǂݏƍv͎Ă܂B

uȈՑ\ǂݏƍv͂TiKŔr܂B

ɁAȈՑ\ǂݏƍŃ\[gꂽXg̗܂B

  ['Sʑ', '߂񂽂'],
  ['QF', 'ɂ傭'],
  ['S', '悶'],
  ['Uʑ', '낭߂񂽂'],
  ['', 'At@ق'],
  ['֐', 'K}񂷂'],
  ['',   'x[^'],
  ['pl',   'L['],
  ['ihr', ''],
  ['Perl',   'p['],
  ['͐',   ''],
  ['͍',   '킢'],
  ['͓c',   '킾'],
  ['͓',   '킿'],
  ['͕',   ''],
  ['pc',   ''],
  ['pc',   'ǂ'],
  ['֓',   'Ƃ'],
  ['͓',   ''],
  ['',   '킵'],
  ['',   '킵'],
  ['c',   '킾'],
  ['V',   '킵'],
  ['V',   '킵'],
  ['Vc',   '킾'],
  ['pc',   '̂'],
  ['y',   ''],
  ['y',   ''],
  ['y',   'ǂ'],
  ['y',   'ǂ'],

(1) \L̐擪̕̕NXrB

   ('Sʑ') < L ('') < e ('ihr') <  ('֓').

(2) ǂݗ̐擪rB

  e.g. '߂񂽂' < 'ɂ傭' < '悶' < '낭߂񂽂'.

(3) \L̐擪rB

  e.g. ('͐','͓c',etc.) < ('pc','pc') < ('֓');

       ('','','c') < ('V','V','Vc').

(4) ǂݗŜrB

  e.g. ['͐', ''] < ['͍', '킢'] < ['͓c', '킾'];

       ['pc', ''] < ['pc', 'ǂ'].

(5) \LŜrB

  e.g. ['', '킵'] < ['', '킵'].

F<sample/daihyo.txt> を参照。

=back

=head2 文字列の検索

=over 4

=item C<$position = $Collator-E<gt>index($string, $substring)>

=item C<($position, $length) = $Collator-E<gt>index($string, $substring)>

C<$substring>  C<$string> ̈ꕔɃ}b`ꍇA
XJ[ReLXgł́A}b`镔̍ŏ̏oʒuԂ܂B
XgReLXgł́Aʒuƃ}b`̕񒷂Ȃ
iQvf́jXgԂ܂B

B<注意> マッチした部分の長さは、C<$substring> の長さと異なる可能性があります。

C<$substring>  C<$string> ̂ǂɂ}b`ȂꍇA
XJ[ReLXgł C<-1> AXgReLXgł͋󃊃XgԂ܂B



  use ShiftJIS::Collate;
  use ShiftJIS::String qw(substr);

  my $Col = ShiftJIS::Collate->new( level => $level );
  my $str = "* ひらがなとカタカナはレベル３では等しいかな。";
  my $sub = "かな";
  my $match;
  if (my @tmp = $Col->index($str, $sub)) {
    $match = substr($str, $tmp[0], $tmp[1]);
  }

If C<$level> is 1, you get C<"がな">;
if C<$level> is 2 or 3, you get C<"カナ">;
if C<$level> is 4 or 5, you get C<"かな">.

 C<substr> ֐AShift-JIS ̕PʂŕȂȂA
C<position_in_bytes> ^ɂĂB

=back

=head1 NOTE

=head2 照合レベル

ƍx́Aƍ肷܂ŁAɍl܂B

ftHgł́Ax 1  4 ܂ł]Ax 5 ͖܂
(JIS X 4061 ł̓x 5 ͋K肳Ă܂jB

=over 4

=item Level 1: ꕶɂ鏇

NX̏́Aȉ̒ʂB

    1   Xy[X
    2   LqL
    3   ʋL
    4   wpL
    5   ʋL
    6   PʋL
    7   ArA
    8   L
    9   eAt@xbg
   10   iƕЉj
   11   
   12   L

At@xbg̓At@xbgɁA͌܏\ɁA JIS X 0208 
_ɂȂB

L ('', 0x81AC, U+3013) ͂Ȃ镶傫iŌjB

`ĂȂAႦΐ䕶ArLA`Ȃǂ́A
ƍɍۂāASɖiȂ̂ƓjB

=item Level 2: ɂ鏇

ł́AAȀƂB
Ⴆ΁A'' < ''; '' < '' < '' ̏B

=item Level 3: ̑召ɂ鏇B

eł́ÁA啶ȌƂB

ł́ALAAJԂLA啶̏ƂB
Ⴆ΁A'[' < '' < 'T' < '' ̏B

=item Level 4: pnɂ鏇

́AЉȌƂBႦ '' < 'A' ̏B

=item Level 5: ɂ鏇

C<Halfwidth and Fullwidth Forms> ɑ镶́A
Ήʏ̏̕ƂB

ȂAJIS X 4061 ɂ̓x 5 ɑK͂܂B
Level 5 ́ÃW[ɂgłB

=back

=head2 漢字クラス

JIS X 4061 ł́AR̊NXK肳Ă܂B
̃W[́ÂAŏNXƊ{NX
T|[gĂ܂B

=over 4

=item ŏNX

ȉ̂T\܂F

    'V' (0x8156, U+3003)
    'W' (0x8157, U+4EDD)
    'X' (0x8158, U+3005)
    'Y' (0x8159, U+3006)
    'Z' (0x815A, U+3007)

'W' ȊO̊́Aƍ̍ۂɖ܂B

=item {NX

ŏNXɁAPёQ̊̂łB
PёQ̊̏́AJIS ̋_ʒȕɂȂ܂B
JIS X 0208 ɋK肳Ȃ́Aƍ̍ۂɖ܂B

=item gNX

ŏNXɁAׂĂ CJK ̂łB
CJK ̏́AUCS ̕ɂȂ܂B

=back

=head2 長音記号と繰り返し記号の置き換え

        SJIS    UCS     

   '['    0x815B  U+30FC  KATAKANA-HIRAGANA PROLONGED SOUND MARK
   ''     0xB0    U+FF70  HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
   'T'    0x8154  U+309D  HIRAGANA ITERATION MARK
   'U'    0x8155  U+309E  HIRAGANA VOICED ITERATION MARK
   'R'    0x8152  U+30FD  KATAKANA ITERATION MARK
   'S'    0x8153  U+30FE  KATAKANA VOICED ITERATION MARK

=over 4

=item KATAKANA-HIRAGANA PROLONGED SOUND MARKs

Lip`܂ށj́AOɉ΁A
Ήꉹ܂͕@iuvjɒu܂B

   

  'J['    'JA'
  'с['    'уC'
  '['  'A'
  's['  'sE'
  '['    ''
  '['    ''

=item HIRAGANA- and KATAKANA ITERATION MARKs

JԂLij́AOɉ΁A
Ή鉼ijɒu܂B

   

  'T'    ''
  'hT'    'h'
  'T'    ''
  'JR'    'JJ'
  '΁R'    '΃n'
  'vR'    'vt'
  'BR'  'BC'
  'sT'  's'

=item HIRAGANA- and KATAKANA VOICED ITERATION MARKs

JԂLij́AOɉ΁A
Ή鉼ijɒu܂B

   

  '́U'  '͂'
  'vU'  'v'
  'vS'  'vu'
  'S'  'S'
  'ES'  'E'

=item ûȂꍇ

ȏ̏ꍇAu͋N܂B

ûȂꍇALьJԂĹA
̉̏ɂȂ܂B

ûȂꍇƂẮAႦ΁A
L̑O CJK ꍇA
JԂL̑OɐꍇA
'AS'i'A' ɂ͑͂܂j܂B

=item ƍ̗

ႦΏƍΏەup[v́AR̏ƍvf܂BȂ킿A
uЉ̃pvAuЉAŒuꂽLvAuЉvłB

    

   up[v͊ꕶupAvɒu܂B

     x 1 ł́u͂vɓB
     x 2 ł́uςvɓAu͂v傫B
     x 3 ł́uρ[vɓAupAvB
     x 4 ł́uρ[v傫B

=back

=head2 処理系定義項目 [JIS X 4061, 6.2]

  (1) ̕@F Shift-JIS.

  (2) ƍΏە̍őLF

  (3) NX̒ǉFȂ

  (4) ̒ǉ͈ȉ̒ʂłB

      IDEOGRAPHIC SPACE uXy[XvNXɒǉB

      ACUTE ACCENT, GRAVE ACCENT, DIAERESIS, CIRCUMFLEX ACCENT
      uLqLvNXɒǉB

      APOSTROPHE, QUOTATION MARK uʋLvNXɒǉB

      HYPHEN-MINUS uwpLvNXɒǉB

  (5) eAt@xbgɂ}NtyуT[JtbNXt
      ́AT|[g܂B

  (6) INXFŏNXъ{NX

=head1 著者

SADAHIRO Tomoyuki <SADAHIRO@cpan.org> 貞廣 知行

  Copyright(C) 2001-2007, SADAHIRO Tomoyuki. Japan. All rights reserved.

  This module is free software; you can redistribute it
  and/or modify it under the same terms as Perl itself.

=head1 参考

=over 4

=item JIS X 4061:1996

日本語文字列照合順番

=item JIS X 0201:1997

７ビット及び８ビットの情報交換用符号化文字集合

=item JIS X 0208:1997

７ビット及び８ビットの２バイト情報交換用符号化漢字集合

=item JIS X 0221:1995

国際符号化文字集合 (UCS) ― 第１部　体系及び基本多言語面

=item 日本工業標準調査会 (JISC)

L<http://www.jisc.go.jp/>

=item 日本規格協会 (JSA)

L<http://www.jsa.or.jp/>

=item Unicode Collation Algorithm (UTS #10)

http://www.unicode.org/reports/tr10/

=item ISO/IEC DIS 14651

http://wwwold.dkuug.dk/jtc1/sc22/wg20/docs/projects#14651

=item L<ShiftJIS::String>

=item L<ShiftJIS::Regexp>

=back

=cut
