#!perl

require 5.006;
use strict;
use warnings;

use Lingua::JA::Sort::JIS qw(kcmp);

use Unicode::CharName qw(uname);

my $output = "element.txt";

print "output collation elements to $output...\n";

open FH, ">$output";
print FH << '';
#  COLLATION ELEMENTS for Lingua::JA::Sort::JIS
#
#   based on Collation of Japanese Character String <JIS X 4061-1996>
#
# * This list is <<privately>> prepared for Lingua::JA::Sort::JIS.pm.
#   This is <<not included>> in the original JIS.
#
# * <CJK> characters are just before GETA MARK.
#
#   Here is based on the basic kanji class of levels 1 and 2 JIS kanji.
#

my %order = (
   Lingua::JA::Sort::JIS::getorder(),
   Lingua::JA::Sort::JIS::getkanji(),
);


for(sort { kcmp([ $order{$a} ], [ $order{$b} ], 5) } keys %order){
    my($u,$v,$m,$n,$name,$ucs,$x,$y);
    ($u,$v) = unpack 'UU', $_;
    $n = uname($u);
    $m = defined $v ? uname($v) : '';
    $x = "U+".sprintf("%04X", $u);
    $y = defined $v ? 'U+'.sprintf("%04X", $v) : '';
    if($n =~ /CJK/){
      $name = '<CJK>';
      $ucs  = $x;
    }
    elsif($m && $n =~ /ITERATION MARK|PROLONGED SOUND/){
      $name = $n . " replaced by " . $m;
      $ucs  = "$x($y)";
    }
    elsif($m =~ s/SEMI-VOICED SOUND MARK//){
      ($m = (split(' ',$n))[-1]) =~ tr/H/P/;
      $name = "halfwidth form of KATAKANA LETTER $m";
      $ucs  = "$x:$y";
    }
    elsif($m =~ s/VOICED SOUND MARK//){
      ($m = (split(' ',$n))[-1]) =~ tr/KSTH/GZDB/;
      $m = 'VU' if $m eq 'U';
      $name = "halfwidth form of KATAKANA LETTER $m";
      $ucs  = "$x:$y";
    }
    else {
      $name = $n;
      $ucs  = $x;
    }
    print FH
        sprintf("[%-15s] ", join(", ", @{ $order{$_} })),
        "$ucs\t$name\n";
}
__END__
