package Lingua::Han2PinYin::Light;

use strict;
# Declare the package globals that are assigned below.
use vars qw($VERSION @ISA @EXPORT);
use Exporter;
$VERSION = '0.02';
# Inherit from Exporter so that `use Lingua::Han2PinYin::Light` can import symbols.
@ISA = qw(Exporter);
# han2pinyin is exported by default; it is the module's only public function.
@EXPORT = qw(han2pinyin);

# Lookup table from character-code boundaries to pinyin syllables.
#
# A two-byte character is read as a big-endian 16-bit integer (first byte
# high, second byte low).  Each key is the EXCLUSIVE upper bound of a run of
# codes that share the syllable stored as the value: the lookup code walks
# the keys in ascending order and returns the value of the first key that is
# greater than the character's code.  For example 45219 (0xB0A3) => 'a' means
# that every supported code below 0xB0A3 is spelled 'a', and the next entry,
# 45232 => 'ai', covers 0xB0A3 up to (but not including) 45232.
#
# The largest key, 55290 (0xD7FA), closes the supported range 0xB0A1-0xD7F9.
# NOTE(review): that range looks like the GB2312 level-1 hanzi area, which is
# ordered by pinyin -- confirm against the encoding table before extending.
# The syllables are written without tone marks; 'v' stands for u-umlaut
# (as in 'lv' and 'nv').
my %py = (
	45219 => 'a',
	45232 => 'ai',
	45241 => 'an',
	45244 => 'ang',
	45253 => 'ao',
	45271 => 'ba',
	45279 => 'bai',
	45294 => 'ban',
	45306 => 'bang',
	45485 => 'bao',
	45500 => 'bei',
	45504 => 'ben',
	45510 => 'beng',
	45534 => 'bi',
	45546 => 'bian',
	45550 => 'biao',
	45554 => 'bie',
	45560 => 'bin',
	45731 => 'bing',
	45752 => 'bo',
	45761 => 'bu',
	45762 => 'ca',
	45773 => 'cai',
	45780 => 'can',
	45785 => 'cang',
	45790 => 'cao',
	45795 => 'ce',
	45797 => 'ceng',
	45808 => 'cha',
	45811 => 'chai',
	45821 => 'chan',
	45996 => 'chang',
	46005 => 'chao',
	46011 => 'che',
	46021 => 'chen',
	46036 => 'cheng',
	46052 => 'chi',
	46057 => 'chong',
	46069 => 'chou',
	46247 => 'chu',
	46248 => 'chuai',
	46255 => 'chuan',
	46261 => 'chuang',
	46266 => 'chui',
	46273 => 'chun',
	46275 => 'chuo',
	46287 => 'ci',
	46293 => 'cong',
	46294 => 'cou',
	46298 => 'cu',
	46301 => 'cuan',
	46309 => 'cui',
	46312 => 'cun',
	46318 => 'cuo',
	46324 => 'da',
	46498 => 'dai',
	46513 => 'dan',
	46518 => 'dang',
	46530 => 'dao',
	46533 => 'de',
	46540 => 'deng',
	46559 => 'di',
	46575 => 'dian',
	46584 => 'diao',
	46753 => 'die',
	46762 => 'ding',
	46763 => 'diu',
	46773 => 'dong',
	46780 => 'dou',
	46795 => 'du',
	46801 => 'duan',
	46805 => 'dui',
	46814 => 'dun',
	46826 => 'duo',
	46839 => 'e',
	46840 => 'en',
	47010 => 'er',
	47018 => 'fa',
	47035 => 'fan',
	47046 => 'fang',
	47058 => 'fei',
	47073 => 'fen',
	47088 => 'feng',
	47089 => 'fo',
	47090 => 'fou',
	47297 => 'fu',
	47299 => 'ga',
	47305 => 'gai',
	47316 => 'gan',
	47325 => 'gang',
	47335 => 'gao',
	47352 => 'ge',
	47353 => 'gei',
	47355 => 'gen',
	47524 => 'geng',
	47539 => 'gong',
	47548 => 'gou',
	47566 => 'gu',
	47572 => 'gua',
	47575 => 'guai',
	47586 => 'guan',
	47589 => 'guang',
	47605 => 'gui',
	47608 => 'gun',
	47614 => 'guo',
	47777 => 'ha',
	47784 => 'hai',
	47803 => 'han',
	47806 => 'hang',
	47815 => 'hao',
	47833 => 'he',
	47835 => 'hei',
	47839 => 'hen',
	47844 => 'heng',
	47853 => 'hong',
	47860 => 'hou',
	48040 => 'hu',
	48049 => 'hua',
	48054 => 'huai',
	48068 => 'huan',
	48082 => 'huang',
	48103 => 'hui',
	48109 => 'hun',
	48119 => 'huo',
	48334 => 'ji',
	48351 => 'jia',
	48553 => 'jian',
	48566 => 'jiang',
	48594 => 'jiao',
	48621 => 'jie',
	48803 => 'jin',
	48828 => 'jing',
	48830 => 'jiong',
	48847 => 'jiu',
	48872 => 'ju',
	48879 => 'juan',
	48889 => 'jue',
	49062 => 'jun',
	49066 => 'ka',
	49071 => 'kai',
	49077 => 'kan',
	49084 => 'kang',
	49088 => 'kao',
	49103 => 'ke',
	49107 => 'ken',
	49109 => 'keng',
	49113 => 'kong',
	49117 => 'kou',
	49124 => 'ku',
	49129 => 'kua',
	49133 => 'kuai',
	49135 => 'kuan',
	49143 => 'kuang',
	49316 => 'kui',
	49320 => 'kun',
	49324 => 'kuo',
	49331 => 'la',
	49334 => 'lai',
	49349 => 'lan',
	49356 => 'lang',
	49365 => 'lao',
	49367 => 'le',
	49378 => 'lei',
	49381 => 'leng',
	49577 => 'li',
	49578 => 'lia',
	49592 => 'lian',
	49603 => 'liang',
	49616 => 'liao',
	49621 => 'lie',
	49633 => 'lin',
	49647 => 'ling',
	49658 => 'liu',
	49829 => 'long',
	49835 => 'lou',
	49855 => 'lu',
	49869 => 'lv',
	49875 => 'luan',
	49877 => 'lue',
	49884 => 'lun',
	49896 => 'luo',
	49905 => 'ma',
	49911 => 'mai',
	50082 => 'man',
	50088 => 'mang',
	50100 => 'mao',
	50101 => 'me',
	50117 => 'mei',
	50120 => 'men',
	50128 => 'meng',
	50142 => 'mi',
	50151 => 'mian',
	50159 => 'miao',
	50161 => 'mie',
	50167 => 'min',
	50173 => 'ming',
	50174 => 'miu',
	50353 => 'mo',
	50356 => 'mou',
	50371 => 'mu',
	50378 => 'na',
	50383 => 'nai',
	50386 => 'nan',
	50387 => 'nang',
	50392 => 'nao',
	50393 => 'ne',
	50395 => 'nei',
	50396 => 'nen',
	50397 => 'neng',
	50408 => 'ni',
	50415 => 'nian',
	50417 => 'niang',
	50419 => 'niao',
	50426 => 'nie',
	50427 => 'nin',
	50595 => 'ning',
	50599 => 'niu',
	50603 => 'nong',
	50606 => 'nu',
	50607 => 'nv',
	50608 => 'nuan',
	50610 => 'nue',
	50614 => 'nuo',
	50615 => 'o',
	50622 => 'ou',
	50628 => 'pa',
	50634 => 'pai',
	50642 => 'pan',
	50647 => 'pang',
	50654 => 'pao',
	50663 => 'pei',
	50665 => 'pen',
	50679 => 'peng',
	50858 => 'pi',
	50862 => 'pian',
	50866 => 'piao',
	50868 => 'pie',
	50873 => 'pin',
	50882 => 'ping',
	50891 => 'po',
	50906 => 'pu',
	50942 => 'qi',
	51107 => 'qia',
	51129 => 'qian',
	51137 => 'qiang',
	51152 => 'qiao',
	51157 => 'qie',
	51168 => 'qin',
	51181 => 'qing',
	51183 => 'qiong',
	51191 => 'qiu',
	51366 => 'qu',
	51377 => 'quan',
	51385 => 'que',
	51387 => 'qun',
	51391 => 'ran',
	51396 => 'rang',
	51399 => 'rao',
	51401 => 're',
	51411 => 'ren',
	51413 => 'reng',
	51414 => 'ri',
	51424 => 'rong',
	51427 => 'rou',
	51437 => 'ru',
	51439 => 'ruan',
	51442 => 'rui',
	51444 => 'run',
	51446 => 'ruo',
	51449 => 'sa',
	51453 => 'sai',
	51619 => 'san',
	51622 => 'sang',
	51626 => 'sao',
	51629 => 'se',
	51630 => 'sen',
	51631 => 'seng',
	51640 => 'sha',
	51642 => 'shai',
	51658 => 'shan',
	51666 => 'shang',
	51677 => 'shao',
	51689 => 'she',
	51705 => 'shen',
	51878 => 'sheng',
	51925 => 'shi',
	51935 => 'shou',
	52130 => 'shu',
	52132 => 'shua',
	52136 => 'shuai',
	52138 => 'shuan',
	52141 => 'shuang',
	52145 => 'shui',
	52149 => 'shun',
	52153 => 'shuo',
	52169 => 'si',
	52177 => 'song',
	52180 => 'sou',
	52193 => 'su',
	52196 => 'suan',
	52207 => 'sui',
	52210 => 'sun',
	52218 => 'suo',
	52389 => 'ta',
	52398 => 'tai',
	52416 => 'tan',
	52429 => 'tang',
	52440 => 'tao',
	52441 => 'te',
	52445 => 'teng',
	52460 => 'ti',
	52468 => 'tian',
	52473 => 'tiao',
	52476 => 'tie',
	52648 => 'ting',
	52661 => 'tong',
	52665 => 'tou',
	52676 => 'tu',
	52678 => 'tuan',
	52684 => 'tui',
	52687 => 'tun',
	52698 => 'tuo',
	52705 => 'wa',
	52707 => 'wai',
	52724 => 'wan',
	52734 => 'wang',
	52929 => 'wei',
	52939 => 'wen',
	52942 => 'weng',
	52951 => 'wo',
	52980 => 'wu',
	53177 => 'xi',
	53190 => 'xia',
	53216 => 'xian',
	53236 => 'xiang',
	53416 => 'xiao',
	53437 => 'xie',
	53447 => 'xin',
	53462 => 'xing',
	53469 => 'xiong',
	53478 => 'xiu',
	53497 => 'xu',
	53669 => 'xuan',
	53675 => 'xue',
	53689 => 'xun',
	53705 => 'ya',
	53738 => 'yan',
	53755 => 'yang',
	53932 => 'yao',
	53947 => 'ye',
	54000 => 'yi',
	54178 => 'yin',
	54196 => 'ying',
	54197 => 'yo',
	54212 => 'yong',
	54233 => 'you',
	54439 => 'yu',
	54459 => 'yuan',
	54469 => 'yue',
	54481 => 'yun',
	54484 => 'za',
	54491 => 'zai',
	54495 => 'zan',
	54498 => 'zang',
	54512 => 'zao',
	54516 => 'ze',
	54517 => 'zei',
	54518 => 'zen',
	54522 => 'zeng',
	54698 => 'zha',
	54704 => 'zhai',
	54721 => 'zhan',
	54736 => 'zhang',
	54746 => 'zhao',
	54756 => 'zhe',
	54772 => 'zhen',
	54949 => 'zheng',
	54992 => 'zhi',
	55003 => 'zhong',
	55017 => 'zhou',
	55205 => 'zhu',
	55207 => 'zhua',
	55208 => 'zhuai',
	55214 => 'zhuan',
	55221 => 'zhuang',
	55227 => 'zhui',
	55229 => 'zhun',
	55240 => 'zhuo',
	55255 => 'zi',
	55262 => 'zong',
	55266 => 'zou',
	55274 => 'zu',
	55276 => 'zuan',
	55280 => 'zui',
	55282 => 'zun',
	55290 => 'zuo',
);

# han2pinyin(STRING) / han2pinyin(LIST)
#
# Public entry point: spell Chinese characters as pinyin.
#
#   * One argument of length 2: treated as a single character; the result
#     of _han2pinyin is returned directly.
#   * One argument of any other length: every pair of bytes in the range
#     0xA1-0xFE is extracted from it and spelled; everything else in the
#     string is ignored.
#   * Any other number of arguments: each argument is spelled as one
#     character.
#
# Except for the single-character case, the return value is the list of
# syllables in list context, or the syllables joined with the list
# separator ($", a space by default) in scalar context.
sub han2pinyin {
	my @chars;

	if (@_ == 1) {
		my ($text) = @_;

		# Exactly one character: no splitting needed.
		return _han2pinyin($text) if length($text) == 2;

		# A longer (or shorter) string: pick out the double-byte characters.
		@chars = $text =~ /([\xa1-\xfe]{2})/g;
	}
	else {
		# The caller already supplied the characters one by one.
		@chars = @_;
	}

	my @spelled = map { _han2pinyin($_) } @chars;

	return wantarray ? @spelled : join($", @spelled);
}

# _han2pinyin(CHAR)
#
# Spell a single two-byte character.
#
# CHAR must be exactly two bytes whose big-endian 16-bit value lies in the
# supported range 0xB0A1-0xD7F9 (lead byte 0xB0-0xD7, trail byte 0xA1-0xFE).
# Returns the pinyin syllable from %py, or the string 'XX' for anything
# else: undef, wrong length, ASCII, symbols, or codes outside the range.
#
# %py maps the exclusive upper bound of each run of codes to its syllable,
# so the answer is the value of the smallest key greater than the code.
{
	# Keys of %py in ascending numeric order; built on first use so the
	# table is sorted once instead of on every call.
	my @bounds;

	sub _han2pinyin {
		my $han = shift;

		# Validate both bytes explicitly.  A single character class such as
		# [\xb0a1-\xd7f9] does not express a 16-bit range and would let
		# almost any input through.
		return 'XX'
			unless defined $han && $han =~ /\A[\xb0-\xd7][\xa1-\xfe]\z/;

		# Big-endian unsigned 16-bit value of the two bytes.
		my $hcode = unpack('n', $han);

		# 0xD7FA-0xD7FE pass the per-byte check but are outside the table.
		return 'XX' if $hcode > 0xd7f9;

		@bounds = sort { $a <=> $b } keys %py unless @bounds;

		foreach my $bound (@bounds) {
			return $py{$bound} if $hcode < $bound;
		}

		# Not reachable with the full table; kept so the function never
		# falls off the end with an unspecified value.
		return 'XX';
	}
}

1;
__END__

=head1 NAME

Lingua::Han2PinYin::Light - convert Chinese characters to their pinyin

=head1 SYNOPSIS

  use Lingua::Han2PinYin::Light;
  
  my $hanzi = "我"; # a single Chinese character
  my $pinyin = han2pinyin($hanzi); # the corresponding spelling
  # now $pinyin is 'wo'
  my $juzi = "我是中国人"; # a whole sentence
  my $juzi_pinyin = han2pinyin($juzi);
  # now $juzi_pinyin is 'wo shi zhong guo ren'
  my @juzi = qw/我 是 中 国 人/; # a list of characters
  my @pinyin = han2pinyin(@juzi);
  # now @pinyin is ('wo', 'shi', 'zhong', 'guo', 'ren')

=head1 DESCRIPTION

A document (in Chinese) at L<http://www.1313s.com/f/Han2PinYin.html> explains why and how this module was written.

This is the light version: it can only handle 3660 characters (those whose two-byte codes lie in the range [\xb0a1-\xd7f9]).

=head1 RESTRICTIONS

For uncommon characters, the distribution of their codes is B<NOT> continuous,
so only the common characters (3660 of them) can be converted.

If a character is a polyphone (DuoYinZi, a character with more than one reading), the module can B<NOT> pick out the correct reading.

=head1 OPTION

The parameter can be a scalar (one or more characters) or an array.

=head1 RETURN VALUE

If the character is a common one, its pinyin (spelling) is returned.

If it is not, 'XX' is returned.

In list context an array is returned; otherwise a scalar is returned.

=head1 BUGS

I'm not sure that it works on every OS or with every version of perl.

The following configuration has been tested; feel free to report any bugs or corrections:

Win2000(SP4) + ActivePerl 5.8.5

=head1 SEE ALSO

Lingua::Han2PinYin

=head1 AUTHOR

Fayland, fayland@gmail.com