#!/usr/bin/perl

use strict;
use utf8;
use warnings;

our $VERSION = 1;

our $UNICODE_VERSION = undef;
our $UNICODE_DATE = undef;

our $SRC = "data/IdnaMappingTable.txt";
open IdnaMappingTable, '<', $SRC || die "cannot open $SRC: $!";

my (%is,%map);

while(<IdnaMappingTable>) {
  chomp;

  if(m/^\s*#\s*IdnaMappingTable-(\d+)\.(\d+)\.(\d+)\b/i) {
    $UNICODE_VERSION = sprintf "%d.%03d_%03d", $1, $2, $3;
    next;
  } elsif(m/^#\sDate:\s*(\d\d\d\d+)-(\d\d)-(\d\d)\b/i) {
    $UNICODE_DATE = sprintf "%04d%02d%02d", $1, $2, $3;
    next;
  }

  s/\s*#.*//; next if !m/\S/; s/^\s+//; s/\s+$//;
  handle_idna_line(split /\s*;\s*/);
}
close IdnaMappingTable;

die "Could not determine Unicode version of IdnaMappingTable.txt" if !defined $UNICODE_VERSION;
die "Could not determine release date of IdnaMappingTable.txt" if !defined $UNICODE_DATE;

write_data();
exit(0);

sub handle_idna_line {
  no warnings 'uninitialized';

  my ($cp,$stat,$map,$nv8) = @_;
  $cp =~ m/^([[:xdigit:]]{4,})(?:\.\.([[:xdigit:]]{4,}))?$/i or die "illegal code point specification: ``$cp''";
  my @c = map {hex $_} ($1,$2||$1);

  $stat =~ m/^((disallowed_STD3_)?valid|ignored|(disallowed_STD3_)?mapped|deviation|disallowed)$/ or die "unknown Idna status: ``$stat''";
  my $perlstat = stat_to_perl($stat);

  $map =~ m/^([[:xdigit:]]{4,}(?:\s+[[:xdigit:]]{4,})*)?$/ or die "illegal mapping sequence specification: ``$map''";
  $map = join('', map { chr(hex($_)) } split /\s/, $1);

  if($stat =~ m/^((disallowed_STD3_)?mapped|deviation|ignored)$/i) {
    push @{$map{$perlstat}}, $_, $map foreach( $c[0]..$c[1] );
  } else {
    push @{$is{$perlstat}}, \@c;
  }
}

sub stat_to_perl {
  return join '', map { ucfirst $_ } split /_/, shift;
}

sub write_data {
  my $fn = $0; $fn =~ s/\.PL$/.pm/i;
  local $VERSION = sprintf "%03d", $VERSION;

  print STDERR "generating $fn\n" if -t STDERR;
  open STDOUT, ">", $fn || die "cannot open $fn: $!";
  binmode STDOUT, ":utf8";

  print <<__EOF;
# *** DO NOT EDIT *** generated file *** DO NOT EDIT ***
#
# generated by $0 from $SRC
# see repository at http://github.com/cfaerber/Net-IDN-Encode for source files
#
package Net::IDN::UTS46::_Mapping;

require 5.006;

use strict;
use utf8;
use warnings;

our \$VERSION = ${UNICODE_VERSION}_$VERSION;

our \$UNICODE_VERSION = $UNICODE_VERSION;
our \$UNICODE_DATE = "$UNICODE_DATE";
__EOF

  print "our \@ISA = qw(Exporter);\n";
  print "our \@EXPORT = ();\n";
  print "our \@EXPORT_OK = qw(",join(' ',
	map({ "Is$_" } sort(keys %is, keys %map)),
	map({ "Map$_" } sort(keys %map)),
    ),");\n";

  while(<DATA>) { last if m/^__BREAK__$/; print; }

  write_is($_, @{$is{$_}} ) foreach sort keys(%is);
  write_map($_, @{$map{$_}} ) foreach sort keys(%map);

  while(<DATA>) { print }
}

sub write_is {
  my $sub_name = shift; my $var_name = uc $sub_name;

  # clean list
  #
  @_ = sort { $a->[0] <=> $b->[0] } @_;
  @_ = map { [$_->[0], $_->[defined $_->[1]],] } @_;
  foreach( reverse (0..($#_-1)) ) {	# reverse b/c the array will be shortened by splice
    if($_[$_]->[1] == $_[$_+1]->[0]-1) {
      splice @_, $_, 2, [$_[$_]->[0],$_[$_+1]->[1],];
    }
  }
  foreach(@_) { $_->[1] = undef if $_->[0] == $_->[1]; }
  
  print "\nour \@$var_name = (";
  my $nr = 0;
  foreach(@_) {
    print "\n " if !($nr++ % 8);
    print map { defined($_) ? sprintf(" 0x%04X,", $_) : " undef, " } @{$_} ;
  }
  print "\n);\n";

  print "sub Is$sub_name { return _mk_prop(\@$var_name); };\n";
}

sub write_map {
  my ($sub_name,%map) = @_;
  my $var_name = uc $sub_name;

  print "\nour \%$var_name = (";
  my $nr = 0;
  foreach (sort { $a <=> $b } keys %map) {
    print "\n " if !($nr++ % 8);
    printf " 0x%04X => \"%s\",",
      $_, quote($map{$_});
  }
  print ");\n";

  write_is($sub_name, map { [$_,] } keys %map);


  print "sub Map$sub_name { my \$l = shift;\n";

  my @m = sort { $a <=> $b } keys %map;
  my @m0 = grep { length($map{$_}) == 0 } @m;
  my @m1 = grep { length($map{$_}) == 1 } @m;
  my @m2 = grep { length($map{$_}) >  1 } @m;
  my @m3 = (); 

  if($#m1 <= 1) { push @m2, @m1; @m1 = (); }
  if($#m2 >= 8) { @m3 = @m2; @m2 = (); };

  if($#m1 < 0 && $#m2 < 0 && $#m3 < 0) {
    print "  \$l =~ s/\\p{Is$sub_name}//g;\n" if @m0;
  } else {
    print "  \$l =~ s/[",quote(join '', map { chr($_) } @m0),"]//g;\n" if @m0;
    print "  \$l =~ tr/", quote(join '', map{ chr($_) } @m1), '/', quote(join '', map{ $map{$_}} @m1), "/;\n" if @m1;
    print "  \$l =~ s/",quote(chr($_)),"/",quote($map{$_}),"/g;\n" foreach @m2;
    print "  \$l =~ s/([",quote(join '', map { chr($_) } @m3), '])/$', $var_name, '{ord($1)}/eg;'."\n" if @m3;
  }
  
  print "  return \$l;\n";
  print "};\n";
}

sub quote {
  my $s = shift;
  $s =~ s/([^\x20\w\P{ASCII}])/\\$1/g;
  $s =~ s/([^\x20-\x7E\p{Alnum}])/my $c=ord($1);sprintf($c<0x80?'\x%02X':'\x{%04X}',$c)/ge;
  return $s;
}
__DATA__

sub _mk_prop {
  my @ll; while( my(@c) = splice(@_,0,2) ) {
    push @ll, join ' ', map { sprintf "%04X", $_ } grep { defined $_ } @c;
  }
  return join "\n", @ll;
}

1;
__BREAK__
__END__

=encoding utf8

=head1 NAME

Net::IDN::UTS46::_Mapping - Tables from Unicode Technical Standard #46 (S<UTS #46>)

=head1 DESCRIPTION

This module contains tables and private functions used by L<Net::IDN::UTS46>.
The interface may change without further notice.

=head1 AUTHOR

Claus FE<auml>rber <CFAERBER@cpan.org>

=head1 LICENSE

Copyright 2011-2012 Claus FE<auml>rber.

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=head1 SEE ALSO

L<Net::IDN::UTS46>, L<Net::IDN::Stringprep>, S<UTS #46> (L<http://www.unicode.org/reports/tr46/>)
