#!/usr/bin/perl -w

#############################################################################
# Source: Ridas::Locale::ICU::Language
#         Ridas::Locale::ICU::Territory

#############################################################################
# Source: Ridas::Locale::ICU::Cache
#         Ridas::Locale::ICU::Calendar
#         Ridas::Locale::ICU::Common
#         Ridas::Locale::ICU::JavaPatterns

#############################################################################
# Source: Ridas::Locale::ICU::JavaPatterns

#############################################################################
# Source: Ridas::Locale::ICU::ICUtoPOSIXConverter
#
# Code provided by Rick Measham (cheers mate!)

#############################################################################
# Source: Ridas::Locale::ICU::makeDateTimeLocale.pl

use strict;

use 5.006;

use DateTime;
use File::Basename;
use File::Copy;
use File::Spec;
use Getopt::Long;
use XML::Simple;

my $VERSION = "0.01";

my ( %LocalesWithoutData, %XML, %LanguageLookup, %TerritoryLookup );

my $ScriptName = basename($0);

my %Aliases =
    (
     'C'             => 'en_US_POSIX',
     'POSIX'         => 'en_US_POSIX',
     # Apparently the Hebrew locale code was changed from iw to he at
     # one point.
     'iw'            => 'he',
     'iw_IL'         => 'he_IL',
    );

# it's an array because the order in which the regexes are checked is
# important
my @JavaPatterns =
    ( qr/G/     => '{era}',
      qr/yyyy/  => '{ce_year}',
      qr/y/     => 'y',
      qr/u/     => 'Y',
      qr/MMMM/  => 'B',
      qr/MMM/   => 'b',
      qr/MM/    => 'm',
      qr/M/     => '{month}',
      qr/dd/    => 'd',
      qr/d/     => '{day}',
      qr/hh/    => 'l',
      qr/h/     => '{hour_12}',
      qr/HH/    => 'H',
      qr/H/     => '{hour}',
      qr/mm/    => 'M',
      qr/m/     => '{minute}',
      qr/ss/    => 'S',
      qr/s/     => '{second}',
      qr/S/     => 'N',
      qr/EEEE/  => 'A',
      qr/E/     => 'a',
      qr/D/     => 'j',
      qr/F/     => '{weekday_of_month}',
      qr/w/     => 'V',
      qr/W/     => '{week_month}',
      qr/a/     => 'p',
      qr/k/     => '{hour_1}',
      qr/K/     => '{hour_12_0}',
      qr/z/     => '{time_zone_long_name}',
    );

my %opts;

main();

sub main
{
    GetOptions( 'dir:s'  => \$opts{dir},
                'name:s' => \$opts{name},
                'file:s' => \$opts{file},
                'quiet'  => \$opts{quiet},
                'help'   => \$opts{help},
              );

    $opts{help} = 1
        unless defined $opts{dir} && -d $opts{dir};

    usage() if $opts{help};

    unless ( $opts{name} )
    {
        copy( 'MANIFEST.base', 'MANIFEST' );
        open MAN, ">>MANIFEST" or die "Cannot write to MANIFEST: $!";
    }

    generate_from_ICU_xml();
}

sub generate_from_ICU_xml
{
    my @files = glob File::Spec->catfile( $opts{dir}, '*.xml' );

    print "Reading ICU files from: '$opts{dir}'...\n\n"
        unless $opts{quiet};

    foreach my $f (@files)
    {
        if ( $opts{file} )
        {
            next unless $opts{file} eq basename($f);
        }

        my $id = basename($f)
            or die "Could not get basename for: '$f'";
        $id =~ s/\.xml$//i;

        if ( $id =~ /^iw/ )
        {
            print "  skipping $id - iw/he Hebrew duplication, prefer 'he'\n"
                unless $opts{quiet};
            next;
        }

        print "  $id" unless $opts{quiet};

        die "Found a file matching an alias: $id\n" if $Aliases{$id};

        my $xml =
            XMLin($f)
                or die "Missing XML for locale: '$id'";

        $XML{$id} = { xml         => $xml,
                      source_file => basename($f),
                    };

        build_language_and_territory($id, $xml);

        print "\n" unless $opts{quiet};
    }

    foreach my $id ( sort keys %XML )
    {
        my $xml = $XML{$id}{xml};

        unless ( has_data($xml) )
        {
            my $parent = parent_id($id);
            $LocalesWithoutData{$id} = $parent;

            print "  $id has no data\n" unless $opts{quiet};
            next;
        }

        my $hash;
        $hash->{id}        = $id;
        $hash->{parent_id} = parent_id($id);

        get_version( $hash, $xml );
        get_generation_date( $hash, $xml );
        get_days( $hash, $xml );
        get_months( $hash, $xml );
        get_formats( $hash, $xml );
        get_am_pm_eras( $hash, $xml );

        user_supplied_changes($hash);

        generate_locale($hash);
    }

    generate_name_lookup();

    print "\nAll done\n" unless $opts{quiet};
}

sub build_language_and_territory
{
    my ($id, $xml) = @_;

    _build_lookup
        ( \%LanguageLookup,  $id, $xml->{localeDisplayNames}{languages}{language} );

    _build_lookup
        ( \%TerritoryLookup, $id, $xml->{localeDisplayNames}{territories}{territory} );
}

sub _build_lookup
{
    my ($lookup, $id, $xml) = @_;

    return unless $xml;

    ($id) = split /_/, $id;          # Interested in language only

    if ( ref $xml eq "HASH" )
    {
        $lookup->{$id}{ $xml->{type} } = $xml->{content};
    }
    else
    {
        $lookup->{$id}{ $_->{type} } = $_->{content} for @$xml;
    }
}

sub has_data
{
    my $cal = shift()->{dates}{calendars}{calendar};

    return
        ( $cal->{dayNames}{day}                ||
          $cal->{dayAbbr}{day}                 ||
          $cal->{monthNames}{month}            ||
          $cal->{monthAbbr}{month}             ||
          $cal->{am}                           ||
          $cal->{eras}{eraAbbr}{era}           ||
          $cal->{dateFormats}{dateFormatLength} ||
          $cal->{timeFormats}{timeFormatLength} ||
          $cal->{dateTimeFormat}{pattern}
        );
}

sub parent_id
{
    my $id = shift;

    my ($language, $territory, $variant) = split /_/, $id;

    my $parent_id;

    if ( $territory )
    {
        $parent_id  =   $language;
        $parent_id .= "_$territory" if $variant;
    }
    else
    {
        $parent_id = $language =~ /^root$/i ? 'Base' : 'root';
    }

    if ( $LocalesWithoutData{$parent_id} )
    {
        return parent_id($parent_id);
    }

    return $parent_id;
}

sub get_version
{
    my ( $hash, $xml ) = @_;

    $hash->{version} = $xml->{identity}{version}{number};
}

sub get_generation_date
{
    my ( $hash, $xml ) = @_;

    $hash->{generation_date} = $xml->{identity}{generation}{date};
}

sub get_days
{
    my ($hash, $xml) = @_;

    _get_days($hash, $xml, 'dayNames', 'day_names'        );
    _get_days($hash, $xml, 'dayAbbr',  'day_abbreviations');
}

sub _get_days
{
    my ($hash, $xml, $element, $key) = @_;

    return unless my $days = $xml->{dates}{calendars}{calendar}{$element}{day};

    $hash->{$key}[6]      = $days->[ 0]{content};
    $hash->{$key}[$_ - 1] = $days->[$_]{content} for 1..6;
}

sub get_months
{
    my ($hash, $xml) = @_;

    _get_months($hash, $xml, 'monthNames', 'month_names'        );
    _get_months($hash, $xml, 'monthAbbr',  'month_abbreviations');
}

sub _get_months
{
    my ($hash, $xml, $element, $key) = @_;

    return unless my $months = $xml->{dates}{calendars}{calendar}{$element}{month};

    $hash->{$key}[$_] = $months->[$_]{content} for 0..11;
}

sub get_am_pm_eras
{
    my ($hash, $xml) = @_;

    my $cal = $xml->{dates}{calendars}{calendar};

    $hash->{am_pms} = [ $cal->{am}, $cal->{pm} ] if $cal->{am};

    $hash->{eras} =
        [ $cal->{eras}{eraAbbr}{era}[0]{content},
          $cal->{eras}{eraAbbr}{era}[1]{content} ]
            if $cal->{eras}{eraAbbr}{era};
}

sub get_formats
{
    my ($hash, $xml) = @_;

    if ( my $dates = $xml->{dates}{calendars}{calendar}{dateFormats} )
    {
        my %formats =
            ( map { $_->{type} => $_->{dateFormat}{pattern} }
              @{ $xml->{dates}{calendars}{calendar}{dateFormats}{dateFormatLength} }
            );

        foreach my $length ( qw( full long medium short ) )
        {
            $hash->{date_formats}{$length} = simple2strf( $formats{$length} )
        }

        $hash->{default_date_format_length} = $dates->{default}{type};

	# Work out the order of the date parts (ymd, dmy, or mdy)
	my $p = $formats{short};
	$p =~ tr{dmyDMY}{}cd;
	$p =~ tr{dmyDMY}{dmydmy}s;
	$hash->{date_parts_order} = $p;
    }

    if ( my $times = $xml->{dates}{calendars}{calendar}{timeFormats} )
    {
        my %formats =
            ( map { $_->{type} => $_->{timeFormat}{pattern} }
              @{ $xml->{dates}{calendars}{calendar}{timeFormats}{timeFormatLength} }
            );

        foreach my $length ( qw( full long medium short ) )
        {
            $hash->{time_formats}{$length} = simple2strf( $formats{$length} )
        }

        $hash->{default_time_format_length} = $times->{default}{type};
    }

    if ( my $order =
         $xml->{dates}{calendars}{calendar}{dateTimeFormats}
               {dateTimeFormatLength}{dateTimeFormat}{pattern} )
    {
        $hash->{date_before_time} = $order eq "{1} {0}" ? 1 : 0;
    }
}

sub simple2strf
{
    my $simple = shift;
    $simple =~
        s/(G+|y+|u+|M+|d+|h+|H+|m+|s+|S+|E+|D+|F+|w+|W+|a+|k+|K+|z+)|'((?:[^']|'')*)'/
          ($2) ? _stringify($2) : ($1) ? _convert($1) : "'"/eg;

    return $simple;
}

sub _convert
{
    my $simple = shift;

    for ( my $x = 0; $x < @JavaPatterns; $x += 2 )
    {
        return '%' . $JavaPatterns[ $x + 1 ] if $simple =~ /$JavaPatterns[$x]/;
    }

    die "**Dont know $simple***";
}

sub _stringify
{
    my $string = shift;

    $string =~ s/%(?:[^%])/%%/g;
    $string =~ s/\'\'/\'/g;

    return $string;
}

sub user_supplied_changes
{
    my $hash = shift;

    if ( $hash->{id} eq 'nl' )
    {
        # Eugene Van Der Pijll <pijll@gmx.net>
        # Thursday 01 May 2003 10:14:35 pm
        # datetime@perl.org

        $hash->{eras} = ["v.Chr.", "n.Chr."];
    }
}

sub generate_locale
{
    my $hash = shift;

    my $vars = "";
    my $subs = "";

    foreach my $var
        ( qw( day_names
              day_abbreviations
              month_names
              month_abbreviations
              am_pms
              eras
            ) )
    {
        next unless $hash->{$var};

        $vars .= "my \@$var = (\n";

        foreach my $val ( @{ $hash->{$var} } )
        {
            $vars .= qq|"\Q$val\E",\n|;
        }

        $vars .= ");\n\n";

        $subs .= sprintf "sub %-30s { \\\@%-30s }\n", $var, $var;
    }

    foreach my $var ( qw( date_formats
                          time_formats
                        ) )
    {
        next unless $hash->{$var};

        $vars .= "my %$var = (\n";

        while ( my ( $k, $v ) = each %{ $hash->{$var} } )
        {
            $vars .= qq|"\Q$k\E" => "\Q$v\E",\n|;
        }

        $vars .= ");\n\n";

        $subs .= sprintf "sub %-30s { \\%%%-30s }\n", $var, $var;
    }

    # The accessor subs and whether they should be public (1) or 
    # private (0).  Private subs are prefixed with _.
    my %accessors =
	( date_before_time           => 1,
	  date_parts_order           => 1,
	  default_date_format_length => 0,
	  default_time_format_length => 0,
	  );
    while (my ($var, $public) = each %accessors)
    {
        next unless $hash->{$var};

        $vars .= qq|my \$$var = "\Q$hash->{$var}\E";\n|;

        my $sub_name = $public ? $var : "_$var";

        $subs .= sprintf "sub %-30s { \$%-31s }\n", $sub_name, $var;
    }

    my $class         = $hash->{id};
    my $parent        = $hash->{parent_id};

    my $id = $hash->{id};

    my $file = File::Spec->catfile( 'lib', 'DateTime', 'Locale', "$class.pm");

    local *OUTFILE;

    if ( $] >= 5.008 )
    {
        open OUTFILE, ">:utf8", $file or die $!;
    }
    else
    {
        open OUTFILE, ">$file" or die $!;
    }

    print OUTFILE <<"EOF" or die "print failed: $!";
###########################################################################
#
# This file is auto-generated by the Perl DateTime Suite time locale
# generator ($VERSION).  This code generator comes with the
# DateTime::Locale distribution in the tools/ directory, and is called
# $ScriptName.
#
# This file as generated from the ICU XML locale data.  See the
# LICENSE.icu file included in this distribution for license details.
#
# This file was generated from the source file $XML{$id}{source_file}.
# The source file version number was $hash->{version}, generated on
# $hash->{generation_date}.
#
# Do not edit this file directly.
#
###########################################################################

package DateTime::Locale::$class;

use strict;

BEGIN
{
    if ( \$] >= 5.006 )
    {
        require utf8; utf8->import;
    }
}

use DateTime::Locale::$parent;

\@DateTime::Locale::${class}::ISA = qw(DateTime::Locale::$parent);

$vars

$subs

1;

EOF

    close OUTFILE or warn $!;

    print MAN "$file\n";
}

sub generate_name_lookup
{
    local *OUTFILE;
    if ( $] >= 5.008 )
    {
        open OUTFILE, '>:utf8', File::Spec->catfile( 'lib', 'DateTime', 'LocaleCatalog.pm' )
            or die "$!";
    }
    else
    {
        open OUTFILE, '>' . File::Spec->catfile( 'lib', 'DateTime', 'LocaleCatalog.pm' )
            or die "$!";
    }

    print OUTFILE <<'EOF';
package DateTime::LocaleCatalog;

use strict;

BEGIN
{
    return unless $] >= 5.006;

    require utf8; import utf8;
}

EOF

    my $locales_in_pod = '';

    print OUTFILE "\@DateTime::Locale::Locales = (\n";

    my $locale_with_full_names =
        length $LanguageLookup{root}{en} > 3 ? 'root' : 'en';

    for my $id ( sort keys %XML )
    {
        my ($lang, $territory, $variant) = split /_/, $id;

        my %lookup;

        $lookup{en_language} = $LanguageLookup{$locale_with_full_names}{$lang};
        $lookup{native_language} =
            $LanguageLookup{$lang}{$lang} || $LanguageLookup{$locale_with_full_names}{$lang};

        if ($territory)
        {
            $lookup{en_territory} = $TerritoryLookup{$locale_with_full_names}{$territory};;

            $lookup{native_territory} =
                ( exists $TerritoryLookup{$lang}{$territory}
                  ? $TerritoryLookup{$lang}{$territory}
                  : $TerritoryLookup{$locale_with_full_names}{$territory} );
        }

        if ($variant)
        {
            $lookup{en_variant} = ucfirst lc $variant;
        }
        else
        {
            $lookup{en_variant} = undef;
        }

        $lookup{native_variant} = $lookup{en_variant};

        print OUTFILE qq|    { id => "\Q$id\E",\n|;
        foreach my $k ( qw( en_language en_territory en_variant
                            native_language native_territory native_variant
                          )
                      )
        {
            next unless defined $lookup{$k};

            my $val = qq|"\Q$lookup{$k}\E"|;
            print OUTFILE "      $k => $val,\n";
        }

        if ( $LocalesWithoutData{$id} )
        {
            print OUTFILE qq|      real_class => "\Q$LocalesWithoutData{$id}\E",\n|;
        }

        print OUTFILE "    },\n";

        my @pieces;
        foreach my $p ( qw( en_language en_territory en_variant ) )
        {
            push @pieces, $lookup{$p} if defined $lookup{$p};
        }

        $locales_in_pod .= sprintf( " %-18s  %s\n", $id, join ' ', @pieces );
    }

    print OUTFILE ");\n\n";

    my $aliases_in_pod = '';

    print OUTFILE "%DateTime::Locale::Aliases = (\n";
    foreach my $id ( sort keys %Aliases )
    {
        print OUTFILE "    $id => '$Aliases{$id}',\n";

        $aliases_in_pod .= sprintf( " %-18s  %s\n", $id, $Aliases{$id} );
    }
    print OUTFILE ");";

    print OUTFILE <<"EOF";


1;

__END__

=head1 NAME

DateTime::LocaleCatalog - Provides a list of all valid locale names

=head1 SYNOPSIS

See DateTime::Locale for usage details.

=head1 DESCRIPTION

This module contains a list of all known locales.

=head1 LOCALES

Any method taking locale id or name arguments should use one of the
values listed below.  Ids and names are case sensitive.

Always select the closest matching locale - for example, French
Canadians would choose fr_CA over fr - and B<always> use locale ids in
preference to names; locale ids offer greater compatibility when using
localized third party modules.

Many of the available locales are the same as other, more generic
locales for datetime information.  In that case, we simply load the
more generic class.  However, the various methods related to name,
territory, and variant return the values for the requested locale.

The available locales are:

 Locale id           Locale name
 ==================================================
$locales_in_pod

There are also some hard-coded aliases available, these are:

 Locale id           Is an alias for
 ==================================================
$aliases_in_pod

=cut
EOF

    close OUTFILE or warn $!;
}

sub usage
{
    print <<'EOF';

This script parses the ICU locale files and turns them into a set of
Perl modules.  It also generates the MANIFEST file.

It takes the following arguments:

  --dir    A directory containing ICU XML files.  Required.

  --file   Parse just the file with the given name.  For debugging.

  --name   Only create the specified locale.  For debugging.

  --quiet  Don't display any output while processing files.

  --help   What you are reading

If the --file or --name options are specified, the MANIFEST will not
be generated.

EOF

    exit;
}
