#!/usr/bin/perl -w

use strict;

use 5.006;

use DateTime;
use File::Basename;
use File::Copy;
use File::Find::Rule;
use File::Spec;
use Getopt::Long;
use List::Util qw( first );
use XML::Simple;

my $VERSION = "0.04";

my ( %LocalesWithoutData, %XML, %Parents,
     %LanguageLookup, %TerritoryLookup, %ScriptLookup );

my $ScriptName = basename($0);

my %Aliases =
    (
     'C'             => 'en_US_POSIX',
     'POSIX'         => 'en_US_POSIX',
     # Apparently the Hebrew locale code was changed from iw to he at
     # one point.
     'iw'            => 'he',
     'iw_IL'         => 'he_IL',
     # CLDR got rid of no
     'no'            => 'nn',
     'no_NO'         => 'nn_NO',
     'no_NO_NY'      => 'nn_NO',
    );

# it's an array because the order in which the regexes are checked is
# important
my @JavaPatterns =
    ( qr/G/     => '{era}',
      qr/yyyy/  => '{ce_year}',
      qr/y/     => 'y',
      qr/u/     => 'Y',
      qr/MMMM/  => 'B',
      qr/MMM/   => 'b',
      qr/MM/    => 'm',
      qr/M/     => '{month}',
      qr/dd/    => 'd',
      qr/d/     => '{day}',
      qr/hh/    => 'l',
      qr/h/     => '{hour_12}',
      qr/HH/    => 'H',
      qr/H/     => '{hour}',
      qr/mm/    => 'M',
      qr/m/     => '{minute}',
      qr/ss/    => 'S',
      qr/s/     => '{second}',
      qr/S/     => 'N',
      qr/EEEE/  => 'A',
      qr/E/     => 'a',
      qr/D/     => 'j',
      qr/F/     => '{weekday_of_month}',
      qr/w/     => 'V',
      qr/W/     => '{week_month}',
      qr/a/     => 'p',
      qr/k/     => '{hour_1}',
      qr/K/     => '{hour_12_0}',
      qr/z/     => '{time_zone_long_name}',
    );

my @FormatLengths = qw( full long medium short  );

my %opts;

main();

my $man;
sub main
{
    GetOptions( 'dir:s'   => \$opts{dir},
                'name:s'  => \$opts{name},
                'file:s'  => \$opts{file},
                'clean'   => \$opts{clean},
                'quiet'   => \$opts{quiet},
                'verbose' => \$opts{verbose},
                'help'    => \$opts{help},
              );

    $opts{help} = 1
        unless defined $opts{dir} && -d $opts{dir};

    usage() if $opts{help};

    clean() if $opts{clean};

    unless ( $opts{name} )
    {
        copy( 'MANIFEST.base', 'MANIFEST' );
        open $man, ">>MANIFEST" or die "Cannot write to MANIFEST: $!";
    }

    binmode STDOUT, ':utf8' if $opts{verbose};
    $| = 1;

    generate_from_CLDR_xml();
}

sub clean
{
    for my $f ( File::Find::Rule
                ->file
                ->name('*.pm')
                ->grep('This file is auto-generated' )
                ->in('lib')
              )
    {
        unlink $f or die "Cannot unlink $f: $!";
    }
}

sub generate_from_CLDR_xml
{
    my @files = glob File::Spec->catfile( $opts{dir}, '*.xml' );

    print "Reading CLDR files from: '$opts{dir}'...\n"
        unless $opts{quiet};

    foreach my $f (@files)
    {
        if ( $opts{file} )
        {
            next unless $opts{file} eq basename($f);
        }

        my $id = basename($f)
            or die "Could not get basename for: '$f'";
        $id =~ s/\.xml$//i;

        print "  $id" unless $opts{quiet};

        die "Found a file matching an alias: $id\n" if $Aliases{$id};

        # This is the fastest one from my testing
        local $XML::Simple::PREFERRED_PARSER = 'XML::LibXML::SAX';

        my %key_attr = ( calendar         => 'type',
                         monthContext     => 'type',
                         dayContext       => 'type',
                         monthWidth       => 'type',
                         dayWidth         => 'type',
                         month            => 'type',
                         day              => 'type',
                         era              => 'type',
                         quarterContext   => 'type',
                         quarterWidth     => 'type',
                         quarter          => 'type',
                         timeFormatLength => 'type',
                         dateFormatLength => 'type',
                       );

        my $xml =
            XMLin( $f,
                   KeyAttr      => \%key_attr,
                   GroupTags    => { eraAbbr  => 'era',
                                     eraNames => 'era',
                                   },
                   Forcearray   => [ keys %key_attr ],
                   ForceContent => 1,
                 )
                or die "Missing XML for locale: '$id'";

        my $gen_date = $xml->{identity}{generation}{date};
        $gen_date =~ s/^\$Date:\s+//;
        $gen_date =~ s/\s+\$$//;

        my $version = $xml->{identity}{version}{number};
        $version =~ s/^\$Revision:\s+//;
        $version =~ s/\s+\$$//;

        # As of CLDR 1.5, the following languages are incomplete:
        next if $id eq 'ssy' && $version eq '1.1';

        $XML{$id} = { locale      => $xml->{localeDisplayNames},
                      cal         => $xml->{dates}{calendars}{calendar}{gregorian},
                      version     => $version,
                      date        => $gen_date,
                      source_file => basename($f),
                    };

        build_language_and_territory( $id, $XML{$id}{locale} );

        $Parents{$id} = $xml->{alias}{source}
            if $xml->{alias};

        print "\n" unless $opts{quiet};
    }

    print "\nGenerating PM files\n" if $opts{verbose};

    foreach my $id ( sort keys %XML )
    {
        next if $opts{name} && $opts{name} ne $id;

        my $cal = $XML{$id}{cal};

        user_supplied_changes( $id, $cal );

        my $hash;
        $hash->{id}        = $id;
        $hash->{parent_id} = parent_id($id);

        print "\n  $id\n" if $opts{verbose};
        print "    parent_id: $hash->{parent_id}\n" if $opts{verbose};

        $hash->{version} = $XML{$id}{version};
        print "    version: $hash->{version}\n" if $opts{verbose};

        $hash->{generation_date} = $XML{$id}{date};
        print "    generation date: $hash->{generation_date}\n" if $opts{verbose};

        get_days( $hash, $cal );
        get_months( $hash, $cal );
        get_formats( $hash, $cal );
        get_quarters( $hash, $cal );
        get_am_pm( $hash, $cal );
        get_eras( $hash, $cal );

        unless ( has_data($hash) )
        {
            my $parent = parent_id($id);
            $LocalesWithoutData{$id} = $parent;

            print "  $id has no data\n" unless $opts{quiet};
            next;
        }

        generate_locale($hash);
    }

    generate_catalog();

    print "\nAll done\n" unless $opts{quiet};
}

sub build_language_and_territory
{
    my ( $id, $xml ) = @_;

    _build_lookup
        ( \%LanguageLookup,  $id, $xml->{languages}{language} );

    _build_lookup
        ( \%TerritoryLookup, $id, $xml->{territories}{territory} );

    _build_lookup
        ( \%ScriptLookup,    $id, $xml->{scripts}{script} );
}

sub _build_lookup
{
    my ( $lookup, $id, $xml ) = @_;

    return unless $xml;

    my ($lang) = split /_/, $id;          # Interested in language only

    if ( ref $xml eq "HASH" )
    {
        $lookup->{$lang}{ $xml->{type} } = $xml->{content};
    }
    else
    {
        $lookup->{$lang}{ $_->{type} } = $_->{content} for grep { ! $_->{alt} } @$xml;
    }
}

sub has_data
{
    my $hash = shift;

    # There are 4 items every locale has (id, parent id, version, gen
    # date).
    return keys %$hash > 4;
}

sub parent_id
{
    my $id = shift;

    return $Parents{$id} if exists $Parents{$id};

    my ($language, $territory, $variant) = split /_/, $id;

    my $parent_id;

    if ( $territory )
    {
        $parent_id  =   $language;
        $parent_id .= "_$territory" if $variant;
    }
    else
    {
        $parent_id = $language =~ /^root$/i ? 'Base' : 'root';
    }

    if ( $LocalesWithoutData{$parent_id} )
    {
        return parent_id($parent_id);
    }

    return $parent_id;
}

sub get_days
{
    my ( $hash, $cal ) = @_;

    _get_days($hash, $cal, 'wide',         'day_names'         );
    _get_days($hash, $cal, 'abbreviated',  'day_abbreviations' );
    _get_days($hash, $cal, 'narrow',       'day_narrows'       );
}

sub _get_days
{
    my ( $hash, $cal, $length, $key ) = @_;

    my $days =
        $cal->{days}{dayContext}{format}{dayWidth}{$length}{day};

    $days ||=
        $cal->{days}{dayContext}{'stand-alone'}{dayWidth}{$length}{day};

    return unless $days;

    my @keys = qw( mon tue wed thu fri sat sun );

    return if grep { ! exists $days->{$_} } @keys;

    @{ $hash->{$key} } = map { $days->{$_}{content} } @keys;

    print "    $key: @{ $hash->{$key} }\n" if $opts{verbose};
}

sub get_months
{
    my ( $hash, $cal ) = @_;

    _get_months($hash, $cal, 'wide',         'month_names'         );
    _get_months($hash, $cal, 'abbreviated',  'month_abbreviations' );
    _get_months($hash, $cal, 'narrow',       'month_narrows'       );
}

sub _get_months
{
    my ($hash, $cal, $length, $key) = @_;

    my $months =
        $cal->{months}{monthContext}{format}{monthWidth}{$length}{month};

    $months ||=
        $cal->{months}{monthContext}{'stand-alone'}{monthWidth}{$length}{month};

    return unless $months;

    return if grep { ! exists $months->{$_} } 1..12;

    @{ $hash->{$key} } = map { $months->{$_}{content} } 1..12;

    print "    $key: @{ $hash->{$key} }\n" if $opts{verbose};
}

sub get_formats
{
    my ( $hash, $cal ) = @_;

    if ( my $dates = $cal->{dateFormats} )
    {
        my %original;
        foreach my $length (@FormatLengths)
        {
            my $pattern = $dates->{dateFormatLength}{$length}{dateFormat}{pattern};
            next unless $pattern;

            my $value;
            if ( ref $pattern eq 'ARRAY' )
            {
                my @non_alt = grep { ! $_->{alt} } @$pattern;

                die "Multiple primary patterns!\n" if @non_alt > 1;

                $value = $non_alt[0]->{content};
            }
            else
            {
                $value = $pattern->{content};
            }

            next unless defined $value;

            $original{$length} = $value;

            $hash->{date_formats}{$length} = simple2strf($value);

            print "    date_formats{$length}: $hash->{date_formats}{$length}\n"
                if $opts{verbose};
        }

        if ( $dates->{default}{choice} )
        {
            $hash->{default_date_format_length} = $dates->{default}{choice};

            print "    default_date_format_length: $hash->{default_date_format_length}\n"
                if $opts{verbose};
        }

        if ( my $short = $original{short} )
        {
            # Work out the order of the date parts (ymd, dmy, or mdy)
            $short =~ tr{dmyDMY}{}cd;
            $short =~ tr{dmyDMY}{dmydmy}s;

            $hash->{date_parts_order} = $short;

            print "    date_parts_order: $hash->{date_parts_order}\n" if $opts{verbose};
        }
    }

    if ( my $times = $cal->{timeFormats} )
    {
        foreach my $length (@FormatLengths)
        {
            my $value = _non_alt_content( $times->{timeFormatLength}{$length}{timeFormat}{pattern} );

            next unless defined $value;

            $hash->{time_formats}{$length} = simple2strf($value);

            print "    time_formats{$length}: $hash->{time_formats}{$length}\n"
                if $opts{verbose};
        }

        if ( $times->{default}{choice} )
        {
            $hash->{default_time_format_length} = $times->{default}{choice};

            print "    default_time_format_length: $hash->{default_time_format_length}\n"
                if $opts{verbose};
        }
    }

    return unless $cal->{dateTimeFormats}{dateTimeFormatLength};

    my $format;
    if ( ref $cal->{dateTimeFormats}{dateTimeFormatLength} eq 'ARRAY' )
    {
        $format = $cal->{dateTimeFormats}{dateTimeFormatLength}[0]{dateTimeFormat};
    }
    else
    {
        $format = $cal->{dateTimeFormats}{dateTimeFormatLength}{dateTimeFormat}{pattern};
    }

    if ( my $value = _non_alt_content($format) )
    {
        $hash->{date_before_time} = $value eq "{1} {0}" ? 1 : 0;

        print "    date_before_time: $hash->{date_before_time}\n" if $opts{verbose};
    }
}

sub _non_alt_content
{
    my $frag = shift;

    return unless defined $frag;

    if ( ref $frag eq 'ARRAY' )
    {
        my @non_alt = grep { ! $_->{alt} } @$frag;

        die "Multiple primary patterns!\n" if @non_alt > 1;

        return $non_alt[0]->{content};
    }
    else
    {
        return $frag->{content};
    }
}

sub simple2strf
{
    my $simple = shift;

    $simple =~
        s/(G+|y+|u+|M+|d+|h+|H+|m+|s+|S+|E+|D+|F+|w+|W+|a+|k+|K+|z+)|'((?:[^']|'')*)'/
          $2 ? _stringify($2) : $1 ? _convert($1) : "'"/eg;

    return $simple;
}

sub _convert
{
    my $simple = shift;

    for ( my $x = 0; $x < @JavaPatterns; $x += 2 )
    {
        return '%' . $JavaPatterns[ $x + 1 ] if $simple =~ /$JavaPatterns[$x]/;
    }

    die "**Dont know $simple***";
}

sub _stringify
{
    my $string = shift;

    $string =~ s/%(?:[^%])/%%/g;
    $string =~ s/\'\'/\'/g;

    return $string;
}

# used to hardcode corrections that aren't yet in CLDR
sub user_supplied_changes
{
    my $id = shift;
    my $cal = shift;

    if ( $id eq 'az' )
    {
        # The az.xml file appears to have a mistake in the wide day
        # names, thursday and friday are the same for this locale
        $cal->{days}{dayContext}{format}{dayWidth}{wide}{day}{thu}{content} =~ s/ \w+$//;
    }
    elsif ( $id eq 'gaa' )
    {
        # I am completely making this up, but the data is marked as
        # uncomfimed in the locale file and it's preferable to having
        # two days with the same abbreviation
        $cal->{days}{dayContext}{format}{dayWidth}{abbreviated}{day}{sun}{content} = 'Hog';
    }
    elsif ( $id eq 've' )
    {
        # Again, making stuff up to avoid non-unique abbreviations
        $cal->{months}{monthContext}{format}{monthWidth}{abbreviated}{month}{3}{content} = 'Thf';
    }
    elsif ( $id eq 'root' )
    {
        # The root locale uses <alias source="../xpath/thing"> to
        # refer to other tags.
        $cal->{months}{monthContext}{format}{monthWidth}{abbreviated} =
            $cal->{months}{monthContext}{format}{monthWidth}{wide}
                unless $cal->{months}{monthContext}{format}{monthWidth}{abbreviated}{month}{1};

        $cal->{months}{monthContext}{format}{monthWidth}{narrow} =
            $cal->{months}{monthContext}{format}{monthWidth}{abbreviated}
                unless $cal->{months}{monthContext}{format}{monthWidth}{narrow}{month}{1};

        $cal->{days}{dayContext}{format}{dayWidth}{abbreviated} =
            $cal->{days}{dayContext}{format}{dayWidth}{wide}
                unless $cal->{days}{dayContext}{format}{dayWidth}{abbreviated}{day}{1};

        $cal->{days}{dayContext}{format}{dayWidth}{narrow} =
            $cal->{days}{dayContext}{format}{dayWidth}{abbreviated}
                unless $cal->{days}{dayContext}{format}{dayWidth}{narrow}{day}{1};

        $cal->{quarters}{quarterContext}{format}{quarterWidth}{abbreviated} =
            $cal->{quarters}{quarterContext}{format}{quarterWidth}{wide}
                unless $cal->{quarters}{quarterContext}{format}{quarterWidth}{abbreviated}{1};

        $cal->{eras}{eraNames} = $cal->{eras}{eraAbbr}
            unless $cal->{eras}{eraNames}{0};
    }
}

sub get_quarters
{
    my ( $hash, $cal ) = @_;

    my $quarters = $cal->{quarters}{quarterContext}{format}{quarterWidth};

    return unless $quarters;

    if ( $quarters->{abbreviated} )
    {
        $hash->{quarter_abbreviations} =
            [ map { $quarters->{abbreviated}{quarter}{$_}{content} } 1 .. 4 ];

        print "    quarter abbr: [@{ $hash->{quarter_abbreviations} }]\n" if $opts{verbose};
    }

    if ( $quarters->{wide} )
    {
        $hash->{quarter_names} =
            [ map { $quarters->{wide}{quarter}{$_}{content} } 1 .. 4 ];

        print "    quarter names: [@{ $hash->{quarter_names} }]\n" if $opts{verbose};
    }
}

sub get_am_pm
{
    my ( $hash, $cal ) = @_;

    return unless $cal->{am};

    my $am = eval { @{ $cal->{am} } }
             ? ( first { ! $_->{alt} } @{ $cal->{am} } )->{content}
             : $cal->{am}{content};

    my $pm = eval { @{ $cal->{pm} } }
             ? ( first { ! $_->{alt} } @{ $cal->{pm} } )->{content}
             : $cal->{pm}{content};

    $hash->{am_pms} = [ $am, $pm ];

    print "    am_pms: [@{ $hash->{am_pms} }]\n" if $opts{verbose};
}

sub get_eras
{
    my ( $hash, $cal ) = @_;

    # At least one locale, se_NO, has only the first era, not the second
    if ( $cal->{eras}{eraAbbr}
         && $cal->{eras}{eraAbbr}{0}
         && $cal->{eras}{eraAbbr}{1}
       )
    {
        my $eras = $cal->{eras}{eraAbbr};

        $hash->{era_abbreviations} =
            [ $eras->{0}{content},
              $eras->{1}{content} ];

        print "    era abbr: [@{ $hash->{era_abbreviations} }]\n" if $opts{verbose};
    }

    if ( $cal->{eras}{eraNames}
         && $cal->{eras}{eraNames}{0}
         && $cal->{eras}{eraNames}{1}
       )
    {
        my $eras = $cal->{eras}{eraNames};

        $hash->{era_names} =
            [ $eras->{0}{content},
              $eras->{1}{content} ];

        print "    era names: [@{ $hash->{era_names} }]\n" if $opts{verbose};
    }
}

sub generate_locale
{
    my $hash = shift;

    my $vars = "";
    my $subs = "";

    foreach my $var
        ( qw( day_names
              day_abbreviations
              day_narrows

              month_names
              month_abbreviations
              month_narrows

              quarter_names
              quarter_abbreviations

              am_pms
              era_names
              era_abbreviations
            ) )
    {
        next unless $hash->{$var};

        $vars .= "my \@$var = (\n";

        foreach my $val ( @{ $hash->{$var} } )
        {
            $vars .= qq|"\Q$val\E",\n|;
        }

        $vars .= ");\n\n";

        $subs .= sprintf "sub %-30s { \\\@%s }\n", $var, $var;
    }

    foreach my $var ( qw( date_formats
                          time_formats
                        ) )
    {
        next unless $hash->{$var};

        for my $length (@FormatLengths)
        {
            next unless $hash->{$var}{$length};

            my $sub_name = join '_', $length, $var;
            $sub_name =~ s/s$//;

            $subs .=
                sprintf "sub %-30s { %s }\n", $sub_name, qq|"\Q$hash->{$var}{$length}\E"|;
        }
    }

    # The accessor subs and whether they should be public (1) or
    # private (0).  Private subs are prefixed with _.
    my %accessors =
	( date_before_time           => 1,
	  date_parts_order           => 1,
	  default_date_format_length => 0,
	  default_time_format_length => 0,
	  );

    while ( my ( $var, $public ) = each %accessors )
    {
        next unless $hash->{$var};

        $vars .= qq|my \$$var = "\Q$hash->{$var}\E";\n|;

        my $sub_name = $public ? $var : "_$var";

        $subs .= sprintf "sub %-30s { \$%s }\n", $sub_name, $var;
    }

    my $class  = $hash->{id};
    my $parent = $hash->{parent_id};

    my $id = $hash->{id};

    my $file = File::Spec->catfile( 'lib', 'DateTime', 'Locale', "$class.pm");

    open my $fh, ">:utf8", $file or die $!;

    print $fh <<"EOF" or die "print failed: $!";
###########################################################################
#
# This file is auto-generated by the Perl DateTime Suite time locale
# generator ($VERSION).  This code generator comes with the
# DateTime::Locale distribution in the tools/ directory, and is called
# $ScriptName.
#
# This file as generated from the CLDR XML locale data.  See the
# LICENSE.cldr file included in this distribution for license details.
#
# This file was generated from the source file $XML{$id}{source_file}.
# The source file version number was $hash->{version}, generated on
# $hash->{generation_date}.
#
# Do not edit this file directly.
#
###########################################################################

package DateTime::Locale::$class;

use strict;

BEGIN
{
    if ( \$] >= 5.006 )
    {
        require utf8; utf8->import;
    }
}

use DateTime::Locale::$parent;

\@DateTime::Locale::${class}::ISA = qw(DateTime::Locale::$parent);

$vars

$subs


1;

EOF

    close $fh or warn $!;

    unless ( $opts{name} )
    {
        print $man "$file\n";
    }
}

sub generate_catalog
{
    open my $fh, '>:utf8', File::Spec->catfile( 'lib', 'DateTime', 'LocaleCatalog.pm' )
        or die "$!";

    print $fh <<"EOF";
###########################################################################
#
# This file is auto-generated by the Perl DateTime Suite time locale
# generator ($VERSION).  This code generator comes with the
# DateTime::Locale distribution in the tools/ directory, and is called
# $ScriptName.
#
# This file as generated from the CLDR XML locale data.  See the
# LICENSE.cldr file included in this distribution for license details.
#
# Do not edit this file directly.
#
###########################################################################

package DateTime::LocaleCatalog;

use strict;

BEGIN
{
    if (\$] >= 5.006)
    {
        require utf8; import utf8;
    }
}

EOF

    my $locales_in_pod = '';

    print $fh "\@DateTime::Locale::Locales = (\n";

    my $locale_with_full_names =
        $LanguageLookup{root}{en} && length $LanguageLookup{root}{en} > 3 ? 'root' : 'en';

    for my $id ( sort keys %XML )
    {
        my ($lang, $script, $territory, $variant ) =
            $id =~ /([a-z]+)               # id
                    (?: _([A-Z][a-z]+) )?  # script - Title Case - optional
                    (?: _([A-Z]+) )?       # territory - ALL CAPS - optional
                    (?: _([A-Z]+) )?       # variant - ALL CAPS - optional
                   /x;

        my %lookup;

        $lookup{en_language} = $LanguageLookup{$locale_with_full_names}{$lang};
        $lookup{native_language} =
            $LanguageLookup{$lang}{$lang} || $LanguageLookup{$locale_with_full_names}{$lang};

        if ($script)
        {
            $lookup{en_script} = $ScriptLookup{$locale_with_full_names}{$script};

            $lookup{native_script} =
                ( exists $ScriptLookup{$lang}{$script}
                  ? $ScriptLookup{$lang}{$script}
                  : $ScriptLookup{$locale_with_full_names}{$script} );
        }

        if ($territory)
        {
            $lookup{en_territory} = $TerritoryLookup{$locale_with_full_names}{$territory};

            $lookup{native_territory} =
                ( exists $TerritoryLookup{$lang}{$territory}
                  ? $TerritoryLookup{$lang}{$territory}
                  : $TerritoryLookup{$locale_with_full_names}{$territory} );
        }

        if ($variant)
        {
            $lookup{en_variant} = ucfirst lc $variant;
        }
        else
        {
            $lookup{en_variant} = undef;
        }

        $lookup{native_variant} = $lookup{en_variant};

        print $fh qq|    { id => "\Q$id\E",\n|;
        foreach my $k ( qw( en_language en_script en_territory en_variant
                            native_language native_script native_territory native_variant
                          )
                      )
        {
            next unless defined $lookup{$k};

            my $val = qq|"\Q$lookup{$k}\E"|;
            print $fh "      $k => $val,\n";
        }

        if ( $LocalesWithoutData{$id} )
        {
            my $real_id = $LocalesWithoutData{$id};
            while ( $LocalesWithoutData{$real_id} )
            {
                $real_id = $LocalesWithoutData{$real_id};
            }

            print $fh qq|      real_class => "\Q$real_id\E",\n|;
        }

        print $fh "    },\n";

        my @pieces;
        foreach my $p ( qw( en_language en_territory en_variant ) )
        {
            push @pieces, $lookup{$p} if defined $lookup{$p};
        }

        push @pieces, "($lookup{en_script})" if defined $lookup{en_script};

        $locales_in_pod .= sprintf( " %-18s  %s\n", $id, join ' ', @pieces );
    }

    print $fh ");\n\n";

    my $aliases_in_pod = '';
    %Aliases = ( %Aliases,
                 valid_iso639_2_aliases(),
               );

    print $fh "%DateTime::Locale::Aliases = (\n";
    foreach my $id ( sort keys %Aliases )
    {
        print $fh "    $id => '$Aliases{$id}',\n";

        $aliases_in_pod .= sprintf( " %-18s  %s\n", $id, $Aliases{$id} );
    }
    print $fh ");";

    print $fh <<"EOF";


1;

__END__

=head1 NAME

DateTime::LocaleCatalog - Provides a list of all valid locale names

=head1 SYNOPSIS

See DateTime::Locale for usage details.

=head1 DESCRIPTION

This module contains a list of all known locales.

=head1 LOCALES

Any method taking locale id or name arguments should use one of the
values listed below.  Ids and names are case sensitive.

Always select the closest matching locale - for example, French
Canadians would choose fr_CA over fr - and B<always> use locale ids in
preference to names; locale ids offer greater compatibility when using
localized third party modules.

Many of the available locales are the same as other, more generic
locales for datetime information.  In that case, we simply load the
more generic class.  However, the various methods related to name,
territory, and variant return the values for the requested locale.

The available locales are:

 Locale id           Locale name
 ==================================================
$locales_in_pod

There are also many aliases available, mostly for three-letter
language codes, these are:

 Locale id           Is an alias for
 ==================================================
$aliases_in_pod

=cut
EOF

    close $fh or warn $!;
}

sub valid_iso639_2_aliases
{
    my %aliases = iso639_2_aliases();

    for my $k ( keys %aliases )
    {
        delete $aliases{$k}
            unless $XML{ $aliases{$k} };
    }

    return %aliases;
}

sub usage
{
    print <<'EOF';

This script parses the CLDR locale files and turns them into a set of
Perl modules.  It also generates the MANIFEST file.

It takes the following arguments:

  --dir      A directory containing CLDR XML files.  Required.

  --file     Parse just the file with the given name.  For debugging.

  --name     Only create the specified locale.  For debugging.

  --clean    Remove old generated modules (which may not be valid with
             the latest CLDR data)

  --quiet    Don't display any output while processing files.

  --verbose  Spew lots of output while processing.

  --help     What you are reading

If the --file or --name options are specified, the MANIFEST will not
be generated.

EOF

    exit;
}

sub iso639_2_aliases
{
    return ( 'afr'                  => 'af',
             'afr_ZA'               => 'af_ZA',
             'alb'                  => 'sq',
             'alb_AL'               => 'sq_AL',
             'amh'                  => 'am',
             'amh_ET'               => 'am_ET',
             'ara'                  => 'ar',
             'ara_AE'               => 'ar_AE',
             'ara_BH'               => 'ar_BH',
             'ara_DZ'               => 'ar_DZ',
             'ara_EG'               => 'ar_EG',
             'ara_IN'               => 'ar_IN',
             'ara_IQ'               => 'ar_IQ',
             'ara_JO'               => 'ar_JO',
             'ara_KW'               => 'ar_KW',
             'ara_LB'               => 'ar_LB',
             'ara_LY'               => 'ar_LY',
             'ara_MA'               => 'ar_MA',
             'ara_OM'               => 'ar_OM',
             'ara_QA'               => 'ar_QA',
             'ara_SA'               => 'ar_SA',
             'ara_SD'               => 'ar_SD',
             'ara_SY'               => 'ar_SY',
             'ara_TN'               => 'ar_TN',
             'ara_YE'               => 'ar_YE',
             'arm'                  => 'hy',
             'arm_AM'               => 'hy_AM',
             'arm_AM_REVISED'       => 'hy_AM_REVISED',
             'aze'                  => 'az',
             'aze_AZ'               => 'az_AZ',
             'baq'                  => 'eu',
             'baq_ES'               => 'eu_ES',
             'baq_ES_PREEURO'       => 'eu_ES',
             'bel'                  => 'be',
             'bel_BY'               => 'be_BY',
             'ben'                  => 'bn',
             'ben_IN'               => 'bn_IN',
             'bul'                  => 'bg',
             'bul_BG'               => 'bg_BG',
             'cat'                  => 'ca',
             'cat_ES'               => 'ca_ES',
             'cat_ES_PREEURO'       => 'ca_ES',
             'ces'                  => 'cs',
             'ces_CZ'               => 'cs_CZ',
             'chi'                  => 'zh',
             'chi_CN'               => 'zh_CN',
             'chi_HK'               => 'zh_HK',
             'chi_MO'               => 'zh_MO',
             'chi_SG'               => 'zh_SG',
             'chi_TW'               => 'zh_TW',
             'chi_TW_STROKE'        => 'zh_TW',
             'cor'                  => 'kw',
             'cor_GB'               => 'kw_GB',
             'cze'                  => 'cs',
             'cze_CZ'               => 'cs_CZ',
             'dan'                  => 'da',
             'dan_DK'               => 'da_DK',
             'deu'                  => 'de',
             'deu_AT'               => 'de_AT',
             'deu_AT_PREEURO'       => 'de_AT',
             'deu_BE'               => 'de_BE',
             'deu_CH'               => 'de_CH',
             'deu_DE'               => 'de_DE',
             'deu_DE_PREEURO'       => 'de_DE',
             'deu_LI'               => 'de_LI',
             'deu_LU'               => 'de_LU',
             'deu_LU_PREEURO'       => 'de_LU',
             'dut'                  => 'nl',
             'dut_BE'               => 'nl_BE',
             'dut_BE_PREEURO'       => 'nl_BE',
             'dut_NL'               => 'nl_NL',
             'dut_NL_PREEURO'       => 'nl_NL',
             'ell'                  => 'el',
             'ell_GR'               => 'el_GR',
             'ell_GR_PREEURO'       => 'el_GR',
             'eng'                  => 'en',
             'eng_AS'               => 'en_AS',
             'eng_AU'               => 'en_AU',
             'eng_BE'               => 'en_BE',
             'eng_BE_PREEURO'       => 'en_BE',
             'eng_BW'               => 'en_BW',
             'eng_BZ'               => 'en_BZ',
             'eng_CA'               => 'en_CA',
             'eng_GB'               => 'en_GB',
             'eng_GB_EURO'          => 'en_GB',
             'eng_GU'               => 'en_GU',
             'eng_HK'               => 'en_HK',
             'eng_IE'               => 'en_IE',
             'eng_IE_PREEURO'       => 'en_IE',
             'eng_IN'               => 'en_IN',
             'eng_JM'               => 'en_JM',
             'eng_MH'               => 'en_MH',
             'eng_MP'               => 'en_MP',
             'eng_MT'               => 'en_MT',
             'eng_NZ'               => 'en_NZ',
             'eng_PH'               => 'en_PH',
             'eng_SG'               => 'en_SG',
             'eng_TT'               => 'en_TT',
             'eng_UM'               => 'en_UM',
             'eng_US'               => 'en_US',
             'eng_US_POSIX'         => 'en_US_POSIX',
             'eng_VI'               => 'en_VI',
             'eng_ZA'               => 'en_ZA',
             'eng_ZW'               => 'en_ZW',
             'epo'                  => 'eo',
             'est'                  => 'et',
             'est_EE'               => 'et_EE',
             'eus'                  => 'eu',
             'eus_ES'               => 'eu_ES',
             'eus_ES_PREEURO'       => 'eu_ES',
             'fao'                  => 'fo',
             'fao_FO'               => 'fo_FO',
             'fas'                  => 'fa',
             'fas_IR'               => 'fa_IR',
             'fin'                  => 'fi',
             'fin_FI'               => 'fi_FI',
             'fin_FI_PREEURO'       => 'fi_FI',
             'fra'                  => 'fr',
             'fra_BE'               => 'fr_BE',
             'fra_BE_PREEURO'       => 'fr_BE',
             'fra_CA'               => 'fr_CA',
             'fra_CH'               => 'fr_CH',
             'fra_FR'               => 'fr_FR',
             'fra_FR_PREEURO'       => 'fr_FR',
             'fra_LU'               => 'fr_LU',
             'fra_LU_PREEURO'       => 'fr_LU',
             'fra_MC'               => 'fr_MC',
             'fre'                  => 'fr',
             'fre_BE'               => 'fr_BE',
             'fre_BE_PREEURO'       => 'fr_BE',
             'fre_CA'               => 'fr_CA',
             'fre_CH'               => 'fr_CH',
             'fre_FR'               => 'fr_FR',
             'fre_FR_PREEURO'       => 'fr_FR',
             'fre_LU'               => 'fr_LU',
             'fre_LU_PREEURO'       => 'fr_LU',
             'fre_MC'               => 'fr_MC',
             'geo'                  => 'ka',
             'geo_GE'               => 'ka_GE',
             'ger'                  => 'de',
             'ger_AT'               => 'de_AT',
             'ger_AT_PREEURO'       => 'de_AT',
             'ger_BE'               => 'de_BE',
             'ger_CH'               => 'de_CH',
             'ger_DE'               => 'de_DE',
             'ger_DE_PREEURO'       => 'de_DE',
             'ger_LI'               => 'de_LI',
             'ger_LU'               => 'de_LU',
             'ger_LU_PREEURO'       => 'de_LU',
             'gle'                  => 'ga',
             'gle_IE'               => 'ga_IE',
             'gle_IE_PREEURO'       => 'ga_IE',
             'glg'                  => 'gl',
             'glg_ES'               => 'gl_ES',
             'glg_ES_PREEURO'       => 'gl_ES',
             'glv'                  => 'gv',
             'glv_GB'               => 'gv_GB',
             'gre'                  => 'el',
             'gre_GR'               => 'el_GR',
             'gre_GR_PREEURO'       => 'el_GR',
             'guj'                  => 'gu',
             'guj_IN'               => 'gu_IN',
             'heb'                  => 'he',
             'heb_IL'               => 'he_IL',
             'hin'                  => 'hi',
             'hin_IN'               => 'hi_IN',
             'hrv'                  => 'hr',
             'hrv_HR'               => 'hr_HR',
             'hun'                  => 'hu',
             'hun_HU'               => 'hu_HU',
             'hye'                  => 'hy',
             'hye_AM'               => 'hy_AM',
             'hye_AM_REVISED'       => 'hy_AM_REVISED',
             'ice'                  => 'is',
             'ice_IS'               => 'is_IS',
             'ind'                  => 'id',
             'ind_ID'               => 'id_ID',
             'isl'                  => 'is',
             'isl_IS'               => 'is_IS',
             'ita'                  => 'it',
             'ita_CH'               => 'it_CH',
             'ita_IT'               => 'it_IT',
             'ita_IT_PREEURO'       => 'it_IT',
             'iw'                   => 'iw',
             'iw_IL'                => 'iw_IL',
             'jpn'                  => 'ja',
             'jpn_JP'               => 'ja_JP',
             'kal'                  => 'kl',
             'kal_GL'               => 'kl_GL',
             'kan'                  => 'kn',
             'kan_IN'               => 'kn_IN',
             'kat'                  => 'ka',
             'kat_GE'               => 'ka_GE',
             'kaz'                  => 'kk',
             'kaz_KZ'               => 'kk_KZ',
             'kir'                  => 'ky',
             'kir_KG'               => 'ky_KG',
             'kor'                  => 'ko',
             'kor_KR'               => 'ko_KR',
             'lav'                  => 'lv',
             'lav_LV'               => 'lv_LV',
             'lit'                  => 'lt',
             'lit_LT'               => 'lt_LT',
             'mac'                  => 'mk',
             'mac_MK'               => 'mk_MK',
             'mar'                  => 'mr',
             'mar_IN'               => 'mr_IN',
             'may'                  => 'ms',
             'may_BN'               => 'ms_BN',
             'may_MY'               => 'ms_MY',
             'mkd'                  => 'mk',
             'mkd_MK'               => 'mk_MK',
             'mlt'                  => 'mt',
             'mlt_MT'               => 'mt_MT',
             'mon'                  => 'mn',
             'mon_MN'               => 'mn_MN',
             'msa'                  => 'ms',
             'msa_BN'               => 'ms_BN',
             'msa_MY'               => 'ms_MY',
             'nld'                  => 'nl',
             'nld_BE'               => 'nl_BE',
             'nld_BE_PREEURO'       => 'nl_BE',
             'nld_NL'               => 'nl_NL',
             'nld_NL_PREEURO'       => 'nl_NL',
             'nno'                  => 'nn',
             'nno_NO'               => 'nn_NO',
             'nob'                  => 'nb',
             'nob_NO'               => 'nb_NO',
             'nor'                  => 'no',
             'nor_NO'               => 'no_NO',
             'nor_NO_NY'            => 'no_NO_NY',
             'orm'                  => 'om',
             'orm_ET'               => 'om_ET',
             'orm_KE'               => 'om_KE',
             'pan'                  => 'pa',
             'pan_IN'               => 'pa_IN',
             'per'                  => 'fa',
             'per_IR'               => 'fa_IR',
             'pol'                  => 'pl',
             'pol_PL'               => 'pl_PL',
             'por'                  => 'pt',
             'por_BR'               => 'pt_BR',
             'por_PT'               => 'pt_PT',
             'por_PT_PREEURO'       => 'pt_PT',
             'ron'                  => 'ro',
             'ron_RO'               => 'ro_RO',
             'rum'                  => 'ro',
             'rum_RO'               => 'ro_RO',
             'rus'                  => 'ru',
             'rus_RU'               => 'ru_RU',
             'rus_UA'               => 'ru_UA',
             'san'                  => 'sa',
             'san_IN'               => 'sa_IN',
             'scc'                  => 'sr',
             'scc_YU'               => 'sr_YU',
             'scr'                  => 'hr',
             'scr_HR'               => 'hr_HR',
             'slk'                  => 'sk',
             'slk_SK'               => 'sk_SK',
             'slo'                  => 'sk',
             'slo_SK'               => 'sk_SK',
             'slv'                  => 'sl',
             'slv_SI'               => 'sl_SI',
             'som'                  => 'so',
             'som_DJ'               => 'so_DJ',
             'som_ET'               => 'so_ET',
             'som_KE'               => 'so_KE',
             'som_SO'               => 'so_SO',
             'spa'                  => 'es',
             'spa_AR'               => 'es_AR',
             'spa_BO'               => 'es_BO',
             'spa_CL'               => 'es_CL',
             'spa_CO'               => 'es_CO',
             'spa_CR'               => 'es_CR',
             'spa_DO'               => 'es_DO',
             'spa_EC'               => 'es_EC',
             'spa_ES'               => 'es_ES',
             'spa_ES_PREEURO'       => 'es_ES',
             'spa_GT'               => 'es_GT',
             'spa_HN'               => 'es_HN',
             'spa_MX'               => 'es_MX',
             'spa_NI'               => 'es_NI',
             'spa_PA'               => 'es_PA',
             'spa_PE'               => 'es_PE',
             'spa_PR'               => 'es_PR',
             'spa_PY'               => 'es_PY',
             'spa_SV'               => 'es_SV',
             'spa_US'               => 'es_US',
             'spa_UY'               => 'es_UY',
             'spa_VE'               => 'es_VE',
             'sqi'                  => 'sq',
             'sqi_AL'               => 'sq_AL',
             'srp'                  => 'sr',
             'srp_YU'               => 'sr_YU',
             'swa'                  => 'sw',
             'swa_KE'               => 'sw_KE',
             'swa_TZ'               => 'sw_TZ',
             'swe'                  => 'sv',
             'swe_FI'               => 'sv_FI',
             'swe_SE'               => 'sv_SE',
             'tam'                  => 'ta',
             'tam_IN'               => 'ta_IN',
             'tat'                  => 'tt',
             'tat_RU'               => 'tt_RU',
             'tel'                  => 'te',
             'tel_IN'               => 'te_IN',
             'tha'                  => 'th',
             'tha_TH'               => 'th_TH',
             'tur'                  => 'tr',
             'tur_TR'               => 'tr_TR',
             'ukr'                  => 'uk',
             'ukr_UA'               => 'uk_UA',
             'urd'                  => 'ur',
             'urd_PK'               => 'ur_PK',
             'uzb'                  => 'uz',
             'uzb_UZ'               => 'uz_UZ',
             'vie'                  => 'vi',
             'vie_VN'               => 'vi_VN',
             'zho'                  => 'zh',
             'zho_CN'               => 'zh_CN',
             'zho_HK'               => 'zh_HK',
             'zho_MO'               => 'zh_MO',
             'zho_SG'               => 'zh_SG',
             'zho_TW'               => 'zh_TW',
             'zho_TW_STROKE'        => 'zh_TW',
           );
}
