package Lingua::LO::NLP::Romanize;
use strict;
use warnings;
use 5.012000;
use utf8;
use version 0.77; our $VERSION = version->declare('v0.0.1');
use Carp;
use Scalar::Util 'blessed';
use Lingua::LO::NLP::Syllabify;

=encoding UTF-8

=head1 NAME

Lingua::LO::NLP::Romanize - Romanize Lao syllables

=head1 FUNCTION

This is a factory class for Lingua::LO::NLP::Romanize::*. Currently there
is only L<Lingua::LO::NLP::Romanize::PCGN> but other variants are
planned.

=head1 SYNOPSIS

    my $o = Lingua::LO::NLP::Romanize->new(
        variant => 'PCGN',
        hyphen => 1,
    );

=cut

=head1 METHODS

=head2 new

See L</SYNOPSIS> on how to use the constructor. Arguments supported are:

=over 4

=item C<variant>: standard according to which to romanize. "PCGN" is the only
one currently implemented.

=item C<hyphen>: separate runs of Lao syllables with hyphens. Set this to the
character you would like to use as a hyphen - usually this will be the ASCII
"hyphen minus" (U+002D) but it can be the unambiguous Unicode hyphen ("‐",
U+2010), a slash or anything you like. As a special case, you can pass a 1 to
use the ASCII version. If this argument is missing or C<undef>, blanks are
used. Syllables duplicated using "ໆ" are always joined with a hyphen: either
the one you specify or the ASCII one.

=back

=cut

# Factory constructor.
#
# Called on this package it loads the module for the requested `variant',
# delegates construction to that subclass and stores the hyphen string on the
# resulting object. Called on any other class it simply returns an empty
# blessed hash so that subclasses need not write their own constructor.
#
# Arguments (named):
#   variant - name of the romanization variant; required when called on this
#             package. Must look like a package name component.
#   hyphen  - separator string; 1 selects the ASCII hyphen-minus, undef or
#             missing selects a blank.
# Any remaining arguments are passed on to the subclass constructor.
#
# Dies (via confess) if `variant' is missing or malformed; require() dies if
# the variant module cannot be loaded.
sub new {
    my ($class, %args) = @_;

    # Allow subclasses to omit a constructor
    return bless {}, $class if $class ne __PACKAGE__;

    # If we've been called on Lingua::LO::NLP::Romanize, require a variant
    my $variant = delete $args{variant} or confess("`variant' arg missing");

    # The variant name becomes part of a file path handed to require(), so
    # only accept something shaped like a (possibly nested) package name.
    # This keeps values such as "../../foo" from reaching outside our
    # namespace and gives a clear error for typos.
    $variant =~ /\A[A-Za-z_][A-Za-z0-9_]*(?:::[A-Za-z0-9_]+)*\z/
        or confess("invalid `variant' arg: $variant");

    my $hyphen = delete $args{hyphen} // ' '; # blanks are default

    my $subclass = __PACKAGE__ . "::$variant";
    (my $module = $subclass) =~ s!::!/!g;
    require "$module.pm";

    my $self = $subclass->new(%args);

    # Use an ASCII hyphen-minus if $hyphen is 1
    $self->{hyphen} = $hyphen eq '1' ? '-' : $hyphen;

    return $self;
}

=head2 romanize

    romanize( $text )

Return the romanization of C<$text> according to the standard passed to the
constructor. Text is split up by
L<Lingua::LO::NLP::Syllabify/get_fragments>; Lao syllables are processed
and everything else is passed through unchanged save for possible conversion of
combining characters to a canonically equivalent form in
L<Unicode::Normalize/NFC>.

=cut

# Romanize a whole text.
#
# The text is split into fragments by Lingua::LO::NLP::Syllabify. Each run of
# consecutive Lao syllables is romanized via romanize_syllable() and joined
# with the configured hyphen string; runs of non-Lao fragments are appended
# unchanged.
#
# Parameters: $text - the string to romanize.
# Returns:    the romanized string.
sub romanize {
    my ($self, $text) = @_;

    # The constructor only sets `hyphen' on the factory path; an object
    # created directly on a subclass has none. Fall back to the documented
    # default (a blank) instead of joining with undef.
    my $hyphen = $self->{hyphen} // ' ';

    my @frags = Lingua::LO::NLP::Syllabify->new( $text )->get_fragments;
    my $result = '';
    while (@frags) {
        # Collect the next run of consecutive Lao syllables and romanize it
        my @lao;
        push @lao, shift @frags while @frags and $frags[0]->{is_lao};
        $result .= join $hyphen, map { $self->romanize_syllable( $_->{text} ) } @lao;

        # Copy the following run of non-Lao fragments verbatim
        $result .= (shift @frags)->{text} while @frags and not $frags[0]->{is_lao};
    }
    return $result;
}

=head2 romanize_syllable

    romanize_syllable( $syllable )

Return the romanization of a single C<$syllable> according to the standard passed to the
constructor. This is a virtual method that must be implemented by subclasses.

=cut

# Abstract method: concrete romanization variants must override this.
# Dies when invoked as a class method, and otherwise dies naming the
# object's class as the one lacking an implementation.
sub romanize_syllable {
    my ($self) = @_;

    unless (ref $self) {
        die "romanize_syllable is not a class method";
    }

    die blessed($self) . " must implement romanize_syllable()";
}

1;

