use warnings;
use strict;
use ExtUtils::MakeMaker;
use FileHandle;

# Generate Lingua/DE/Tagger/words.yml from the raw dictionary file.
#
# Each usable input line of the form `word|type` is rewritten as
# `word: { type: 1 }`, with a few type names mapped to other tag names
# (prep -> in, vt -> vb, adj -> jj). Lines that contain a space, are empty,
# carry a `-` type, or end up with an `nn` tag are left out.
# Dies if either file cannot be opened or the output cannot be flushed.
my $words_yml_path = 'Lingua/DE/Tagger/words.yml';
my $dict_path      = 'lang_de/word_types.taggerinput';

open my $words_tagger, '>', $words_yml_path
	or die "Cannot open '$words_yml_path' for writing: $!\n";
print {$words_tagger} "--- #YAML:1.0\n";

open my $dict, '<', $dict_path
	or die "Cannot open '$dict_path' for reading: $!\n";
while (my $line = <$dict>) {
	# Drop every CR and LF so DOS line endings are handled too.
	$line =~ tr/\r\n//d;
	next if $line =~ / /;
	next if !$line;

	# Skip entries whose type field is a lone dash.
	next if $line =~ /[|]-($|[|])/;
	$line =~ tr/,//d;

	# Only the first pipe becomes the YAML mapping opener.
	$line =~ s/[|]/: { /;
	$line .= ': 1 }';
	$line =~ s/ n: / nn: /ig;
	next if $line =~ / nn: /;
	$line =~ s/ prep: / in: /ig;
	$line =~ s/ vt: / vb: /ig;
	$line =~ s/ adj: / jj: /ig;

	# NOTE(review): this also rejects any word that itself ends in "nn"
	# (e.g. "wenn: { ... }") - confirm that is intended.
	next if $line =~ /nn:/i;

	print {$words_tagger} $line . "\n" if $line =~ /[{].*?[}]/;
}
close $dict;
# Buffered write errors only surface at close time, so check it.
close $words_tagger
	or die "Cannot finish writing '$words_yml_path': $!\n";

# Delete previously generated *.hash files so that install() below does not
# find them and skip rebuilding the lexicon.
my $tagger_dir_path = 'Lingua/DE/Tagger';
opendir my $tagger_dir, $tagger_dir_path
	or die "Cannot open directory '$tagger_dir_path': $!\n";
my @stale_hash_files = grep { /[.]hash$/ } readdir $tagger_dir;
closedir $tagger_dir;

foreach my $file (@stale_hash_files) {
	my $path = $tagger_dir_path . '/' . $file;
	# A file left behind means install() may keep using old data; say so.
	unlink $path
		or warn "Could not remove '$path': $!\n";
}

# Word lexicon, filled by _load_words and stored to pos_words.hash by install.
my %_LEXICON;
# Tag table, filled by _load_tags and stored to pos_tags.hash by install.
my %_HMM;

# Build the Storable lexicon files under Lingua/DE/Tagger if either one is
# missing.
#
# Loads tags.yml, words.yml and unknown.yml into %_HMM / %_LEXICON and stores
# them as pos_tags.hash / pos_words.hash.
#
# Returns 1 when both hash files exist afterwards, 0 otherwise.
sub install {
        use Storable;
        use File::Spec;

        my $dir = 'Lingua/DE/Tagger';
        my ( $word_path, $tag_path ) =
                map { File::Spec->catfile( $dir, $_ ) }
                qw( pos_words.hash pos_tags.hash );
        print "$word_path\n";

        if ( not( -f $word_path and -f $tag_path ) ) {
                print "Creating part-of-speech lexicon\n";
                _load_tags( File::Spec->catfile( $dir, 'tags.yml' ) );
                for my $yml ( 'words.yml', 'unknown.yml' ) {
                        _load_words( File::Spec->catfile( $dir, $yml ) );
                }
                store \%_LEXICON, $word_path;
                store \%_HMM, $tag_path;
        }

        return ( -f $word_path and -f $tag_path ) ? 1 : 0;
}

# Read a lexicon file of `word: { tag: value, ... }` lines into %_LEXICON.
#
# $file - path of the file to read; it is echoed to STDOUT first.
#
# Lines that do not match the expected shape, or whose word or tag data is
# false, are skipped. Dies with the path and OS error if the file cannot be
# opened.
sub _load_words {
        my ( $file ) = @_;
        print "$file\n";
        open my $fh, '<', $file
                or die "Cannot open lexicon file '$file': $!\n";
        while ( my $entry = <$fh> ) {
                my ( $key, $data ) = $entry =~ m/^"?([^{"]+)"?: { (.*) }/;
                next unless $key and $data;
                my %tags = split /[:,]\s+/, $data;
                foreach my $tag ( keys %tags ) {
                        # Stored as a scalar reference, unlike _load_tags.
                        # NOTE(review): presumably readers of pos_words.hash
                        # dereference this - confirm before changing.
                        $_LEXICON{$key}{$tag} = \$tags{$tag};
                }
        }
        close $fh;
}

# Read a tag file of `tag: { other: value, ... }` lines into %_HMM.
#
# $file - path of the file to read; it is echoed to STDOUT first.
#
# Lines that do not match the expected shape, or whose key or data is false,
# are skipped. Dies with the path and OS error if the file cannot be opened.
sub _load_tags {
        my ( $file ) = @_;
        print "$file\n";
        open my $fh, '<', $file
                or die "Cannot open tag file '$file': $!\n";
        while ( my $entry = <$fh> ) {
                my ( $key, $data ) = $entry =~ m/^"?([^{"]+)"?: { (.*) }/;
                next unless $key and $data;
                my %tags = split /[:,]\s+/, $data;
                foreach my $tag ( keys %tags ) {
                        $_HMM{$key}{$tag} = $tags{$tag};
                }
        }
        close $fh;
}

# install() returns 0 when the hash files are still missing afterwards;
# stop here instead of finishing with a broken lexicon.
install()
	or die "Failed to create the part-of-speech lexicon in Lingua/DE/Tagger\n";

