#!/usr/bin/perl -s

use v5.10;
use strict;
use warnings;

use File::Basename qw(basename);
use XML::TMX::Reader;

# Command-line switches.  The shebang line runs perl with -s, which stores
# each -name / -name=value switch given before the file name in the package
# variable of the same name; that is why these are declared with "our".
our (
     $junk,        # when true (the default), drop units in which any language holds only junk
     $output,      # output filename
     $v, $verbose  # either one enables progress reporting on STDERR
    );

my $cleaned = 0;    # translation units removed so far
my $processed = 0;  # translation units seen so far
my $tmx = shift or help();
my $reader = XML::TMX::Reader->new($tmx);

# Junk removal is on unless explicitly switched off with -junk=0.
$junk //= 1;

# -verbose was declared but never consulted; treat it as a long form of -v.
$v ||= $verbose;

# Build the default output name from the file name only, so that an input
# such as dir/file.tmx yields _cleaned_file.tmx in the current directory
# rather than a path inside the (normally non-existent) _cleaned_dir/.
$output ||= '_cleaned_' . basename($tmx);

print STDERR "loading..." if $v;

# cleaner() is called for every translation unit; its return value tells the
# reader whether the unit is kept.
$reader->for_tu( {output => $output},
                  \&cleaner);

# A file without translation units leaves $processed at 0, so guard the
# percentage instead of dying with "Illegal division by zero".
printf STDERR "\rRemoved %d/%d (%.3f%%).\n",
              $cleaned, $processed,
              ($processed ? 100*$cleaned/$processed : 0) if $v;

# Per-unit callback handed to for_tu: decides whether one translation unit
# survives.
#
# Takes the unit as a hash reference.  Keys starting with "-" are skipped;
# every other key is treated as a language whose value is tested as text.
# NOTE(review): this assumes each language value is the segment string
# itself -- confirm against the installed XML::TMX::Reader version.
#
# Returns the unit unchanged to keep it, or undef to drop it.  Updates the
# file-level counters $processed and $cleaned, and with $v set prints a
# progress line to STDERR every 1000 units.
sub cleaner {
    my ($unit) = @_;
    ++$processed;

    my $is_junk = 0;
    if ($junk) {
        # A unit is junk when any language is only digits/punctuation/space
        # or contains no word character at all.
        my @junk_langs =
            grep { $unit->{$_} =~ /^[-.,0-9\s]+$/ || $unit->{$_} =~ /^\W+$/ }
            grep { !/^-/ }
            keys %$unit;
        $is_junk = 1 if @junk_langs;
    }

    $cleaned += 1 if $is_junk;

    if ($v && $processed % 1000 == 0) {
        printf STDERR "\rRemoved %d/%d (%.3f%%)...",
            $cleaned, $processed, 100*$cleaned/$processed;
    }

    # Explicit undef (not a bare return) so the caller always receives
    # exactly one value, as before.
    return undef if $is_junk;
    return $unit;
}

# Print a usage summary and terminate with exit status 1.
#
# Called when no input file is given.  The message lists every switch the
# script declares and goes to STDERR, since it is only reached on the error
# path.  $0 is used so the name shown matches however the script was invoked.
sub help {
    print STDERR "Usage: $0 [-junk=0|1] [-output=<out.tmx>] [-v] <file.tmx>\n";
    exit 1;
}

=encoding UTF-8

=head1 NAME

tmx-clean - remove junk translation units from a TMX file

=head1 SYNOPSIS

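   $ tmx-clean file.tmx                    # writes _cleaned_file.tmx
   $ tmx-clean -output=clean.tmx file.tmx  # choose the output file
   $ tmx-clean -v file.tmx                 # report progress on standard error
   $ tmx-clean -junk=0 file.tmx            # disable junk removal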

=head1 DESCRIPTION

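Processes a TMX file one translation unit at a time with
L<XML::TMX::Reader> and writes the units that are kept to an output file.

A translation unit is removed when the content of any of its languages
consists only of digits, whitespace and the characters C<->, C<.> and
C<,>, or contains no word characters at all.

Options use perl's C<-s> switch syntax and must come before the file name:

=over 4

=item C<-junk=0>

Disable junk removal. It is enabled by default.

=item C<-output=FILE>

Name of the output file. The default is the input file name prefixed
with C<_cleaned_>.

=item C<-v>

Print progress and a final count of removed units to standard error.

=back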

=head1 SEE ALSO

XML::TMX

=cut
