#!/usr/bin/perl -w
#
# <@LICENSE>
# Copyright 2004 Apache Software Foundation
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>

use strict;
use warnings;
use FindBin;
use Getopt::Std;

# Lingua::Translate has a bunch of requirements (see bottom of its perldoc)
use Lingua::Translate;

# %rules and %scores from tmp/rules.pl
use vars qw { $opt_c $opt_e $opt_h $opt_n $opt_r %rules %scores };

# Abort with the command-line synopsis: the two positional arguments, every
# supported switch, and the language codes accepted as the destination.
# Never returns; the text is handed to die(), and because it ends in a
# newline no "at FILE line N" suffix is appended.
sub usage {
  my $help_text = <<'END_USAGE';
generate-translation language output_file

    -h       print this help
    -e STR   use STR as destination character set (using Lingua::Translate)
    -r STR   use STR as destination character set (using recode)
    -n N     translate first N rules (used for testing)
    -c DIR   use DIR as rules directory

  language should be a two letter language code from this list:

     zh: Chinese-simplified
     zt: Chinese-traditional
     nl: Dutch
     fr: French
     de: German
     el: Greek
     it: Italian
     ja: Japanese
     ko: Korean
     pt: Portuguese
     ru: Russian
     es: Spanish

  progress is displayed on standard error
END_USAGE

  die $help_text;
}

# Parse the command line.  getopts() returns false when it meets an unknown
# switch or a switch whose argument is missing; treat that the same as a
# request for help instead of carrying on with a half-parsed command line.
getopts("hc:e:n:r:") or usage();
usage() if ($opt_h || @ARGV < 2);

# options
my $dest = shift @ARGV;       # destination language code (first argument)
my $output = shift @ARGV;     # file the generated text is written to
my $cffile = $opt_c || "$FindBin::Bin/../rules";  # rules directory (-c)
my $enc = $opt_e || "utf8";       # dest_enc passed to Lingua::Translate (-e)
my $recode = $opt_r || "UTF-8";   # target charset handed to recode (-r)

# rule => configuration hashes
my %english;      # rule name => English "describe" line
my %old;          # rule name => line from an existing 30_text_$dest.cf
my %translation;  # rule name => freshly translated "lang ... describe" line

# translation cache: English description => translated description, so an
# identical description shared by several rules is only translated once
my %lang_cache;

# do the work
read_rules($cffile);
generate_translation();
print_translation();

# Run the parse-rules-for-masses helper (located next to this script) on the
# given rules directory, then load the resulting tmp/rules.pl.
#
#   $cffile - rules directory, passed to the helper after its -d switch
#
# Dies with "unable to parse rules" when the helper cannot be started or
# exits non-zero.  Per the note at the top of this file, tmp/rules.pl is
# what provides %rules and %scores.
sub read_rules {
  my ($cffile) = @_;

  # List-form system() bypasses the shell, so blanks, quotes, '$' or
  # backticks in either path are passed through literally instead of being
  # interpreted.  system() returns 0 on success, hence the "and die".
  system("$FindBin::Bin/parse-rules-for-masses", "-d", $cffile)
      and die "unable to parse rules\n";

  # require already dies when the file is missing or does not end in a true
  # value; the "or die" is kept purely as a backstop.
  require "$FindBin::Bin/tmp/rules.pl"
      or die "unable to read tmp/rules.pl\n";
}

# Translate the English description of every rule in %rules into $dest.
#
# Takes no arguments; works on the file-level variables and fills three
# hashes, all keyed by rule name:
#   %old          "lang $dest describe ..." lines found in an existing
#                 30_text_$dest.cf next to the stock rules
#   %english      the English "describe" line of each processed rule
#   %translation  the newly translated "lang $dest describe" line
#
# Rules that carry a {lang} entry or have no description are skipped.
# Progress goes to STDERR: "." per translated rule, "x" per failure.  With
# -n N the loop stops after N rules have been attempted.
#
# Dies when no Lingua::Translate back end is available for en -> $dest.
sub generate_translation {
  my $fish = Lingua::Translate->new(src => "en",
                                    dest => $dest,
                                    dest_enc => $enc)
      or die "no translation server available for en -> $dest\n";

  # see if we had an old translation
  my $old_file = "$FindBin::Bin/../rules/30_text_$dest.cf";
  if (-f $old_file) {
    if (open(my $old_fh, '<', $old_file)) {
      while (my $line = <$old_fh>) {
        # \Q..\E: the language code is data, not part of the pattern
        if ($line =~ /^lang\s+\Q$dest\E\s+describe\s+(\S+)\s+(.*?)\s*$/) {
          $old{$1} = "lang $dest describe $1 $2\n";
        }
      }
      close($old_fh);
    }
    else {
      # the old translation is only informational, so keep going without it
      warn "unable to read $old_file: $!\n";
    }
  }

  # try to generate new translation
  my $count = 0;
  for my $name (sort keys %rules) {
    next if $rules{$name}->{lang};

    my $describe = $rules{$name}->{describe};
    next unless defined $describe;

    # When the description mentions the rule's own name, the translator may
    # mangle that name.  Build a pattern that finds the mangled form in the
    # translated text so the original name can be restored afterwards.
    my $name_re;
    if ($describe =~ /\Q$name\E/) {
      my $lang_name;
      my $translated = eval {
        $lang_name = $fish->translate($name); # dies or croaks on error
        1;
      };
      if ($translated && defined $lang_name && length $lang_name) {
        # literal match: the translated name may contain regex metacharacters
        $name_re = qr/\Q$lang_name\E/;
      }
      else {
        # Translation of the name failed (or came back empty, which as a
        # pattern would mean "reuse the last successful regex"): fall back
        # to anything that looks like a rule name.
        $name_re = qr/[A-Z]+[A-Z0-9_]+[A-Z0-9]/;
      }
    }

    # English version
    $english{$name} = "describe $name\t$describe\n";

    # translate description
    my $lang_describe = '';
    my $ok = eval {
      if (defined $lang_cache{$describe}) {
        $lang_describe = $lang_cache{$describe};
      }
      else {
        # dies or croaks on error
        $lang_describe = $fish->translate($describe);
        $lang_describe =~ s/\s+/ /sg;   # collapse runs of whitespace
        $lang_describe =~ s/ $//g;      # drop the trailing blank
        $lang_cache{$describe} = $lang_describe;
      }
      1;
    };

    if (!$ok) {
      # didn't work
      print STDERR "x";
    }
    else {
      # put the untranslated rule name back (first occurrence only)
      $lang_describe =~ s/$name_re/$name/ if defined $name_re;
      $translation{$name} = "lang $dest describe $name\t$lang_describe\n";
      print STDERR ".";
    }

    $count++;
    last if ($opt_n && $count == $opt_n);
  }
  print STDERR "\n" if $count > 0;
}

# Write the generated language file to $output.
#
# The file consists of a fixed header, a "lang $dest report_charset" line,
# the report template lines copied from 10_misc.cf (each prefixed with
# "lang $dest"), and then, for every rule in %english, the English
# description, the new translation and any old translation.  All three are
# emitted as comments, presumably so a human reviews them before enabling
# them (the header says "make me pretty").
#
# With -r the finished file is converted from $enc to $recode by
# /usr/bin/recode.
#
# Dies when $output cannot be opened or completely written.  A missing
# 10_misc.cf or a failing recode only produces a warning.
sub print_translation {
  my $charset = $opt_r ? $recode : $enc;
  $charset =~ s/utf-?8/utf-8/i;   # normalise the spelling to "utf-8"

  open(my $out_fh, '>', $output)
      or die "unable to write $output: $!\n";
  print $out_fh <<'EOF';
#
# Please don't modify this file as your changes will be overwritten with
# the next update. Use @@LOCAL_RULES_DIR@@/local.cf instead.
# See 'perldoc Mail::SpamAssassin::Conf' for details.
#
# <@LICENSE>
# Copyright 2004 Apache Software Foundation
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#     http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>
#
###########################################################################

# character set used in the following texts
EOF
  print $out_fh <<EOF;
lang $dest report_charset $charset

# make me pretty
EOF

  # copy the report templates so they can be translated by hand
  my $misc_file = "$FindBin::Bin/../rules/10_misc.cf";
  if (open(my $misc_fh, '<', $misc_file)) {
    while (my $line = <$misc_fh>) {
      if ($line =~ /^(?:clear_report_template|report|clear_unsafe_report_template|unsafe_report)\b/) {
        print $out_fh "lang $dest $line";
      }
    }
    close($misc_fh);
  }
  else {
    warn "unable to read $misc_file: $!\n";
  }

  print $out_fh "\n\n";

  for my $name (sort keys %english) {
    print $out_fh "# $english{$name}";
    print $out_fh "# $translation{$name}" if $translation{$name};
    print $out_fh "# $old{$name}" if $old{$name};
    print $out_fh "\n";
  }

  # Close before recode touches the file: this flushes everything and is
  # the point at which buffered write errors become visible.
  close($out_fh)
      or die "unable to finish writing $output: $!\n";

  if ($opt_r) {
    # list form: no shell, so the charset names and the file name are
    # passed to recode verbatim
    system("/usr/bin/recode", "$enc..$recode", $output) == 0
        or warn "recode $enc..$recode failed for $output\n";
  }
}
