#!/usr/bin/perl -w

# Uncomment the following to debug the SORatio -> score-range mapping:
#for $rat (0.1, 0.3, 0.5, 0.7, 1.0) {
#my ($lo, $hi) = ratio_in_0to1_to_range($rat); warn "JMD $rat $lo $hi";
#} die;

use strict;

# Rules location handed to parse-rules-for-masses via -d.  Taken from the
# first command-line argument; defaults to "../rules".  Whatever is left in
# @ARGV (or STDIN) is read later as the frequency table.
my $argcffile = shift @ARGV;
$argcffile = "../rules" unless defined $argcffile;

# Package global filled in by ./tmp/rules.pl (required below); indexed by
# test name, each entry is used as a hash ref with "tflags" and "score".
our %rules;

# Per-test figures collected from the frequency table.
my %freq = ();
my %freq_spam = ();
my %freq_nonspam = ();
my %soratio = ();

# Totals taken from the "(all messages)" row of the frequency table.
my $num_spam;
my $num_nonspam;
my $num_total;

# test name => 1 if a score range should be computed for it, 0 if its
# current score is to be kept as-is.
my %mutable_tests = ();

# Regenerate ./tmp/rules.pl from the rules.  The list form of system()
# bypasses the shell, so the path cannot be reinterpreted by it.
system ("./parse-rules-for-masses", "-d", $argcffile) == 0
  or die "parse-rules-for-masses -d $argcffile failed (status $?)\n";
require "./tmp/rules.pl";

# Read the frequency table (files named in @ARGV, or STDIN) and record for
# each known rule its hit figures, its spam/overall ratio, and whether a
# score range should be computed for it.
while (my $line = <>) {
  # Expected layout: overall, spam, nonspam, S/O ratio, one ignored field,
  # then the test name (which may contain spaces).  Lines not matching this
  # layout are skipped.  The name is matched lazily so trailing whitespace
  # never ends up as part of it.
  my ($overall, $spam, $nonspam, $soratio, $test) =
    ($line =~ /^\s*([\d\.]+)\s+([\d\.]+)\s+([\d\.]+)\s+([\d\.]+)\s+\S+\s+(.+?)\s*$/)
    or next;

  # force numeric values
  $_ += 0 for ($overall, $spam, $nonspam, $soratio);

  if ($test eq '(all messages)') {
    $num_spam = $spam;
    $num_nonspam = $nonspam;
    $num_total = $spam + $nonspam;
    next;
  }
  next if ($test eq '(all messages as %)');

  if (!defined ($rules{$test})) {
    warn "rule $test no longer exists; ignoring\n";
    next;
  }

  $freq{$test} = $overall;
  $freq_spam{$test} = $spam;
  $freq_nonspam{$test} = $nonspam;

  my $tflags = $rules{$test}->{tflags} || '';

  # Tests flagged "net" or "userconf" are never given a computed range.
  $mutable_tests{$test} = ($tflags =~ /\b(?:net|userconf)\b/) ? 0 : 1;

  if ($overall < 0.01) {        # less than 0.01% of messages were hit
    # too rare to say anything useful: freeze it and sort it last
    $mutable_tests{$test} = 0;
    $soratio{$test} = 0;
    next;
  }

  # "nice" tests should always match more nonspam than spam.  Invert
  # the figure so that 0.0 = bad, 1.0 = good, instead of the reverse.
  $soratio = 1.0 - $soratio if ($tflags =~ /\bnice\b/);

  $soratio{$test} = $soratio;
}

# Write tmp/ranges.data: one "LO HI MUTABLE NAME" line per test, best
# ratio first.  Ties are broken by name so the output order is stable.
mkdir ("tmp", 0777) unless -d "tmp";
open (my $out, '>', "tmp/ranges.data")
  or die "cannot open tmp/ranges.data for writing: $!\n";

foreach my $test (sort { $soratio{$b} <=> $soratio{$a} or $a cmp $b }
                  keys %freq)
{
  if (!defined ($rules{$test})) {
    warn "no rule $test";
    print {$out} ("0 0 0 $test\n");
    next;
  }

  my $soratio = $soratio{$test};
  my $mutatable = $mutable_tests{$test};
  my $tflags = $rules{$test}->{tflags} || '';

  if (!$mutatable) {
    # Immutable test: pin both ends of the range to its current score.
    # The name is passed as an argument, not spliced into the format, so
    # a '%' in it cannot be taken for a conversion.
    printf {$out} ("%3.1f %3.1f 0 %s\n",
                         $rules{$test}->{score},
                         $rules{$test}->{score},
                         $test);
    next;
  }

  # now we have a number between 0.0 and 1.0 indicating how
  # effective the test is. Come up with a reasonable range
  # for scores based on this.
  my ($lo, $hi) = ratio_in_0to1_to_range($soratio,
                ($tflags =~ /\bnice\b/));

  printf {$out} ("%3.1f %3.1f 1 %s\n", $lo, $hi, $test);
}

# Buffered write errors only surface at close time, so check it.
close ($out) or die "error writing tmp/ranges.data: $!\n";
exit;

# Turn an effectiveness ratio into a (lo, hi) score range.
#
#   $ratio    effectiveness figure; callers pass a value in 0.0 .. 1.0
#   $is_nice  true for "nice" tests, whose range is negative
#
# Returns the two-element list (lo, hi).  With the constants below, a
# ratio of 1.0 yields (0.0001, 5.0) and a ratio of 0 yields (0, 1.0).
# For nice tests both bounds are multiplied by -2 and swapped.
sub ratio_in_0to1_to_range {
  my ($ratio, $is_nice) = @_;

  # Each divisor is written as "ratio / score at that ratio": the "top"
  # pair describes ratio 1.0 (scores 0.0001 and 5.0), the "base" pair is
  # written for ratio 0.1 (scores 0.0001 and 1.0).  An alternative tuning
  # uses 1.0 / 4.0 for the top ceiling.
  my $top_floor_div    = 1.0 / 0.0001;
  my $top_ceiling_div  = 1.0 / 5.0;
  my $base_floor_div   = 0.1 / 0.0001;
  my $base_ceiling_div = 0.1 / 1.0;

  # How much each divisor grows as the ratio goes from 0 to 1.
  my $floor_growth   = abs($base_floor_div - $top_floor_div);
  my $ceiling_growth = abs($base_ceiling_div - $top_ceiling_div);

  my $lo = $ratio / ($ratio * $floor_growth + $base_floor_div);
  my $hi = $ratio / ($ratio * $ceiling_growth + $base_ceiling_div);

  # some "wiggle room" when the ratio collapses the range to nothing
  $hi = 1.0 if $hi == 0;

  return ($lo, $hi) unless $is_nice;

  # "nice" scores are negative: negate, double the range so they may go
  # lower, and swap the ends so that lo < hi still holds.
  return ($hi * -2.0, $lo * -2.0);
}

