#!/usr/bin/perl -w

use strict;
use warnings;

# Hit counters keyed by test name: over all messages, over spam only, and
# over nonspam only.  They are filled in by readlogs() and readscores().
my %freq = ();
my %freq_spam = ();
my %freq_nonspam = ();

# Number of log lines counted from each log.  Starting at zero keeps the
# summary line free of "uninitialized value" warnings when a log is empty
# or could not be read.
my $num_spam = 0;
my $num_nonspam = 0;

readlogs();
readscores();

printf "%10s  %10s  %10s  %s\n",
  "OVERALL", "SPAM", "NONSPAM", "NAME";

printf "%10d  %10d  %10d  (all messages)\n",
  $num_spam + $num_nonspam, $num_spam, $num_nonspam;

# One row per known test, highest spam count first.  Tests with the same
# spam count are ordered by name so that the report is reproducible from
# run to run instead of following hash order.
foreach my $test (sort {
      ($freq_spam{$b} || 0) <=> ($freq_spam{$a} || 0)
        or $a cmp $b
    } keys %freq)
{
  # A test may be present in %freq without having an entry on one side.
  $freq_spam{$test} ||= 0;
  $freq_nonspam{$test} ||= 0;
  printf "%10d  %10d  %10d  %s\n",
    $freq{$test}, $freq_spam{$test}, $freq_nonspam{$test}, $test;
}
exit;

# readlogs()
#
# Reads "spam.log" and then "nonspam.log" from the current directory and
# updates the file-level counters:
#   - $num_spam / $num_nonspam get one increment per matching log line;
#   - %freq, plus %freq_spam or %freq_nonspam, get one increment per test
#     name listed on such a line.
#
# A matching line is: any single character, whitespace, a run of digits,
# whitespace, one non-blank field, then an optional non-blank field holding
# a comma-separated list of test names.  Lines that do not match are
# ignored.
#
# A log that cannot be opened is reported on STDERR and skipped, so the
# other log is still processed.  Takes no arguments; return value unused.
sub readlogs {
  foreach my $file ("spam.log", "nonspam.log") {
    my $isspam = ($file eq 'spam.log') ? 1 : 0;

    my $in;
    unless (open ($in, '<', $file)) {
      warn "cannot open $file: $!\n";
      next;
    }

    while (my $line = <$in>) {
      $line =~ /^.\s+(\d+)\s+(\S+)\s*(\S*)/ or next;
      my $testlist = $3;

      if ($isspam) {
        $num_spam++;
      } else {
        $num_nonspam++;
      }

      # Empty fields (from doubled commas or an empty list) are skipped.
      foreach my $t (split (/,/, $testlist)) {
        next if ($t eq '');
        $freq{$t}++;
        if ($isspam) {
          $freq_spam{$t}++;
        } else {
          $freq_nonspam{$t}++;
        }
      }
    }
    close $in;
  }
}


# readscores()
#
# Scans every file matching ../rules/[0-9]*.cf and makes sure each test
# name found there has an entry in %freq, %freq_spam and %freq_nonspam.
# Existing counts are left alone; missing ones are set to 0, so a test that
# never appeared in the logs is still a key of %freq.
#
# After "#" comments and surrounding whitespace are stripped, a test name
# is taken from:
#   - the second word of a line starting with "header", "body" or "full"
#     (and followed by more text), or
#   - the second word of a line starting with "score" (and followed by
#     more text).
#
# A file that cannot be opened is reported on STDERR and skipped.
# Takes no arguments; return value unused.
sub readscores {
  my @files = <../rules/[0-9]*.cf>;

  foreach my $file (@files) {
    my $in;
    unless (open ($in, '<', $file)) {
      warn "cannot open $file: $!\n";
      next;
    }

    while (my $line = <$in>) {
      $line =~ s/#.*$//g;
      $line =~ s/^\s+//;
      $line =~ s/\s+$//;

      my $name;
      if ($line =~ /^(header|body|full)\s+(\S+)\s+/) {
        $name = $2;
      } elsif ($line =~ /^score\s+(\S+)\s+(.+)$/) {
        $name = $1;
      } else {
        next;
      }

      $freq{$name} ||= 0;
      $freq_spam{$name} ||= 0;
      $freq_nonspam{$name} ||= 0;
    }
    close $in;
  }
}

