#! /usr/bin/perl -w
# $Id: isspam,v 1.1 2001/07/27 18:27:42 parkerpine Exp $

use strict;
#use lib "../../";
use Mail::MboxParser;
use Getopt::Long;

# Command-line handling: each switch enables one check, --all enables
# every check.  The single positional argument is the mbox file to scan.
my $usage = "usage: $0 [--all] [--capital] [--upper] [--suspicious] "
          . "[--dollar] [--total] mbox-file\n";

my %option;
GetOptions(\%option, 'all', 'dollar', 'upper', 'capital', 'suspicious',
					 'total')
	or die $usage;
die $usage unless defined $ARGV[0];
my $all = $option{all};

# Load every message and flatten all bodies into one list of words.
# NOTE(review): assumes body() yields the message text as a string (or
# something that stringifies to it) -- confirm against the installed
# Mail::MboxParser version.
my $mbox = Mail::MboxParser->new($ARGV[0]);
my @mails = $mbox->get_messages;
my @bodies = map { $_->body } @mails;

# split ' ' breaks on runs of whitespace and drops leading whitespace, so
# blank lines and repeated spaces do not yield empty "words" that would
# inflate $num_words and skew the percentages.
my @words  = map { split ' ', $_ } @bodies;
my $num_words = scalar @words;

# Number of words matched by each check.  They start at zero so that a
# check which was not requested contributes 0 (rather than undef) to any
# later arithmetic.
my ($w_cap, $w_upp, $w_sus, $w_dol) = (0, 0, 0, 0);

#-------------------------
# check for various things
#-------------------------

# capitalized words: one uppercase letter followed only by lowercase letters
if ($option{capital} or $all) {
	my @w_cap = grep { /^[A-Z][a-z]+$/ } @words;
	$w_cap = scalar @w_cap;
	print "-----\n";
	print_nice("capitalized words", $w_cap);
	print join(" ", @w_cap), "\n\n";
}

# uppercase words: consisting solely of the letters A-Z
if ($option{upper} or $all) {
	my @w_upp = grep { /^[A-Z]+$/ } @words;
	$w_upp = scalar @w_upp;
	print "-----\n";
	print_nice("uppercase words", $w_upp);
	print join(" ", @w_upp), "\n\n";
}

# suspicious words: each line after __DATA__ is a regex alternative that
# must match a whole word, case-insensitively
if ($option{suspicious} or $all) {
	chomp(my @w = <DATA>);
	# Drop blank lines: an empty alternative would make the pattern
	# match every empty string.
	@w = grep { /\S/ } @w;
	my @w_sus;
	if (@w) {
		my $pat = join "|", @w;
		my $re  = qr/^($pat)$/i;
		@w_sus = grep { $_ =~ $re } @words;
	}
	$w_sus = scalar @w_sus;
	print "-----\n";
	print_nice("suspicious words", $w_sus);
	print join(" ", @w_sus), "\n\n";
}

# dollars in prices: a lone "$", or a number with a leading or trailing "$"
if ($option{dollar} or $all) {
	my @w_dol = grep { /^\$$|^\$\d+\.?\d*$|^\d+\.?\d*\$$/ } @words;
	$w_dol = scalar @w_dol;
	print "-----\n";
	print_nice("dollars", $w_dol);
	print join("\t", @w_dol), "\n\n";
}

# Computed classification: weighted mean of the per-check percentages.
# Weights are dollars 5, suspicious 20, uppercase 5, capitalized 1; the
# divisor 31 is the sum of those weights.
# NOTE(review): the 'total' option is accepted by GetOptions but never
# consulted; presumably it was meant to gate this output -- confirm.
my $classify = 0;
if ($num_words) {
	# Treat a counter that was never set as zero so the arithmetic
	# stays warning-free under -w.
	my ($dol, $sus, $upp, $cap) =
		map { $_ || 0 } ($w_dol, $w_sus, $w_upp, $w_cap);
	$classify = ($dol/$num_words*100 * 5 +
				 $sus/$num_words*100 * 20 +
				 $upp/$num_words*100 * 5 +
				 $cap/$num_words*100) / 31;
}
# With no words at all the score stays 0 instead of dying on a division
# by zero.
printf "***\nOverall classification: %.3f\n***\n", $classify;


# print_nice($what, $num)
#
# Prints one summary line of the form "<num> <what> found: <pct>%", where
# <pct> is $num as a percentage of the file-level $num_words, shown with
# three decimals.
#   $what - label describing what was counted
#   $num  - number of matching words
sub print_nice {
	my ($what, $num) = @_;
	# An empty word list would otherwise die with a division by zero.
	my $percent = $num_words ? $num/$num_words*100 : 0;
	# The label is passed as an argument rather than interpolated into
	# the format, so a "%" inside it is printed literally.
	printf "%d %s found: %.3f%%\n", $num, $what, $percent;
}

__DATA__
pics
video
porn
.*porno.*
.*sex.*
pamela
kournikova
fucking
blowjob
suck
