#! /usr/local/bin/perl -- # -*-Perl-*-  $Revision: 1.3 $
#
# Scan the INN log file and produce a summary of article processing.
# Written by Mike Cooper <mcooper@usc.edu>

# =()<$INNLogFile = "@<_PATH_LOGFILE>@";>()=
$INNLogFile = "/var/log/news/news";
$Prog = "innreport";
$| = 1;
# Limit for the per-reason tables: a positive value prints only that many
# entries per table, a negative value prints every entry, and 0 turns the
# reject-reason bookkeeping and the error summary off altogether.
$Top = 10;

#
# Command line: [ -innlog <file> ] [ -top <num> ]
# Each option takes the following argument as its value.
#
for ($I = 0; $I <= $#ARGV; $I++) {
    $arg = $ARGV[$I];
    $ArgReq = 0;
    if ($arg eq "-innlog") {
	if (++$I > $#ARGV) {
	    $ArgReq = 1;
	} else {
	    $INNLogFile = $ARGV[$I];
	}
    } elsif ($arg eq "-top") {
	if (++$I > $#ARGV) {
	    $ArgReq = 1;
	} elsif ($ARGV[$I] !~ /^-?\d+$/) {
	    # $Top is only ever compared numerically, so a non-numeric
	    # value would silently behave like 0; reject it instead.
	    printf STDERR "Numeric argument required for \"%s\" option.\n",
		$arg;
	    &Usage();
	    exit(1);
	} else {
	    $Top = $ARGV[$I];
	}
    } else {
	printf STDERR "Unknown option \"%s\".\n", $arg;
	&Usage();
	exit(1);
    }
    if ($ArgReq) {
	printf STDERR "Argument required for \"%s\" option.\n", $arg;
	&Usage();
	exit(1);
    }
}

#
# The main things we do
#
# Subroutines are called as &Name(...): the older "do Name(...)" form is
# no longer accepted by current Perl releases, while &Name(...) works on
# old and new interpreters alike.
#
&GatherINNLog($INNLogFile);
&Report();

exit(0);

#
# Subroutines
#

sub Usage {
    # Write the one-line command synopsis to STDERR.  The program name
    # shown is taken from the global $Prog.
    local($Name) = $Prog;

    printf(STDERR "usage: %s [ -innlog <file> ] [ -top <num> ]\n", $Name);
}

#
# Gather data from the inn log file
#
sub GatherINNLog {
    # Read the log file named by the first argument and tally it into
    # the global tables used by the report:
    #   %Hosts    every value seen in field 4 (the sending host)
    #   %Accept   per-host count of lines with status "+"
    #   %Reject   per-host count of lines with status "-"
    #   %Junked   per-host count of lines with status "j"
    #   %Sent     count for every field from index 6 on of a "+" line
    #             (presumably the sites the article was passed to)
    #   %Unknown  lines whose status field is none of the above
    # $StartTime / $EndTime receive the time stamps of the first and
    # last line, and the $Total* scalars the grand totals.
    # If the file cannot be opened a message is written to STDERR and
    # the subroutine returns without gathering anything.
    local($File) = $_[0];

    $StartTime = "";
    $EndTime = "";

    if (!open(IN, "< $File")) {
	printf STDERR "%s: open failed: %s.\n", $File, $!;
	return;
    }

    while (<IN>) {
	# Remove only a real newline; chop would eat the last character
	# of a final line that has no trailing newline.
	s/\n$//;
	@Fields = split;

	# Every line moves the end time forward and the first line also
	# fixes the start time, so a one-line log gets both stamps.
	$EndTime = &GetTime(@Fields);
	if ($StartTime eq "") {
	    $StartTime = $EndTime;
	}

	$Status = $Fields[3];
	$RHost = $Fields[4];
	$Hosts{$RHost} = $RHost;

	if ($Status eq "+") {	# Article accepted
	    $Accept{$RHost}++;
	    for ($i = 6; $i <= $#Fields; $i++) {
		$Sent{$Fields[$i]}++;
	    }
	} elsif ($Status eq "-") { # Article Rejected
	    $Reject{$RHost}++;
	    # The reasons are only needed for the error summary, which
	    # is switched off when $Top is 0.
	    if ($Top != 0) {
		&CheckReason(@Fields);
	    }
	} elsif ($Status eq "j") { # Article Junked
	    # Keyed by the sending host so that the "Junked" column of
	    # the per-host table can find the count.
	    $Junked{$RHost}++;
	} else {
	    $Unknown{$_} = $_;
	    $TotalUnknown++;
	}
    }
    close(IN);

    foreach $v (values %Accept) {
	$TotalAccept += $v;
    }
    foreach $v (values %Reject) {
	$TotalReject += $v;
    }
    foreach $v (values %Junked) {
	$TotalJunked += $v;
    }
    # %DupArt is filled in by CheckReason for "Duplicate" rejects.
    foreach $v (values %DupArt) {
	$TotalDupArt += $v;
    }
    foreach $v (values %Sent) {
	$TotalSent += $v;
    }
}

#
# Main report function
#
sub Report {
    # Print the report banner with the time span held in $StartTime and
    # $EndTime, then the article-processing table and, unless $Top is 0,
    # the summary of rejection reasons.
    printf "\n";
    printf "                    *** USENET News Summary Report ***\n";
    printf "                    ***  From %s  ***\n", $StartTime;
    printf "                    ***    To %s  ***\n\n", $EndTime;

    # Called as &Name(): the older "do Name()" call form is no longer
    # accepted by current Perl releases.
    &ReportProcessing();
    if ($Top != 0) {
	&ReportReasons();
    }
}

#
# Report on article processing
#
sub ReportProcessing {
    # Print the "Summary of Incoming Article Processing" table: one row
    # per key of %Hosts with its accepted/rejected counts and their
    # percentages, followed by the values of %Sent, %Junked and %DupArt
    # for that host, and finally a TOTALS row built from the $Total*
    # scalars.  Prints nothing at all when no host was recorded.
    local(@HostNames) = sort keys(%Hosts);
    local($PctAccept, $PctReject, $Handled);

    # Test the host list itself; %Hosts is a hash, so the last index of
    # an array called @Hosts says nothing about it.
    if ($#HostNames < 0) {
	return;
    }

    printf "Summary of Incoming Article Processing\n\n";
    printf "%-25s %8s %4s %8s %4s %8s %7s %7s\n",
	"Host", "Accept", "Pct", "Reject", "Pct", "Sent", "Junked", "Dup";

    foreach $Host (@HostNames) {
	# Start every row from zero so that a host without accepted or
	# rejected articles does not show the previous host's figures.
	$PctAccept = 0;
	$PctReject = 0;
	$Handled = $Accept{$Host} + $Reject{$Host};
	if ($Handled > 0) {
	    $PctReject = ($Reject{$Host} / $Handled) * 100;
	    $PctAccept = ($Accept{$Host} / $Handled) * 100;
	}
	# Lines without a host field are recorded under the empty name;
	# they get no row of their own.
	if ($Host ne "") {
	    printf "%-25s %8d %3d%% %8d %3d%% %8d %7d %7d\n",
		$Host, $Accept{$Host}, $PctAccept,
		$Reject{$Host}, $PctReject, $Sent{$Host},
		$Junked{$Host}, $DupArt{$Host};
	}
    }

    $TotalPctAccept = 0;
    $TotalPctReject = 0;
    if ($TotalAccept || $TotalReject) {
	$TotalPctReject = ($TotalReject / ($TotalAccept + $TotalReject)) * 100;
	$TotalPctAccept = ($TotalAccept / ($TotalAccept + $TotalReject)) * 100;
    }

    printf "\n%-25s %8d %3d%% %8d %3d%% %8d %7d %7d\n\n",
	"TOTALS", $TotalAccept, $TotalPctAccept, $TotalReject,
	$TotalPctReject, $TotalSent,
	$TotalJunked, $TotalDupArt;
}

#
# Get a nice time string.
#
sub GetTime {
    # Build the time stamp of a log line from its first three fields
    # (presumably month, day of month and time of day).  The second
    # field is right-aligned in two columns.
    local($Mon, $Day, $Clock) = @_;

    return(sprintf("%s %2s %s", $Mon, $Day, $Clock));
}

#
# Check the reason why the article was not accepted.
#
sub CheckReason {
    # Classify one rejected log line, passed as its list of fields, by
    # the reason words found from field 7 on, and bump the matching
    # global counters (%UnwantedDist, %MissHdr, %DupHdr, ... and the
    # corresponding $Total* scalars).  Lines matching none of the known
    # reasons are stored in %Unknown.
    #
    # Most counters are keyed by the "posting host": field 5 (presumably
    # the Message-ID) reduced to what follows its last "@", without the
    # closing ">".  A field without "@" is used unchanged.
    local(@Fields) = @_;

    $PostHost = $Fields[5];
    if ($PostHost =~ /.*@.*/) {
	$PostHost =~ s/.*@//;
	$PostHost =~ s/>//;
	$PostHost =~ s/ //g;
    }
    $FromHost = $Fields[4];

    # The order of the tests matters: the two-word reasons starting
    # with "Unwanted", "Duplicate" and "Bad" must be tried before the
    # one-word tests on the same first word.
    if ($Fields[7] . $Fields[8] eq "Unwanteddistribution") {
	$dist = $Fields[9];
	$dist =~ s/\"//g;
	$UnwantedDist{$dist}++;
	$UnwantedDistHost{$PostHost} = $dist;
	$TotalUnwantedDist++;
    } elsif ($Fields[7] eq "Missing") {
	local($str) = $Fields[8];
	$str =~ s/\"//g;
	$MissHdr{$str}++;
	$MissHdrHost{$PostHost} = $str;
	$TotalMissHdr++;
    } elsif ($Fields[7] . $Fields[9] eq "Duplicateheader") {
	local($name) = $Fields[8];
	$name =~ s/\"//g;
	# Count the occurrences like every other reason table does; the
	# report prints and sorts this value as a number.
	$DupHdr{$name}++;
	$DupHdrHost{$PostHost} = $name;
	$TotalDupHdr++;
    } elsif ($Fields[7] . $Fields[9] eq "Badheader") {
	local($name) = $Fields[8];
	$name =~ s/\"//g;
	$BadHdr{$name}++;
	$BadHdrHost{$PostHost} = $name;
	$TotalBadHdr++;
    } elsif ($Fields[7] eq "Duplicate") {
	# Duplicate articles are charged to the sending host (field 4).
	local($h) = $Fields[4];
	$DupArt{$h}++;
    } elsif ($Fields[7] eq "Unwanted") {
	local($name) = $Fields[8];
	$name =~ s/\"//g;
	$UnWanted{$name}++;
	$TotalUnWanted++;
    } elsif ($Fields[7] eq "Unapproved") {
	local($name) = $Fields[9];
	$name =~ s/\"//g;
	$UnApproved{$name}++;
	$UnApprovedHost{$PostHost} = $name;
	$TotalUnApproved++;
    } elsif ($Fields[7] eq "Linecount") {
	$LineCount{$PostHost}++;
	$TotalLineCount++;
    } elsif ($Fields[7] . $Fields[8] eq "Tooold") {
	$TooOld{$PostHost}++;
	$TotalTooOld++;
    } elsif ($Fields[7] . $Fields[11] eq "Articlefuture") {
	$InFuture{$PostHost}++;
	$TotalInFuture++;
    } elsif ($Fields[7] . $Fields[8] eq "Nocolon-space") {
	$NoColonSpace{$PostHost}++;
	$TotalNoColonSpace++;
    } elsif ($Fields[7] . $Fields[8] . $Fields[9] eq "Morethanone") {
	$MultiAmper{$PostHost}++;
	$TotalMultiAmper++;
    } elsif ($Fields[7] . $Fields[8] . $Fields[9] . $Fields[10] eq
	     "\"Message-ID\"headertoolong") {
	$MsgIDTooLong++;
    } elsif ($Fields[7] . $Fields[8] . $Fields[9] . $Fields[10] eq
	     "Spacein\"Newsgroups\"header") {
	$NGSpace{$PostHost}++;
	$TotalNGSpace++;
    } elsif ($Fields[7] . $Fields[8] eq "No'@'") {
	$NoAmper{$PostHost}++;
	$TotalNoAmper++;
    } elsif ($Fields[7] . $Fields[8] eq "Articleexceeds") {
	$TooLarge{$PostHost}++;
	$TotalTooLarge++;
    } elsif ($Fields[7] . $Fields[8] eq "Baddate") {
	$BadDate{$PostHost}++;
	$TotalBadDate++;
    } elsif ($Fields[7] . $Fields[8] eq "Badexpiration") {
	$BadExpire{$PostHost}++;
	$TotalBadExpire++;
    } elsif ($Fields[7] . $Fields[8] eq "Nobody") {
	# Articles without a body are charged to the sending host.
	$NoBody{$FromHost}++;
	$TotalNoBody++;
    } else {
	local($l) = join(' ', @Fields);
	$Unknown{$l} = $l;
	$TotalUnknown++;
    }
}

#
# Report error messages
#
sub ReportReasons {
    # Print the "Summary of Errors" part of the report: one table per
    # rejection reason that occurred at least once, in a fixed order,
    # followed by the number of over-long Message-ID rejects and the
    # list of lines that could not be classified.
    #
    # Every table is produced by one of two helpers and is limited by
    # $Top (positive: at most that many rows, negative: all rows).  The
    # helpers receive the count hashes as typeglobs (*Name) together
    # with the name of the matching By* sort comparator.
    printf "\nSummary of Errors\n";

    &ReportReasonHosts("Unwanted Distributions", "Distribution", 30,
		       $TotalUnwantedDist, "ByUnwantedDist",
		       *UnwantedDist, *UnwantedDistHost);
    &ReportReasonCounts("UnWanted Groups", "Group",
			$TotalUnWanted, "ByUnWanted", *UnWanted);
    &ReportReasonHosts("Missing Headers", "Header", 20,
		       $TotalMissHdr, "ByMissHdr",
		       *MissHdr, *MissHdrHost);
    &ReportReasonHosts("Duplicate Headers", "Header", 20,
		       $TotalDupHdr, "ByDupHdr",
		       *DupHdr, *DupHdrHost);
    &ReportReasonHosts("Bad Headers", "Header", 20,
		       $TotalBadHdr, "ByBadHdr",
		       *BadHdr, *BadHdrHost);
    &ReportReasonHosts("UnApproved Postings", "Group", 20,
		       $TotalUnApproved, "ByUnApproved",
		       *UnApproved, *UnApprovedHost);
    &ReportReasonCounts("Hosts with a Bad Date", "Posting Host",
			$TotalBadDate, "ByBadDate", *BadDate);
    &ReportReasonCounts("Hosts with a Bad Expiration Date", "Posting Host",
			$TotalBadExpire, "ByBadExpire", *BadExpire);
    &ReportReasonCounts("Hosts with Articles in the Future", "Posting Host",
			$TotalInFuture, "ByInFuture", *InFuture);
    &ReportReasonCounts("Hosts with Too Old Articles", "Posting Host",
			$TotalTooOld, "ByTooOld", *TooOld);
    &ReportReasonCounts("Hosts with no Colon-Space", "Posting Host",
			$TotalNoColonSpace, "ByNoColonSpace", *NoColonSpace);
    &ReportReasonCounts("Hosts with no Newsgroups:-Space", "Posting Host",
			$TotalNGSpace, "ByNGSpace", *NGSpace);
    # This table now gets the same blank line and "Top N" prefix as all
    # the others.
    &ReportReasonCounts("Hosts with Too Large Articles", "Posting Host",
			$TotalTooLarge, "ByTooLarge", *TooLarge);
    &ReportReasonCounts("Hosts with Bad LineCounts", "Posting Host",
			$TotalLineCount, "ByLineCount", *LineCount);
    &ReportReasonCounts("Hosts with more than one '@' in Message-ID's",
			"Posting Host",
			$TotalMultiAmper, "ByMultiAmper", *MultiAmper);
    &ReportReasonCounts("Articles with no '@'", "Message-ID",
			$TotalNoAmper, "ByNoAmper", *NoAmper);
    &ReportReasonCounts("Hosts sending articles with No Body", "Sending Host",
			$TotalNoBody, "ByNoBody", *NoBody);

    if ($MsgIDTooLong > 0) {
	printf
	    "\nRejected %d articles with Message-ID's that were too long.\n",
	    $MsgIDTooLong;
    }

    if ($TotalUnknown > 0) {
	printf "\nThe following lines are unparsable [Total %d]:\n",
		$TotalUnknown;
	# Sorted so that the listing is the same on every run.
	foreach $line (sort(keys(%Unknown))) {
	    printf "%s\n", $line;
	}
    }
}

#
# Print the heading line of one error table: a blank line, the optional
# "Top N" prefix, the title and the total.
#
sub ReportReasonHeading {
    local($Title, $Total) = @_;

    printf "\n";
    if ($Top > 0) {
	printf "Top %d ", $Top;
    }
    printf "%s [Total %d]:\n", $Title, $Total;
}

#
# Print one two-column error table (name, count).
# Arguments: title, heading of the name column, total, name of the sort
# comparator, typeglob of the count hash.  Prints nothing if the total
# is not positive.
#
sub ReportReasonCounts {
    local($Title, $Column, $Total, $SortSub) = @_;
    local(*Counts) = $_[4];	# %Counts is now the caller's hash
    local(@Sorted, $c, $name);

    if ($Total <= 0) {
	return;
    }

    &ReportReasonHeading($Title, $Total);
    printf "%-30s %8s\n", $Column, "Count";
    @Sorted = sort $SortSub keys(%Counts);
    for ($c = 0; $c <= $#Sorted && ($Top < 0 || $c < $Top); ++$c) {
	$name = $Sorted[$c];
	printf "%-30s %8d\n", $name, $Counts{$name};
    }
}

#
# Print one error table that lists, next to each name and its count, the
# posting hosts recorded for that name, wrapped at column 78.
# Arguments: title, heading of the name column, width of the name column,
# total, name of the sort comparator, typeglob of the count hash,
# typeglob of the host-to-name hash.  Prints nothing if the total is not
# positive.
#
sub ReportReasonHosts {
    local($Title, $Column, $NameWidth, $Total, $SortSub) = @_;
    local(*Counts) = $_[5];	# %Counts is now the caller's count hash
    local(*HostMap) = $_[6];	# %HostMap maps posting host to name
    # The host list starts after the name column, a blank, the three
    # digit count and another blank.
    local($OffSet) = $NameWidth + 5;
    local($HeadFmt) = "%-" . $NameWidth . "s %3s %s\n";
    local($RowFmt) = "%-" . $NameWidth . "s %3d ";
    local($Indent) = " " x $OffSet;
    local(@Sorted, @PostHosts, $c, $name, $host, $Width);

    if ($Total <= 0) {
	return;
    }

    &ReportReasonHeading($Title, $Total);
    printf($HeadFmt, $Column, "Num", "Posting Hosts");

    @Sorted = sort $SortSub keys(%Counts);
    @PostHosts = sort(keys(%HostMap));
    for ($c = 0; $c <= $#Sorted && ($Top < 0 || $c < $Top); ++$c) {
	$Width = $OffSet;
	$name = $Sorted[$c];
	printf($RowFmt, $name, $Counts{$name});
	foreach $host (@PostHosts) {
	    if ($HostMap{$host} eq $name) {
		if ($Width + length($host) > 78) {
		    # Continue on a new line below the first host.
		    printf "\n%s", $Indent;
		    $Width = $OffSet + length($host)+1;
		} else {
		    $Width += length($host)+1;
		}
		printf "%s ", $host;
	    }
	}
	printf "\n";
    }
}

#
# All the By* subroutines are comparison routines for sort.
#
sub ByUnwantedDist {
    # Sort comparator: highest count in %UnwantedDist first; names with
    # equal counts go in alphabetical order so the result is repeatable.
    ($UnwantedDist{$b} <=> $UnwantedDist{$a}) || ($a cmp $b);
}

sub ByMissHdr {
    # Sort comparator: highest count in %MissHdr first; names with
    # equal counts go in alphabetical order so the result is repeatable.
    ($MissHdr{$b} <=> $MissHdr{$a}) || ($a cmp $b);
}

sub ByDupHdr {
    # Sort comparator: highest count in %DupHdr first; names with
    # equal counts go in alphabetical order so the result is repeatable.
    ($DupHdr{$b} <=> $DupHdr{$a}) || ($a cmp $b);
}

sub ByBadHdr {
    # Sort comparator: highest count in %BadHdr first; names with
    # equal counts go in alphabetical order so the result is repeatable.
    ($BadHdr{$b} <=> $BadHdr{$a}) || ($a cmp $b);
}

sub ByLineCount {
    # Sort comparator: highest count in %LineCount first; names with
    # equal counts go in alphabetical order so the result is repeatable.
    ($LineCount{$b} <=> $LineCount{$a}) || ($a cmp $b);
}

sub ByUnWanted {
    # Sort comparator: highest count in %UnWanted first; names with
    # equal counts go in alphabetical order so the result is repeatable.
    ($UnWanted{$b} <=> $UnWanted{$a}) || ($a cmp $b);
}

sub ByTooOld {
    # Sort comparator: highest count in %TooOld first; names with
    # equal counts go in alphabetical order so the result is repeatable.
    ($TooOld{$b} <=> $TooOld{$a}) || ($a cmp $b);
}

sub ByInFuture {
    # Sort comparator: highest count in %InFuture first; names with
    # equal counts go in alphabetical order so the result is repeatable.
    ($InFuture{$b} <=> $InFuture{$a}) || ($a cmp $b);
}

sub ByNoAmper {
    # Sort comparator: highest count in %NoAmper first; names with
    # equal counts go in alphabetical order so the result is repeatable.
    ($NoAmper{$b} <=> $NoAmper{$a}) || ($a cmp $b);
}

sub ByUnApproved {
    # Sort comparator: highest count in %UnApproved first; names with
    # equal counts go in alphabetical order so the result is repeatable.
    ($UnApproved{$b} <=> $UnApproved{$a}) || ($a cmp $b);
}

sub ByNoColonSpace {
    # Sort comparator: highest count in %NoColonSpace first; names with
    # equal counts go in alphabetical order so the result is repeatable.
    ($NoColonSpace{$b} <=> $NoColonSpace{$a}) || ($a cmp $b);
}

sub ByMultiAmper {
    # Sort comparator: highest count in %MultiAmper first; names with
    # equal counts go in alphabetical order so the result is repeatable.
    ($MultiAmper{$b} <=> $MultiAmper{$a}) || ($a cmp $b);
}

sub ByNGSpace {
    # Sort comparator: highest count in %NGSpace first; names with
    # equal counts go in alphabetical order so the result is repeatable.
    ($NGSpace{$b} <=> $NGSpace{$a}) || ($a cmp $b);
}

sub ByBadDate {
    # Sort comparator: highest count in %BadDate first; names with
    # equal counts go in alphabetical order so the result is repeatable.
    ($BadDate{$b} <=> $BadDate{$a}) || ($a cmp $b);
}

sub ByBadExpire {
    # Sort comparator: highest count in %BadExpire first; names with
    # equal counts go in alphabetical order so the result is repeatable.
    ($BadExpire{$b} <=> $BadExpire{$a}) || ($a cmp $b);
}

sub ByNoBody {
    # Sort comparator: highest count in %NoBody first; names with
    # equal counts go in alphabetical order so the result is repeatable.
    ($NoBody{$b} <=> $NoBody{$a}) || ($a cmp $b);
}

sub ByTooLarge {
    # Sort comparator: highest count in %TooLarge first; names with
    # equal counts go in alphabetical order so the result is repeatable.
    ($TooLarge{$b} <=> $TooLarge{$a}) || ($a cmp $b);
}
