#!/usr/local/bin/perl
#
# Absorb Page 1 of the news
#
# Jim Gillogly, Mar 91

# Month abbreviation -> month number (1..12), as found in the news dates.
%month = ();
@month{'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
       'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'} = (1 .. 12);

# English number word -> value.  Compound numbers ("twenty-three") are
# handled by e2i, which adds up the values of the individual words.
# "several" is mapped to 666 (NOTE(review): presumably just a stand-in
# value picked by the author).
%number = (
	'one'       =>  1, 'two'      =>  2, 'three'    =>  3, 'four'     =>  4,
	'five'      =>  5, 'six'      =>  6, 'seven'    =>  7, 'eight'    =>  8,
	'nine'      =>  9, 'ten'      => 10, 'eleven'   => 11, 'twelve'   => 12,
	'thirteen'  => 13, 'fourteen' => 14, 'fifteen'  => 15, 'sixteen'  => 16,
	'seventeen' => 17, 'eighteen' => 18, 'nineteen' => 19,
	'several'   => 666,
	'twenty'    => 20, 'thirty'   => 30, 'forty'    => 40, 'fifty'    => 50,
	'sixty'     => 60, 'seventy'  => 70, 'eighty'   => 80, 'ninety'   => 90
);

# Translation table: a flat list of (code, pattern) pairs.
#
# Each pattern is matched against a complete news item.  Its first (.*)
# is reported as the acting country and its second (.*) as the country
# acted upon (for single-country events the second group just captures
# whatever follows the phrase).  The code is the fixed-width tag printed
# between the two names.  Patterns are tried in the order listed, and the
# empty pair at the end marks the end of the table for the loops that
# walk it.

@trans = (
	"takes" => "(.*) infantry capture (.*) territory",
	"takes" => "(.*) shock .* of (.*)'s sectors",
	"fself" => "(.*) gunners decimate (.*) aggressors",
	"fself" => "(.*) gunners fire on (.*) in self.*",
	"bombs" => "(.*) bombers .* on (.*)",
	"bombs" => "(.*) planes divebomb one of (.*)'s sectors",
	"viols" => "(.*) violates (.*) airspace",
	"viols" => "(.*) overflies (.*) territory",
	"exciv" => "(.*) execution .*less (.*) civilians",
	"exwor" => "(.*) execution .*less (.*) workers",
	"exwor" => "(.*) firing .*less (.*) workers",
	"nowar" => "(.*) is no longer at war with (.*)",
	"nowar" => "(.*) Foreign .* war with (.*)\"",
	"fship" => "(.*) fires on (.*) ships",
	"fship" => "(.*) shells a ship owned by (.*)",
	"fires" => "(.*) artillery fires on (.*) sectors",
	"fires" => "(.*) gunners bombard (.*) territory",
	"under" => "(.*) underground .*gainst (.*) occupation",
	"under" => "(.*) terrorists .*with (.*) special forces",
	"board" => "(.*) seadogs board one of (.*)'s ships",
	"board" => "(.*) pirates board (.*) ship",
	"nobrd" => "(.*) is repelled by (.*) while attempting.*",
	"nobrd" => "(.*) pirates prove inept at boarding (.*)'s ships",
	"plgue" => "(.*) reports outbreak of the black plague(.*)",
	"plgue" => "(.*) sector infected with bubonic plague(.*)",
	"beatn" => "(.*) infantry beaten back by (.*) troops",
	"beatn" => "(.*) shock troops .* failed attack on (.*)",
	"mined" => "(.*) ship severely damaged in mine field(.*)",
	"mined" => "(.*) ship hits a mine(.*)",
	"war  " => "(.*) declares TOTAL WAR on (.*)",
	"war  " => "(.*) gets serious with (.*) and declares WAR",
	"helps" => "(.*) aids (.*) with divine intervention",
	"helps" => "(.*) smiles upon (.*)",
	"helps" => "(.*) makes a contribution to (.*)",
	"helps" => "(.*) helps out (.*)",
	"hurts" => "(.*) frowns upon (.*)",
	"hurts" => "(.*) hurts (.*) with .*",
	"ally " => "(.*) \/ (.*) alliance declared",
	"ally " => "(.*) announces an alliance with (.*)",
	"xally" => "(.*) diplomats .* alliance with (.*)",
	"xally" => "(.*) is no longer allied with (.*)",
	"torpd" => "(.*) ships torpedoed.*boats(.*)",
	"torpd" => "(.*) ships torpedoed.*packs(.*)",
	"downs" => "(.*) pilots down (.*) planes",
	"downs" => "(.*) victorious in air.* against (.*)",
	"bship" => "(.*) airforce bombs (.*) ships",
	"bship" => "(.*) divebombs a ship flying the flag of (.*)",
	"missl" => "(.*) missile wreaks .* on (.*)",
	"missl" => "(.*) missile fired at .* of (.*)'s sectors",
	"tviol" => "(.*) violates a treaty with (.*)",
	"tviol" => "(.*) actions violate treaty with (.*)",
	"lnch " => "(.*) launches a satellite into orbit(.*)",
	"quits" => "(.*) dissolves its government(.*)",
	"quits" => "(.*) throws in the towel(.*)",
	""      => ""
);

$time = 0;      # Non-zero while a news item is being accumulated

# First crunch the translation table -- find a keyword to use
# that will identify one of a small list of translations.
#
# Every pattern in @trans begins "(.*) <word> ..."; <word> becomes a key
# of %table whose value is a comma-terminated list of the @trans indices
# of the codes whose patterns start with that word (e.g. "4,6,").  The
# walk stops at the empty pair that ends @trans.

for (my $pat = 1; $trans[$pat] ne ""; $pat += 2)
{
	# Use $1 only when the match succeeded: after a failed match it
	# would still hold the previous pattern's keyword, and this pattern
	# would be filed under the wrong word.
	next unless $trans[$pat] =~ /\(\.\*\) (\S+) /;
	$table{$1} .= ($pat - 1) . ",";         # Make a list of entries
}


# Read page 1 of the news and collect one entry per news item in @blurbs.
#
# An item starts on a line of the form
#       "Www Mon DD hh:mm  text"
# and may continue on following lines that begin with white space.
# Each finished item is run through &understand and stored behind an
# 8-digit MMDDhhmm key so that the list can be sorted by time afterwards.

while (<>)
{
	last if /=== page 2 ===/;       # Only page 1 is wanted

	if (/^\s/ && $time > 0)         # Space? continuation
	{
		chomp;                  # chomp, not chop: the last line of
					# the input may have no newline
		s/^\s+/ /;              # Collapse initial spaces
		$blurb .= $_ unless $_ eq " ";  # A blank line adds nothing
		next;
	}

	if ($time != 0)                 # Any other line ends the pending item
	{
		$blurb = &understand($blurb);
		# Plain interpolation instead of sprintf: the news text must
		# not be used as a format, or a "%" in it would be mangled.
		push(@blurbs, "$time $mo $day $hour:$min  $blurb\n");
		$blurb = "";
		$time = 0;
	}

	# Start a new item only when this line really is a dated headline.
	# The captures are read only after a successful match; after a
	# failed one they would still describe the previous headline.
	next unless /^\S\S\S (...) +(\d+) +(\S+):(\S+)  (.+)$/ && $month{$1};
	($mo, $day, $hour, $min, $blurb) = ($1, $2, $3, $4, $5);
	$mod = $month{$mo};
	$time = sprintf("%08d", $min + 100 * ($hour + 100 * ($day + 100 * $mod)));
}

# The loop only stores an item when the line after it arrives, so the
# item still pending at the page-2 marker or at end of input goes in here.
if ($time != 0)
{
	$blurb = &understand($blurb);
	push(@blurbs, "$time $mo $day $hour:$min  $blurb\n");
	$blurb = "";
	$time = 0;
}


#($start[1]) = times;

# Print the news in time order, then the number of sectors captured per
# (defender, attacker) pair.
#
# Every entry of @blurbs begins with a fixed-width numeric date/time key,
# so a plain string sort puts the entries in time order.

@s = sort @blurbs;

foreach my $item (@s)
{
	$item =~ s/^\S+ //;     # Strip off the numeric date/time
	print $item;

	# Now total the sectors captured over this period.  A "takes" line
	# reads "...mm  <attacker> takes <defender> <count>"; the names are
	# blank-padded and either of them may be a single character.
	next unless $item =~ /\d\d  (.*\S) +takes (\S(?:.*\S)?) +(\d+)$/;
	$sect_taken{sprintf("%-12s sectors taken by %-12s: ", $2, $1)} += $3;
}

# Hash order is arbitrary; sort the keys so the summary comes out in the
# same order on every run.
foreach my $names (sort keys %sect_taken)
{
	print $names, $sect_taken{$names}, "\n";
}

#($end[1]) = times;

#printf "User time for english to integer:  %5.2f\n", $timing[0];
#printf "User time for split:               %5.2f\n", $timing[1];
#printf "User time for 2-word names:        %5.2f\n", $timing[2];
#printf "User time for recognized keys:     %5.2f\n", $timing[3];

#printf "User time for total understanding: %5.2f\n", $timing[4];
#printf "User time for output:              %5.2f\n", $end[1] - $start[1];

# Translate one news item into a fixed-format summary line.
#
#   Argument: the text of the item (what follows the date and time).
#   Returns:  the two captured country names around the matching code
#             from @trans, followed by the repeat count, laid out with
#             "%-12s <code> %-12s %3d"; or "??? " followed by the text
#             when no pattern in @trans matches.
#
# Uses the file-level @trans (code/pattern pairs ending with an empty
# pair), %table (keyword -> "i,j,..." list of @trans code indices) and
# &e2i.

sub understand
{
	# $y has to be a local()ized package variable, not a lexical: &e2i
	# returns the repeat count and, as a side effect, assigns the text
	# with the "<number> times" words removed to $y.
	local($y) = pop(@_);

	my $num = &e2i($y);     # strip out any numbers in there

	# The second word of the item is normally the pattern keyword
	# ("Foo infantry capture ..."); try the patterns filed under it
	# first.  $1 is only read when the match succeeded.
	my @likely = ();
	@likely = split(/,/, $table{$1})
	    if $y =~ /^\S+ (\S+) / && defined $table{$1};

	my $x = _try_patterns($y, $num, @likely);

	if ($x eq "")
	{
		# Nothing is filed under that word, or none of the candidates
		# fits (a multi-word country name moves the keyword further
		# along the line).  Fall back to every pattern, in table order.
		my @all = ();
		for (my $i = 0; $trans[$i] ne ""; $i += 2)
		{
			push(@all, $i);
		}
		$x = _try_patterns($y, $num, @all);
	}

	$x = "??? " . $y if $x eq "";   # Couldn't parse this line
	return $x;
}

# Private helper for understand: try the patterns that belong to the given
# @trans code indices, in order, against the whole of $text.  Returns the
# summary line for the first pattern that matches, or "" when none does.
sub _try_patterns
{
	my ($text, $num, @indices) = @_;

	foreach my $i (@indices)
	{
		my $pattern = $trans[$i + 1];

		# A list assignment in boolean context is true exactly when
		# the match returned captures, so no eval is needed here.
		if (my ($att, $def) = $text =~ /^$pattern$/)
		{
			return sprintf("%-12s %s %-12s %3d",
				       $att, $trans[$i], $def, $num);
		}
	}
	return "";
}

# To convert from English to integer:
#
#       If the word "times" does not appear, it's a 1 and nothing else
#               happens.
#       Otherwise, start at the "times" and work BACKWARD towards the
#               beginning, adding up number words (see %number) and
#               throwing them away, until we get to a word we don't
#               understand or to the beginning.  If no number word
#               precedes "times" the result is 0.
#
#   Argument: the text of a news item.
#   Returns:  the repeat count.
#   Side effect: when "times" was found, the text without "times" and the
#       number words is assigned to the package variable $y.  The caller
#       (&understand) local()izes $y and reads the stripped text from it.
#
# NOTE(review): the text is split on blanks AND hyphens and rejoined with
# blanks, so a hyphen elsewhere in the item ("self-defense") comes back as
# a blank.  Kept as it was; confirm whether the patterns depend on it.

sub e2i # English to integer
{
	my $text = pop(@_);

	my @words = split(/[ -]/, $text);       # Separate into words

	# Find the "times": the next-to-last word is checked first, then
	# the words from the front.  Only a whole word counts, so text such
	# as "sometimes" no longer wipes out the last word of the item.
	my $at = -1;
	if (@words > 1 && $words[$#words - 1] eq "times")
	{
		$at = $#words - 1;
	}
	else
	{
		foreach my $k (0 .. $#words)
		{
			if ($words[$k] eq "times")
			{
				$at = $k;
				last;
			}
		}
	}
	return 1 if $at < 0;            # No "times": happened once

	$words[$at] = "";               # Clear out the times

	my $n = 0;
	for (my $k = $at - 1; $k >= 0; $k--)    # pick off remaining numbers
	{
		last unless exists $number{$words[$k]};
		$n += $number{$words[$k]};
		$words[$k] = "";        # Understood it; clear
	}

	# Drop the cleared words before rejoining.  Joining them and then
	# deleting the runs of blanks glued the neighbouring words together
	# whenever "times" was not at the end of the item.
	$y = join(' ', grep { $_ ne "" } @words);

	return $n;
}

