#! /usr/local/bin/perl --    # -*-Perl-*-
#
# inflow-plot  - plot data collected by inflow-collect
#
# History: This program is based on some ideas (and source code lines) of
#          Felix Kuglers, SWITCH, inflow-package and could be part of it.
#
# 970102 V1.0   released
# 970113 V1.2   aligned default paths and renumbered version (FK)
# 970221 V1.3   changed gnuplot command to plot color
# 970421 V1.3.1 added ImageMagick support (FK)
# 970421 V1.3.2 enhanced file naming (FK)
# 970506 V1.3.3 changed THRESHOLD, it's now a percent value (of TOTAL)
#               added plots for a weekly sliding window
#               removed opt_t; use THRESHOLD instead to get all sites
# 971019 V1.3.4 search path handling improved, minor shell related bug fixed
# 971109 V1.3.5 added external config file
# 971112 V1.5.0 version number aligned
# 980408 V1.6.0 tuned help text, version number aligned
# 980507 V2.0.0 now use GD and Chart package instead of gnuplot
#               (makes it faster, don't need external programs)
# 980508 V2.0.1 bug fix: filenames for pictures 
# 980609 V2.0.2 added warning if ext config file could not be found
# 980611 V2.1.0 fixed bug in handling of missing data (GW) and added limit
#               to 8 most busy sites (only 8 colors available in Chart module!)
# 980612 V2.1.1 fixed minor bug concerning sorting of lines

$Copy    = '(c) 1997 Gerhard.Winkler@univie.ac.at';
$RELEASE = 'V2.1.1';
$RELDATE = 'Fri Jun 12 16:17:10 MET DST 1998';

## ---- begin config section -------------------------------------------------
##
## Optional external config file.  If it exists it is loaded at startup
## (see modify_config) and may override any of the settings below.
##
$INFLOWCONF = '/home/news/config/inflow.conf';

##
## Directories
##
$WORKDIR = '/usr/local/news/stat';               # pictures are written here
$HOURDIR = '/usr/local/news/stat/hour';          # hourly summaries are read here

##
## Base names of the pictures.  The optional "-<hostname>" part and the
## ".gif" extension are appended during initialisation.
##
$PIC1 = $WORKDIR . '/arthourpic';                # articles per hour
$PIC2 = $WORKDIR . '/artcumpic';                 # articles cumulative
$PIC3 = $WORKDIR . '/volhourpic';                # volume per hour
$PIC4 = $WORKDIR . '/volcumpic';                 # volume cumulative
$PIC5 = $WORKDIR . '/artweekpic';                # articles per hour, week plot
$PIC6 = $WORKDIR . '/volweekpic';                # volume per hour, week plot

##
## Picture geometry and naming
##
$WidthGIF     = 670;                             # picture width
$HeightGIF    = 400;                             # picture height
$MAXCOLOR     = 8;                               # colors available in Chart module
$WANTHOSTNAME = 1;                               # add -host to outfile name?

##
## Sites that sent fewer articles than this percentage of all articles
## are not plotted.
##
$THRESHOLD  = 0.1;                               # daily plots
$THRESHOLDW = 5;                                 # weekly plots

#
# directories appended to PATH (external programs & config files)
@ADDTOPATH = ( '/usr/local/bin' );
#
# ---- end config section ----------------------------------------------------
#
# initialisation ----------------------------------------------------
#
require "getopts.pl";
require "ctime.pl";
use Chart::Lines;

#require "timelocal.pl";

# Split $0 into directory ($path) and bare program name (used in messages).
# A name without any "/" is left untouched; assigning the result of a
# failed match would wipe out both $path and $0.
if ($0 =~ /^(.*)\/([^\/]+)$/) {
   ($path, $0) = ($1, $2);
}

&Getopts('df:hw');
&modify_config;     # also sets $inflowconfinfo, which the help text prints
&update_PATH;

$Usage="$0    -  $Copy

Release $RELEASE  of $RELDATE

Use data produced by inflow-collect and produce some plotfiles
Output are some gif pictures

Usage:  $0 [-dhw] [-f<configfile>]

Parameters:

   -d:             turn on verbosity; for debugging only
   -f<configfile>: load external configuration file 
                   (default: $INFLOWCONF)
   -h:             This help.
   -w:             plot weekly statistics

external config file: $inflowconfinfo
\n";


if ($opt_h) { print $Usage; exit 0; }

# The pictures are addressed by the full $PICn paths, so a failing chdir
# is reported but does not stop the run.
chdir($WORKDIR) || warn "can't chdir to $WORKDIR: $!\n";

# Take one snapshot of the clock and derive every date/time value from it,
# so that $date and the $this... fields always belong to the same instant.
$timenow = time;
($thisminute, $thishour, $thisday, $thismonth, $thisyear, $thisweekday)
   = (localtime($timenow))[1 .. 6];
$date = sprintf("%02d%02d%02d", $thisyear % 100, $thismonth + 1, $thisday);

&gethostandfqdn;    # sets $hostname and $fqdn

# Complete the picture file names: optional "-<hostname>" plus ".gif".
$suffix = $WANTHOSTNAME ? "-$hostname" : "";
foreach my $picname (\$PIC1, \$PIC2, \$PIC3, \$PIC4, \$PIC5, \$PIC6) {
   $$picname .= $suffix . ".gif";
}

#
# process files ----------------------------------------------------
#

# Read the hourly summary files whose names carry the current weekday
# ("inflow.sum.<weekday>-...") and collect per-site article and volume
# counts, indexed by the local hour taken from each file's time stamp.
opendir(DIR, $HOURDIR) || die "can't opendir $HOURDIR: $!\n";
$pat = "inflow.sum." . $thisweekday . "-";
# \Q..\E: the dots in the name are literal; ^: match at start of name only
@files = grep(/^\Q$pat\E/, readdir(DIR));
closedir(DIR);

while (defined($curfile = shift(@files))) {
   print "reading $curfile\n" if ($opt_d);
   $curfile = $HOURDIR . "/" . $curfile;
   # explicit "<" so that an odd file name can never select another open mode
   open(FIL, "< $curfile") || die "can't open file $curfile: $!\n";
   $filetime = $timenow;          # used until the file's own stamp is seen
   while (<FIL>) {
      chomp;                      # unlike chop, safe on a last line without "\n"
      # third and fourth field of the "timenow:" line: time stamp and date
      (undef, undef, $filetime, $filedate) = split if (/timenow:/);
      # same weekday but older than 24 hours: skip the rest of this file
      last if ($filetime < ($timenow - 86400));
      if (/^pn/) {
         $filehour = (localtime($filetime))[2];
         (undef, $filesite, $fileart, $filevol) = split;
         $allsites{$filesite}++;
         $allhours{$filehour}++;
         $articles{$filehour,$filesite} = $fileart;
         $volume{$filehour,$filesite} = $filevol;
      }
   }
   close(FIL);
}

# Last hour to plot: the highest hour that has data (-1 if there is none).
# Counting the hours instead would cut off the end of the plot as soon as
# one hourly file is missing.
$hourcount = -1;
foreach $hour (keys %allhours) {
   $hourcount = $hour if ($hour > $hourcount);
}

&total;
&reducesite($THRESHOLD);
&plotartperhour;
&plotartcum;
&plotvolperhour;
&plotvolcum;
if ($opt_w) {
   # The weekly plots use the same global tables with a different hour
   # numbering, so the daily data is discarded first.
   undef %allhours;
   undef %allsites;
   undef %articles;
   undef %volume;

   # List all hourly summaries (every weekday), newest first by modification
   # time, ties by name.  Done in Perl rather than through the shell, so
   # unusual characters in $HOURDIR or an empty directory cause no trouble.
   my (%mtime, $name, $full);
   opendir(DIR, $HOURDIR) || die "can't opendir $HOURDIR: $!\n";
   @files = ();
   foreach $name (readdir(DIR)) {
      next unless ($name =~ /^inflow\.sum\.[0-6]-/);
      $full = $HOURDIR . "/" . $name;
      $mtime{$full} = (stat($full))[9];
      push(@files, $full);
   }
   closedir(DIR);
   @files = sort { $mtime{$b} <=> $mtime{$a} || $a cmp $b } @files;

   # Hour slots count backwards from now: the newest file becomes slot 0,
   # the next one -1, and so on.
   $weekhour = 1;
   foreach $curfile (@files) {
      print "reading $curfile\n" if ($opt_d);
      $weekhour--;
      $filetime = $timenow;       # used until the file's own stamp is seen
      # explicit "<" so that an odd file name can never select another open mode
      open(FIL, "< $curfile") || die "can't open file $curfile: $!\n";
      while (<FIL>) {
         chomp;                   # unlike chop, safe on a last line without "\n"
         (undef, undef, $filetime, $filedate) = split if (/timenow:/);
         if ($filetime < ($timenow - 86400*7)) {
            print "too old $curfile\n" if ($opt_d);
            $weekhour++;          # give the slot back so the next file reuses it
            last;
         }
         if (/^pn/) {
            (undef, $filesite, $fileart, $filevol) = split;
            $allsites{$filesite}++;
            $allhours{$weekhour}++;
            $articles{$weekhour,$filesite} = $fileart;
            $volume{$weekhour,$filesite} = $filevol;
         }
      }
      close(FIL);
   }
   &total;
   &reducesite($THRESHOLDW);
   &plotartweek;
   &plotvolweek;
}


# reducesite
#----------------------------------------------------------------------
# Remove from %allsites every site that is not worth plotting.
#
# Argument: threshold as a percentage of the "TOTAL" article count.
# Reads:    %allhours, %allsites, %articles, $MAXCOLOR, $opt_d
# Changes:  %allsites (entries of rejected sites are deleted)
#
# A site is rejected when its article sum over all hours is below the
# threshold, or when $MAXCOLOR sites with larger sums were already
# accepted: there are only $MAXCOLOR colors available for charts
# (a bug in the CPAN Chart module V0.94)   <wirtz@dfn.de>
sub reducesite {
   my ($TH) = @_;
   # %sum must be private: this routine is called once for the daily and
   # once for the weekly data, and sums left over from the first call
   # would otherwise be added to those of the second.
   my (%sum, $h, $s, $t);
   my $chart_counter = 0;         # number of sites accepted so far

   foreach $h (keys %allhours) {
      foreach $s (keys %allsites) {
         $sum{$s} += $articles{$h,$s};
      }
   }
   $t = $sum{"TOTAL"} * $TH/100;
   warn "%% calculated # of articles as threshold: $t\n" if $opt_d;

   # busiest sites first, so the color limit cuts off the smallest ones
   foreach $s (sort { $sum{$b} <=> $sum{$a} } keys %sum) {
      if ($sum{$s} < $t || ($chart_counter > $MAXCOLOR - 1)) {
         warn "%% deleted $s: $sum{$s} art (t=$t)\n" if $opt_d;
         delete $sum{$s};
         delete $allsites{$s};
      }
      else {
         warn "%% accepted $chart_counter/$s: $sum{$s} art (t=$t)\n" if $opt_d;
         $chart_counter++;
      }
   }
}


# total
#----------------------------------------------------------------------
# Add a pseudo site "TOTAL" holding, for every hour in %allhours, the
# sum of articles and volume over all sites currently in %allsites.
sub total {
   # site list taken before "TOTAL" itself is registered below
   my @sitelist = keys %allsites;

   for my $hour (keys %allhours) {
      my $totalkey = join($;, $hour, "TOTAL");
      for my $site (@sitelist) {
         my $sitekey = join($;, $hour, $site);
         $articles{$totalkey} += $articles{$sitekey};
         $volume{$totalkey}   += $volume{$sitekey};
      }
   }
   $allsites{"TOTAL"}++;
}




# plotartperhour
#---------------------------------------------------------------------- 
# construct plot; articles per site per hour
#
# Reads:  %allsites, %articles, $hourcount, $fqdn, $date,
#         $WidthGIF, $HeightGIF
# Writes: the GIF file named by $PIC1
sub plotartperhour {
   # All scratch data is lexical, so nothing left behind by another plot
   # routine can end up in this picture.
   my @sites = sort keys %allsites;       # legend order == dataset order
   my (@data, @datarow, $val);
   my $i = 0;                             # number of site columns filled

   my $pic = Chart::Lines->new ($WidthGIF,$HeightGIF);
   $pic->set ('title' => "Articles per hour for different sites ($fqdn: $date)");
   $pic->set ('x_label' => "hour of day");
   $pic->set ('y_label' => "articles");
   $pic->set ('grid_lines' => 'true');
###$pic->set ('colors' => [[10,10,10], [20,20,20], [30,30,30], [40,40,40], [50,50,50], [60,60,60], [70,70,70], [80,80,80], [90,90,90], [10,0,0], [0,10,0], [0,0,10],[10,10,0],[10,0,10],[0,10,10]]);
###   $pic->set ('transparent' => 'true');
   $pic->set ('legend_labels' => \@sites);
   $pic->add_dataset( 0 .. 23);           # first dataset: the x axis labels

   # Pass 1: table [hour][site]; a missing or zero count is stored as undef.
   foreach my $h (0 .. $hourcount) {
      $i = 0;
      foreach my $s (@sites) {
         $val = $articles{$h,$s};
         $data[$h][$i] = $val ? $val : undef;
         $i++;
      }
   }
   # Pass 2: one dataset per site, always 24 values long; hours beyond
   # $hourcount were never filled and are therefore undef.
   foreach my $j (0 .. $i - 1) {
      @datarow = ();
      foreach my $h (0 .. 23) {
         push @datarow, $data[$h][$j];
      }
      $pic->add_dataset(@datarow);
   }
   $pic->gif ("$PIC1");

}


# plotartcum
#----------------------------------------------------------------------
# construct plot; articles cumulative per site
#
# Reads:  %allsites, %articles, $hourcount, $fqdn, $date,
#         $WidthGIF, $HeightGIF
# Writes: the GIF file named by $PIC2
sub plotartcum {
   # All scratch data is lexical.  In particular the running sums in %cum
   # start at zero on every call instead of living on in a global hash.
   my @sites = sort keys %allsites;       # legend order == dataset order
   my (@data, @datarow, %cum, $val);
   my $i = 0;                             # number of site columns filled

   my $pic = Chart::Lines->new ($WidthGIF,$HeightGIF);
   $pic->set ('title' => "Articles cumulative for different sites ($fqdn: $date)");
   $pic->set ('x_label' => "hour of day");
   $pic->set ('y_label' => "articles");
   $pic->set ('grid_lines' => 'true');
###   $pic->set ('transparent' => 'true');
   $pic->set ('legend_labels' => \@sites);
   $pic->add_dataset( 0 .. 23);           # first dataset: the x axis labels

   # Pass 1: table [hour][site] of running sums; an hour without data
   # adds 0, i.e. repeats the previous sum.
   foreach my $h (0 .. $hourcount) {
      $i = 0;
      foreach my $s (@sites) {
         $val = $articles{$h,$s};
         $val = 0 if (!$val);
         $cum{$s} += $val;
         $data[$h][$i] = $cum{$s};
         $i++;
      }
   }
   # Pass 2: one dataset per site, always 24 values long; hours beyond
   # $hourcount were never filled and are therefore undef.
   foreach my $j (0 .. $i - 1) {
      @datarow = ();
      foreach my $h (0 .. 23) {
         push @datarow, $data[$h][$j];
      }
      $pic->add_dataset(@datarow);
   }
   $pic->gif ("$PIC2");

}



# plotvolperhour
#---------------------------------------------------------------------- 
# construct plot; volume per site per hour
#
# Reads:  %allsites, %volume, $hourcount, $fqdn, $date,
#         $WidthGIF, $HeightGIF
# Writes: the GIF file named by $PIC3
sub plotvolperhour {
   # All scratch data is lexical, so nothing left behind by another plot
   # routine can end up in this picture.
   my @sites = sort keys %allsites;       # legend order == dataset order
   my (@data, @datarow, $val);
   my $i = 0;                             # number of site columns filled

   my $pic = Chart::Lines->new ($WidthGIF,$HeightGIF);
   $pic->set ('title' => "Volume per hour for different sites ($fqdn: $date)");
   $pic->set ('x_label' => "hour of day");
   $pic->set ('y_label' => "volume (MB)");
   $pic->set ('grid_lines' => 'true');
###   $pic->set ('transparent' => 'true');
   $pic->set ('legend_labels' => \@sites);
   $pic->add_dataset( 0 .. 23);           # first dataset: the x axis labels

   # Pass 1: table [hour][site]; the stored volume is divided by 1000000
   # for the "MB" axis, a missing or zero volume is plotted as 0.
   foreach my $h (0 .. $hourcount) {
      $i = 0;
      foreach my $s (@sites) {
         $val = $volume{$h,$s};
         if (!$val) {$val = 0} else {$val /= 1000000};
         $data[$h][$i] = $val;
         $i++;
      }
   }
   # Pass 2: one dataset per site, always 24 values long; hours beyond
   # $hourcount were never filled and are therefore undef.
   foreach my $j (0 .. $i - 1) {
      @datarow = ();
      foreach my $h (0 .. 23) {
         push @datarow, $data[$h][$j];
      }
      $pic->add_dataset(@datarow);
   }
   $pic->gif ("$PIC3");

}



# plotvolcum
#----------------------------------------------------------------------
# construct plot; volume cumulative per site
#
# Reads:  %allsites, %volume, $hourcount, $fqdn, $date,
#         $WidthGIF, $HeightGIF
# Writes: the GIF file named by $PIC4
sub plotvolcum {
   # All scratch data is lexical.  In particular the running sums in %vcum
   # start at zero on every call instead of living on in a global hash.
   my @sites = sort keys %allsites;       # legend order == dataset order
   my (@data, @datarow, %vcum, $val);
   my $i = 0;                             # number of site columns filled

   my $pic = Chart::Lines->new ($WidthGIF,$HeightGIF);
   $pic->set ('title' => "Volume cumulative for different sites ($fqdn: $date)");
   $pic->set ('x_label' => "hour of day");
   $pic->set ('y_label' => "volume (MB)");
   $pic->set ('grid_lines' => 'true');
###   $pic->set ('transparent' => 'true');
   $pic->set ('legend_labels' => \@sites);
   $pic->add_dataset( 0 .. 23);           # first dataset: the x axis labels

   # Pass 1: table [hour][site] of running sums; the stored volume is
   # divided by 1000000 for the "MB" axis, an hour without data adds 0.
   foreach my $h (0 .. $hourcount) {
      $i = 0;
      foreach my $s (@sites) {
         $val = $volume{$h,$s};
         if (!$val) {$val = 0} else {$val /= 1000000};
         $vcum{$s} += $val;
         $data[$h][$i] = $vcum{$s};
         $i++;
      }
   }
   # Pass 2: one dataset per site, always 24 values long; hours beyond
   # $hourcount were never filled and are therefore undef.
   foreach my $j (0 .. $i - 1) {
      @datarow = ();
      foreach my $h (0 .. 23) {
         push @datarow, $data[$h][$j];
      }
      $pic->add_dataset(@datarow);
   }
   $pic->gif ("$PIC4");

}




# plotartweek
#---------------------------------------------------------------------- 
# construct plot; articles per site per hour for one week
#
# Reads:  %allsites, %articles (keyed by hour slots -168 .. 0),
#         $fqdn, $date, $WidthGIF, $HeightGIF
# Writes: the GIF file named by $PIC5
sub plotartweek {
   # All scratch data is lexical, so nothing left behind by another plot
   # routine can end up in this picture.
   my @sites = sort keys %allsites;       # legend order == dataset order
   my (@data, @datarow, $val);
   my $i = 0;                             # number of site columns filled

   my $pic = Chart::Lines->new ($WidthGIF,$HeightGIF);
   $pic->set ('title' => "Articles per hour for different sites ($fqdn: $date)");
   $pic->set ('x_label' => "hours up to now");
   $pic->set ('y_label' => "articles");
   $pic->set ('grid_lines' => 'true');
###   $pic->set ('transparent' => 'true');
   $pic->set ('legend_labels' => \@sites);
   $pic->add_dataset( -168 .. 0 );        # first dataset: the x axis labels
   $pic->set('skip_x_ticks' => 24);

   # Pass 1: table [slot][site]; hour slot -168 goes to row 0, slot 0 to
   # row 168.  A missing or zero count is stored as undef.
   foreach my $h (-168 .. 0) {
      $i = 0;
      foreach my $s (@sites) {
         $val = $articles{$h,$s};
         $data[$h+168][$i] = $val ? $val : undef;
         $i++;
      }
   }
   # Pass 2: one dataset of 169 values per site.
   foreach my $j (0 .. $i - 1) {
      @datarow = ();
      foreach my $h (0 .. 168) {
         push @datarow, $data[$h][$j];
      }
      $pic->add_dataset(@datarow);
   }
   $pic->gif ("$PIC5");

}


# plotvolweek
#---------------------------------------------------------------------- 
# construct plot; volume per site per hour for one week
#
# Reads:  %allsites, %volume (keyed by hour slots -168 .. 0),
#         $fqdn, $date, $WidthGIF, $HeightGIF
# Writes: the GIF file named by $PIC6
sub plotvolweek {
   # All scratch data is lexical, so nothing left behind by another plot
   # routine can end up in this picture.
   my @sites = sort keys %allsites;       # legend order == dataset order
   my (@data, @datarow, $val);
   my $i = 0;                             # number of site columns filled

   my $pic = Chart::Lines->new ($WidthGIF,$HeightGIF);
   $pic->set ('title' => "Volume per hour for different sites ($fqdn: $date)");
   $pic->set ('x_label' => "hours up to now");
   $pic->set ('y_label' => "volume (MB)");
   $pic->set ('grid_lines' => 'true');
###   $pic->set ('transparent' => 'true');
   $pic->set ('legend_labels' => \@sites);
   $pic->add_dataset( -168 .. 0 );        # first dataset: the x axis labels
   $pic->set('skip_x_ticks' => 24);

   # Pass 1: table [slot][site]; hour slot -168 goes to row 0, slot 0 to
   # row 168.  The stored volume is divided by 1000000 for the "MB" axis;
   # a missing or zero volume is stored as undef.
   foreach my $h (-168 .. 0) {
      $i = 0;
      foreach my $s (@sites) {
         $val = $volume{$h,$s};
         if (!$val) {$val = undef} else {$val /= 1000000};
         $data[$h+168][$i] = $val;
         $i++;
      }
   }
   # Pass 2: one dataset of 169 values per site.
   foreach my $j (0 .. $i - 1) {
      @datarow = ();
      foreach my $h (0 .. 168) {
         push @datarow, $data[$h][$j];
      }
      $pic->add_dataset(@datarow);
   }
   $pic->gif ("$PIC6");

}


# gethostandfqdn
#---------------------------------------------------------------------- 
# Set the globals $hostname (short name) and $fqdn (fully qualified
# domain name) of this machine.
#
# The name comes from "uname -n".  If it contains no dot, the domain is
# looked up in the "domain" entry of /etc/resolv.conf; when no such entry
# can be read, $fqdn is simply the short host name.
sub gethostandfqdn {
    my ($str, $domain);

    $str = `uname -n`;
    $str = "" unless defined($str);
    chomp($str);
    if ($str =~ /\./) {             # str is fqdn
        $fqdn = $str;
        ($hostname) = ($str =~ /^([^.]+)\./);
    } else {                        # str is simple hostname
        $hostname = $str;
        $domain = "";
        if (open(RESOLV, "< /etc/resolv.conf")) {
            while (<RESOLV>) {
                # only real "domain" entries, not comments or other lines
                # that merely contain the word; the last entry wins
                $domain = $1 if (/^\s*domain\s+(\S+)/);
            }
            close(RESOLV);
        }
        # The domain is appended only if one was found.  Using $1 without
        # checking the match would pick up whatever an earlier, unrelated
        # match had captured.
        $fqdn = ($domain ne "") ? $hostname . "." . $domain : $hostname;
    }
}

# numerically
#----------------------------------------------------------------------
# comparison routine for sort(): ascending numeric order
sub numerically {
    return $a <=> $b;
}


# update_PATH
# -----------------------------------------------------------------------------
# Append the directories of @ADDTOPATH to $ENV{'PATH'} and drop repeated
# entries (the first occurrence is kept); supports scripts run from cron.
#
sub update_PATH {
    warn "OLD PATH=$ENV{'PATH'}\n" if $opt_d;

    # current search path followed by the configured additions
    @ENVPATH = (split(/:/, $ENV{'PATH'}), @ADDTOPATH);

    my @kept = ();
    foreach my $dir (@ENVPATH) {
        unless ($envpathseen{$dir}) {       # first time this entry shows up
            $envpathseen{$dir} = 1;
            push(@kept, $dir);
        }
    }
    $ENV{'PATH'} = join(":", @kept);

    warn "NEW PATH=$ENV{'PATH'}\n" if $opt_d;
}


# modify_config
# -----------------------------------------------------------------------------
# Load the optional external configuration file.
#
# The file is the one named by -f ($opt_f) or else $INFLOWCONF.  It is
# loaded with require, so it can set any global and must return a true
# value.
#
# Sets:  $inflowconf      path of the file that was looked for
#        $inflowconfinfo  text for the help output: file name plus the
#                         $CFRELEASE/$CFRELDATE set by the file, or "- none -"
# Dies:  if a file was requested with -f but does not exist
#
sub modify_config {
    my ($p, $n, $incpath, $loadpath);

    $inflowconf = $opt_f ? $opt_f : $INFLOWCONF;
    if ($inflowconf =~ /^(.*)\/([^\/]+)$/) {
        ($p, $n) = ($1, $2);
        push(@INC, $p);     # the config directory stays reachable through @INC
    } else {
        $n = $inflowconf;
    }
    foreach (@INC) { $incpath .= "    $_\n"; }

    if (-e $inflowconf) {
        warn "loading local configs from $inflowconf...\n" if $opt_d;
        # Load exactly the file that was just tested.  require searches @INC
        # only for names that do not start with "/", "./" or "../"; a bare
        # or relative name could therefore pick up a file of the same name
        # from a directory earlier in @INC.
        $loadpath = $inflowconf;
        $loadpath = "./" . $loadpath unless ($loadpath =~ m#^\.{0,2}/#);
        require $loadpath;
        $inflowconfinfo = "$inflowconf $CFRELEASE\n";
        $inflowconfinfo .= " " x 16 . "from: $CFRELDATE";
    } else {
        if ($opt_f) {
            die "abort - couldn't find or open $n on INC=\n$incpath\n";
        }
        warn "didn't find any local configs $n on INC=\n$incpath\n" if $opt_d;
        $inflowconfinfo = "- none -";
    }
}
