#! /usr/local/bin/perl --    # -*-Perl-*-
#
# inflow-plot  - plot data collected by inflow-collect
#
# History: This program is based on some ideas (and source code lines) from
#          Felix Kugler's (SWITCH) inflow package and could be part of it.
#
# 970102 V1.0   released
# 970113 V1.2   aligned default paths and renumbered version (FK)
# 970221 V1.3   changed gnuplot command to plot color
# 970421 V1.3.1 added ImageMagick support (FK)
# 970421 V1.3.2 enhanced file naming (FK)
# 970506 V1.3.3 changed THRESHOLD, it's now a percent value (of TOTAL)
#               added plots for a weekly sliding window
#               removed opt_t; use THRESHOLD instead to get all sites
# 971019 V1.3.4 search path handling improved, minor shell related bug fixed
# 971109 V1.3.5 added external config file
# 971112 V1.5.0 version number aligned
#
# copyright line and release data; all three are shown in the usage text
$Copy="(c) 1997 Gerhard.Winkler\@univie.ac.at";

$RELDATE = "Wed Nov 12 11:16:02 MET 1997";
$RELEASE = "V1.5.0";

## ---- begin config section -------------------------------------------------
##
$INFLOWCONF = "/home/news/config/inflow.conf"; # ext. local configs (optional)
##
## the following settings may be modified by an external config file
##
## NOTE: $PIC1..$PIC6 are built from $WORKDIR right here, i.e. before the
## external config file is loaded.  A config file that changes $WORKDIR has
## to set the $PICn variables as well if the pictures should move with it.
##
$WORKDIR      = "/opt/www/docs/news/stat/pics"; # dir to write pictures
$HOURDIR      = "/home/news/inflow/hour";       # dir of hourly summaries
$PIC1         = "$WORKDIR/arthourpic";          # articles per hour
$PIC2         = "$WORKDIR/artcumpic";           # articles cumulative
$PIC3         = "$WORKDIR/volhourpic";          # volume per hour
$PIC4         = "$WORKDIR/volcumpic";           # volume cumulative
$PIC5         = "$WORKDIR/artweekpic";          # articles per hour week-plot
$PIC6         = "$WORKDIR/volweekpic";          # volume per hour week-plot
$WANTHOSTNAME = 0;                              # add -host to outfile name ?
$THRESHOLD    = 2;    # don't plot sites which have sent fewer articles
                      # than $THRESHOLD percent of all articles
$THRESHOLDW   = 5;    # same for weekly plots
#
# This script needs helper applications, which you can download from many good
# file servers. ImageMagick is a successor of the pbmplus package. URL examples:
# gnuplot: 
#   http://sunsite.cnlab-switch.ch/ftp/mirror/gnu/gnuplot-3.5.tar.gz
# ppmtogif: 
#   ftp://sunsite.cnlab-switch.ch/mirror/X11/R5-contrib/pbmplus10dec91.tar.Z
# convert:
#   ftp://sunsite.cnlab-switch.ch/mirror/X11/contrib/applications/ImageMagick/ImageMagick-3.8.2.tar.gz
#
# Converter selection (see TMP2gif): $IMCONVERT is used whenever it is not
# empty; $PPMTOGIF is only used when $IMCONVERT is the empty string.
#
$GNUPLOT   = "gnuplot -rv";                # we need gnuplot and one of
$PPMTOGIF  = "";	                   # ppmtogif (pbmplus pkg)
$IMCONVERT = "convert";                    # or convert (ImageMagick pkg)
#
# directories appended to PATH by update_PATH, so that the helper programs
# are also found when the script is run from cron
@ADDTOPATH = ( '/opt/local/bin','/opt/gnu/bin' );
#
# ---- end config section ----------------------------------------------------
#
# initialisation ----------------------------------------------------
#
# don't change this; gnuplot does a cd to $WORKDIR and uses relative filenames
$TMPFILE   = "inflow-plot.tmp";   # temp for plot data (relative to $WORKDIR)
$TMPPIC    = "inflow-plot.pbm";   # temp for plot pics (relative to $WORKDIR)

require "getopts.pl";
require "ctime.pl";
#require "timelocal.pl";

# Reduce $0 to the bare program name (used in the usage text) and keep the
# directory part in $path.  When the script name contains no "/" the match
# fails; $0 is then left as it is instead of being overwritten with an
# undefined value.
if ($0 =~ /^(.*)\/([^\/]+)$/) {                         # strip path...
   ($path,$0) = ($1,$2);
}

&Getopts('df:hw');      # options d, h, w are flags; f takes an argument
&modify_config;         # an external config file may override the defaults
&update_PATH;           # append @ADDTOPATH to PATH

# Help text.  It is built after modify_config has run, so $inflowconf and
# $inflowconfinfo as well as any overridden helper program names show the
# values that are really in effect.  The synopsis lists every option that
# is handed to Getopts ('df:hw').
$Usage="$0    -  $Copy

Release $RELEASE  of $RELDATE

Use data produced by inflow-collect and produce some plotfiles
Output are some gif pictures

Usage:  $0 [-dhw] [-f configfile]

Parameters:

   -d:             turn on verbosity; for debugging only
   -f<configfile>: load external configuration file 
                   (default: $inflowconf)
   -h:             This help.
   -w:             plot weekly statistics

required helper programs:

gnuplot:                           $GNUPLOT
one of ppmtogif (from pbmplus):    $PPMTOGIF
    or convert (from ImageMagick): $IMCONVERT

external config file: $inflowconfinfo

\n";


# -h: print the help text and stop successfully
if ($opt_h) { print "$Usage"; exit 0; }

# The scratch files ($TMPFILE, $TMPPIC) are addressed relative to the
# current directory, so stop right away when $WORKDIR cannot be entered.
chdir($WORKDIR) || die "can't chdir to $WORKDIR: $!\n";

&gethostandfqdn;                      # sets $hostname and $fqdn

# Take one timestamp and derive every date/time value from it; localtime
# returns (sec,min,hour,mday,mon,year,wday,...), elements 1..6 are used.
$timenow = time;
($thisminute,$thishour,$thisday,$thismonth,$thisyear,$thisweekday)
   = (localtime($timenow))[1..6];
# yymmdd, the same layout "date +%y%m%d" prints
$date = sprintf("%02d%02d%02d", $thisyear % 100, $thismonth + 1, $thisday);
$suffix = $WANTHOSTNAME ? "-$hostname" : "";

#
# process files ----------------------------------------------------
#

# Collect the hourly summary files of the current weekday.  The pattern is
# anchored at the start of the name and has its dots escaped, so only names
# beginning with "inflow.sum.<weekday>-" are taken (the same names the
# weekly pass selects with its shell glob).
opendir(DIR,"$HOURDIR") || die "can't opendir $HOURDIR\n";
$pat = "^inflow\\.sum\\." . $thisweekday . "-";
@files = grep(/$pat/,readdir(DIR));
closedir(DIR);

# File layout as used below:
#   a line containing "timenow:" carries the collection time (seconds since
#     the epoch) in its third field
#   a line starting with "pn" carries site, article count and volume
foreach $curfile (@files) {
   print "reading $curfile\n" if ($opt_d);
   $curpath = $HOURDIR . "/" . $curfile;
   open(FIL,"$curpath") || die "can't open file $curpath\n";
   $filetime=$timenow;               # until a "timenow:" line says otherwise
   while(<FIL>) {
      chop;
      ($tag1,$tag2,$filetime,$filedate) = split if (/timenow:/);
      # same weekday, but older than 24 hours: ignore the rest of the file
      last if ($filetime < ($timenow-86400));
      if (/^pn/) {
         ($tag1,$filesite,$fileart,$filevol) = split;
         $filehour = (localtime($filetime))[2];
         $allsites{$filesite}++;
         $allhours{$filehour}++;
         $articles{$filehour,$filesite} = $fileart;
         $volume{$filehour,$filesite} = $filevol;
      }

   }
   close(FIL);
}

# Daily pictures: add the "TOTAL" pseudo site, drop every site that stays
# below $THRESHOLD percent and draw the four 24-hour plots.
&total();
&reducesite($THRESHOLD);
&plotartperhour();
&plotartcum();
&plotvolperhour();
&plotvolcum();

# remove the scratch files of the plot run
unlink($TMPFILE);
unlink($TMPPIC);

# Weekly pictures (-w): a sliding window over the last seven days.
# The x value of a file is not a clock hour but a counter: the files are
# listed newest first (ls -t) and numbered 0, -1, -2, ... in that order.
if ($opt_w) {
   # start again with empty tables
   undef %allhours;
   undef %allsites;
   undef %articles;
   undef %volume;

   $weekhour++;                      # the loop below decrements first
   $pat = "$HOURDIR/inflow.sum.[0-6]-*";
   @files = `ls -t $pat`;
   while (chop($curfile = shift(@files))) {
      print "reading $curfile\n" if ($opt_d);
      $weekhour--;
      $filetime = $timenow;          # until a "timenow:" line says otherwise
      open(FIL,"$curfile") || die "can't open file $curfile\n";
      while (<FIL>) {
         chop;
         if (/timenow:/) {
            ($a,$b,$filetime,$filedate) = split;
         }
         if ($filetime < ($timenow - 7*86400)) {
            # outside the window: hand the slot back and skip the file
            print "too old $curfile\n" if ($opt_d);
            $weekhour++;
            last;
         }
         next unless (/^pn/);
         ($a,$filesite,$fileart,$filevol) = split;
         $allsites{$filesite}++;
         $allhours{$weekhour}++;
         $articles{$weekhour,$filesite} = $fileart;
         $volume{$weekhour,$filesite} = $filevol;
      }
      close(FIL);
   }

   &total();
   &reducesite($THRESHOLDW);
   &plotartweek();
   &plotvolweek();
}

# final cleanup of the scratch files
unlink($TMPFILE);
unlink($TMPPIC);


# reducesite
#----------------------------------------------------------------------
# delete sites from array which have less than threshold articles
#
# Argument: $TH - threshold in percent of the article sum of site "TOTAL"
# Reads:    %allhours, %allsites, %articles
# Changes:  %sum      (articles per site, summed over all hours)
#           %allsites (sites below the threshold are removed)
#
# %sum is rebuilt from scratch on every call.  Without that, a second call
# (the weekly plots) would add its numbers on top of the sums left over
# from the first call (the daily plots) and compute a wrong threshold.
sub reducesite {
   local ($TH) = @_;
   %sum = ();
   foreach $h (keys %allhours) {
      foreach $s (keys %allsites) {
         $sum{$s} += $articles{$h,$s};
      }
   }
   $t = $sum{"TOTAL"}/100*$TH;
   print "calculated # of articles as threshold: $t\n" if ($opt_d);
   foreach $s (keys %sum) {
      if ($sum{$s} < $t) {
         print "deleted: $s (articles: $sum{$s} )\n" if ($opt_d);
         delete $sum{$s};
         delete $allsites{$s};
      }
   }

}



# total
#----------------------------------------------------------------------
# compute summary of all sites
#
# For every hour in %allhours the article and volume numbers of all sites
# in %allsites are added up under the site name "TOTAL"; afterwards
# "TOTAL" itself is entered into %allsites.
sub total {
   local(@hours) = keys %allhours;
   local(@sites) = keys %allsites;
   local($hour,$site);
   foreach $hour (@hours) {
      foreach $site (@sites) {
         $articles{$hour,"TOTAL"} += $articles{$hour,$site};
         $volume{$hour,"TOTAL"}   += $volume{$hour,$site};
      }
   }
   $allsites{"TOTAL"} += 1;
}




# plotartperhour
#----------------------------------------------------------------------
# construct plot; articles per site per hour
#
# Writes one line per key of %allhours to $TMPFILE: the hour, followed by
# the article count of every site of %allsites (sorted by name, a missing
# count is written as 0).  gnuplot draws one curve per site into $TMPPIC
# and TMP2gif converts that picture to $PIC1.
sub plotartperhour {
   local($val,$row,$plotstr,@curves);

   open(OUT,">$TMPFILE") || die "unable to open $TMPFILE: $!\n";
   foreach $h (sort numerically keys %allhours) {
      print OUT "$h";
      foreach $s (sort keys %allsites) {
         $val = $articles{$h,$s} || 0;
         print OUT " $val";
      }
      print OUT "\n";
   }
   close(OUT);

   # column 1 holds the hour, so the first site is found in column 2
   $row = 2;
   foreach $s (sort keys %allsites) {
      push(@curves,"\"$TMPFILE\" using 1:$row title \"$s\" with lines");
      $row++;
   }
   $plotstr = "plot " . join(",",@curves);

   # The here-document delimiter is quoted: the shell then hands the text
   # to gnuplot as it is and does not expand "\$", backquotes or
   # backslashes that may occur in site names or in the host name.
   $cmd = "$GNUPLOT << 'EOF'
      cd \"$WORKDIR\"
      set term pbm color
      set output \"$TMPPIC\"
      set xrange [0:23]
      set title \"Articles per hour for different sites ($fqdn: $date)\"
      set xlabel \"hour of day\"
      set ylabel \"articles\"
      $plotstr
EOF
";

   # report a failing gnuplot run; the conversion is still attempted
   system($cmd) == 0 || warn "gnuplot failed (status $?) for $PIC1\n";
   &TMP2gif($PIC1);

}


# plotartcum
#----------------------------------------------------------------------
# construct plot; articles cumulative per site
#
# Writes one line per key of %allhours (in ascending order) to $TMPFILE:
# the hour, followed by the running article sum of every site of
# %allsites (sorted by name).  gnuplot draws one curve per site into
# $TMPPIC and TMP2gif converts that picture to $PIC2.
sub plotartcum {
   # %cum is private to one call, so the running sums always start at 0
   local($row,$plotstr,@curves,%cum);

   open(OUT,">$TMPFILE") || die "unable to open $TMPFILE: $!\n";
   foreach $h (sort numerically keys %allhours) {
      print OUT "$h";
      # data columns and plot titles both follow the sorted site list
      foreach $s (sort keys %allsites) {
         $cum{$s} += ($articles{$h,$s} || 0);
         print OUT " $cum{$s}";
      }
      print OUT "\n";
   }
   close(OUT);

   # column 1 holds the hour, so the first site is found in column 2
   $row = 2;
   foreach $s (sort keys %allsites) {
      push(@curves,"\"$TMPFILE\" using 1:$row title \"$s\" with lines");
      $row++;
   }
   $plotstr = "plot " . join(",",@curves);

   # The here-document delimiter is quoted: the shell then hands the text
   # to gnuplot as it is and does not expand "\$", backquotes or
   # backslashes that may occur in site names or in the host name.
   $cmd = "$GNUPLOT << 'EOF'
      cd \"$WORKDIR\"
      set term pbm color
      set output \"$TMPPIC\"
      set xrange [0:23]
      set title \"Articles cumulative for different sites ($fqdn: $date)\"
      set xlabel \"hour of day\"
      set ylabel \"articles\"
      $plotstr
EOF
";

   # report a failing gnuplot run; the conversion is still attempted
   system($cmd) == 0 || warn "gnuplot failed (status $?) for $PIC2\n";
   &TMP2gif($PIC2);

}



# plotvolperhour
#----------------------------------------------------------------------
# construct plot; volume per site per hour
#
# Writes one line per key of %allhours to $TMPFILE: the hour, followed by
# the volume of every site of %allsites (sorted by name) divided by
# 1000000; a missing value is written as 0.  gnuplot draws one curve per
# site into $TMPPIC and TMP2gif converts that picture to $PIC3.
sub plotvolperhour {
   local($val,$row,$plotstr,@curves);

   open(OUT,">$TMPFILE") || die "unable to open $TMPFILE: $!\n";
   foreach $h (sort numerically keys %allhours) {
      print OUT "$h";
      foreach $s (sort keys %allsites) {
         $val = ($volume{$h,$s} || 0) / 1000000;    # plotted as MB
         print OUT " $val";
      }
      print OUT "\n";
   }
   close(OUT);

   # column 1 holds the hour, so the first site is found in column 2
   $row = 2;
   foreach $s (sort keys %allsites) {
      push(@curves,"\"$TMPFILE\" using 1:$row title \"$s\" with lines");
      $row++;
   }
   $plotstr = "plot " . join(",",@curves);

   # The here-document delimiter is quoted: the shell then hands the text
   # to gnuplot as it is and does not expand "\$", backquotes or
   # backslashes that may occur in site names or in the host name.
   $cmd = "$GNUPLOT << 'EOF'
      cd \"$WORKDIR\"
      set term pbm color
      set output \"$TMPPIC\"
      set xrange [0:23]
      set title \"Volume per hour for different sites ($fqdn: $date)\"
      set xlabel \"hour of day\"
      set ylabel \"volume (MB)\"
      $plotstr
EOF
";

   # report a failing gnuplot run; the conversion is still attempted
   system($cmd) == 0 || warn "gnuplot failed (status $?) for $PIC3\n";
   &TMP2gif($PIC3);

}



# plotvolcum
#----------------------------------------------------------------------
# construct plot; volume cumulative per site
#
# Writes one line per key of %allhours (in ascending order) to $TMPFILE:
# the hour, followed by the running volume sum (volume / 1000000) of
# every site of %allsites (sorted by name).  gnuplot draws one curve per
# site into $TMPPIC and TMP2gif converts that picture to $PIC4.
sub plotvolcum {
   # %vcum is private to one call, so the running sums always start at 0
   local($row,$plotstr,@curves,%vcum);

   open(OUT,">$TMPFILE") || die "unable to open $TMPFILE: $!\n";
   foreach $h (sort numerically keys %allhours) {
      print OUT "$h";
      # data columns and plot titles both follow the sorted site list
      foreach $s (sort keys %allsites) {
         $vcum{$s} += ($volume{$h,$s} || 0) / 1000000;   # plotted as MB
         print OUT " $vcum{$s}";
      }
      print OUT "\n";
   }
   close(OUT);

   # column 1 holds the hour, so the first site is found in column 2
   $row = 2;
   foreach $s (sort keys %allsites) {
      push(@curves,"\"$TMPFILE\" using 1:$row title \"$s\" with lines");
      $row++;
   }
   $plotstr = "plot " . join(",",@curves);

   # The here-document delimiter is quoted: the shell then hands the text
   # to gnuplot as it is and does not expand "\$", backquotes or
   # backslashes that may occur in site names or in the host name.
   $cmd = "$GNUPLOT << 'EOF'
      cd \"$WORKDIR\"
      set term pbm color
      set output \"$TMPPIC\"
      set xrange [0:23]
      set title \"Volume cumulative for different sites ($fqdn: $date)\"
      set xlabel \"hour of day\"
      set ylabel \"volume (MB)\"
      $plotstr
EOF
";

   # report a failing gnuplot run; the conversion is still attempted
   system($cmd) == 0 || warn "gnuplot failed (status $?) for $PIC4\n";
   &TMP2gif($PIC4);


}




# plotartweek
#----------------------------------------------------------------------
# construct plot; articles per site per hour for one week
#
# Same data layout as plotartperhour, but the keys of %allhours are the
# offsets counted by the weekly pass (0 for the newest file, negative for
# older ones) and the x axis runs from $weekhour to 0.  The picture is
# converted to $PIC5.
sub plotartweek {
   local($val,$row,$plotstr,@curves);

   open(OUT,">$TMPFILE") || die "unable to open $TMPFILE: $!\n";
   foreach $h (sort numerically keys %allhours) {
      print OUT "$h";
      foreach $s (sort keys %allsites) {
         $val = $articles{$h,$s} || 0;
         print OUT " $val";
      }
      print OUT "\n";
   }
   close(OUT);

   # column 1 holds the offset, so the first site is found in column 2
   $row = 2;
   foreach $s (sort keys %allsites) {
      push(@curves,"\"$TMPFILE\" using 1:$row title \"$s\" with lines");
      $row++;
   }
   $plotstr = "plot " . join(",",@curves);

   # The here-document delimiter is quoted: the shell then hands the text
   # to gnuplot as it is and does not expand "\$", backquotes or
   # backslashes that may occur in site names or in the host name.
   $cmd = "$GNUPLOT << 'EOF'
      cd \"$WORKDIR\"
      set term pbm color
      set output \"$TMPPIC\"
      set size 1.5,1
      set xrange [$weekhour:0]
      set xtics -168,24,0
      set title \"Articles per hour for different sites ($fqdn: $date)\"
      set xlabel \"hours up to now\"
      set ylabel \"articles\"
      $plotstr
EOF
";

   # report a failing gnuplot run; the conversion is still attempted
   system($cmd) == 0 || warn "gnuplot failed (status $?) for $PIC5\n";
   &TMP2gif($PIC5);

}


# plotvolweek
#----------------------------------------------------------------------
# construct plot; volume per site per hour for one week
#
# Same data layout as plotvolperhour (volume / 1000000), but the keys of
# %allhours are the offsets counted by the weekly pass (0 for the newest
# file, negative for older ones) and the x axis runs from $weekhour to 0.
# The picture is converted to $PIC6.
sub plotvolweek {
   local($val,$row,$plotstr,@curves);

   open(OUT,">$TMPFILE") || die "unable to open $TMPFILE: $!\n";
   foreach $h (sort numerically keys %allhours) {
      print OUT "$h";
      foreach $s (sort keys %allsites) {
         $val = ($volume{$h,$s} || 0) / 1000000;    # plotted as MB
         print OUT " $val";
      }
      print OUT "\n";
   }
   close(OUT);

   # column 1 holds the offset, so the first site is found in column 2
   $row = 2;
   foreach $s (sort keys %allsites) {
      push(@curves,"\"$TMPFILE\" using 1:$row title \"$s\" with lines");
      $row++;
   }
   $plotstr = "plot " . join(",",@curves);

   # The here-document delimiter is quoted: the shell then hands the text
   # to gnuplot as it is and does not expand "\$", backquotes or
   # backslashes that may occur in site names or in the host name.
   $cmd = "$GNUPLOT << 'EOF'
      cd \"$WORKDIR\"
      set term pbm color
      set output \"$TMPPIC\"
      set size 1.5,1
      set xrange [$weekhour:0]
      set xtics -168,24,0
      set title \"Volume per hour for different sites ($fqdn: $date)\"
      set xlabel \"hours up to now\"
      set ylabel \"volume (MB)\"
      $plotstr
EOF
";

   # report a failing gnuplot run; the conversion is still attempted
   system($cmd) == 0 || warn "gnuplot failed (status $?) for $PIC6\n";
   &TMP2gif($PIC6);

}


# gethostandfqdn
#----------------------------------------------------------------------
# construct fully qualified domain name...
#
# Sets the globals $hostname (short name) and $fqdn.
# "uname -n" is asked first.  If its answer contains no dot, the domain is
# taken from the last "domain" line of /etc/resolv.conf.  If no such line
# can be read, $fqdn is just the host name; a match result is only used
# after the match has succeeded, so no stale $1 and no trailing "." can
# end up in $fqdn.
sub gethostandfqdn {
    local($str,$line,$domain);
    chop($str=`uname -n`);
    if ($str =~ /\./) {             # str is fqdn
        $fqdn = $str;
        ($hostname) = ($str =~ /^([^.]+)\./);
    } else {                        # str is simple hostname
        $hostname = $str;
        $domain = "";
        if (open(RESOLV,"/etc/resolv.conf")) {
            while ($line = <RESOLV>) {
                # comment lines do not match because of the anchor
                $domain = $1 if ($line =~ /^\s*domain\s+(\S+)/);
            }
            close(RESOLV);
        }
        $fqdn = ($domain ne "") ? $hostname . "." . $domain : $hostname;
    }
}

# numerically
#----------------------------------------------------------------------
# sort numerically
#
# Comparison routine for sort: orders the two values sort provides in
# $a and $b as numbers, smallest first.
sub numerically {
    return $a <=> $b;
}


# TMP2gif
# --------------------------------------------------------------------
# convert TMPPIC to GIF-format picture: takes output filename as arg
#
# Argument: $outpic - output name without suffix and extension; the file
#                     written is "$outpic$suffix.gif"
# Uses $IMCONVERT if it is not empty, otherwise $PPMTOGIF; dies if both
# are empty.  The converter's own messages are discarded (2>/dev/null),
# therefore a failing conversion is reported here by means of the exit
# status.  Returns the value of system().
sub TMP2gif {
    local($outpic) = @_;
    local($status);
    print "converting picture format for: $outpic\n" if ($opt_d);
    if ($IMCONVERT ne "") {
        print "using converter $IMCONVERT\n" if ($opt_d);
        $cmd = "$IMCONVERT $WORKDIR/$TMPPIC gif:$outpic$suffix.gif 2>/dev/null";
    }
    elsif ($PPMTOGIF ne "") {
        print "using converter $PPMTOGIF\n" if ($opt_d);
        $cmd = "$PPMTOGIF < $WORKDIR/$TMPPIC > $outpic$suffix.gif 2>/dev/null";
    }
    else {
        die "no GIF-converter available for $outpic\n";
    }
    print "  cmd: $cmd\n" if ($opt_d);
    $status = system($cmd);
    warn "GIF conversion failed (status $status): $cmd\n" if ($status != 0);
    $status;
}


# update_PATH
# -----------------------------------------------------------------------------
# enhance PATH to support scripts run from cron
#
# The directories of @ADDTOPATH are appended to the current PATH; a
# directory that occurs more than once is kept at its first position only.
# @ENVPATH holds the full list afterwards, %envpathseen the directories
# already taken.
sub update_PATH {
    local($dir,@unique);
    warn "OLD PATH=$ENV{'PATH'}\n" if $opt_d;
    @ENVPATH = (split(/:/,$ENV{'PATH'}), @ADDTOPATH);

    foreach $dir (@ENVPATH) {
        unless ($envpathseen{$dir}) {       # first occurrence only
            $envpathseen{$dir} = 1;
            push(@unique,$dir);
        }
    }
    $ENV{'PATH'} = join(":",@unique);
    warn "NEW PATH=$ENV{'PATH'}\n" if $opt_d;
}


# modify_config
# -----------------------------------------------------------------------------
# load the external config file, if there is one
#
# The file name is taken from option -f or, without it, from $INFLOWCONF.
# Sets $inflowconf (file name in effect) and $inflowconfinfo (text for the
# usage message).  The directory of the config file is appended to @INC.
#
# The file that is tested with -e is the file that gets loaded: a name
# that is neither absolute nor starts with "./" or "../" is prefixed with
# "./", because require would otherwise search @INC for it and could pick
# up a different file of the same name - or none at all.
sub modify_config {
    local($p,$n,$file);
    $inflowconf = $opt_f ? $opt_f : $INFLOWCONF;
    if ($inflowconf =~ /^(.*)\/([^\/]+)$/) {
        $p = $1;
        $n = $2;
        push(@INC,$p);
    } else {
        $n = $inflowconf;
    }
    warn "p=$p  n=$n  inflowconf=$inflowconf\n" if $opt_d;
    if (-e $inflowconf) {
        warn "loading local configs from $inflowconf...\n" if $opt_d;
        $file = ($inflowconf =~ /^(\/|\.\.?\/)/) ? $inflowconf : "./$inflowconf";
        require $file;
        $inflowconfinfo = "$CFRELEASE - $inflowconf";
    } else {
        warn "didn't find any local configs $inflowconf on INC=@INC\n" if $opt_d;
        $inflowconfinfo = "- none -";
    }
}


