#!/usr/local/bin/perl

# Don't use eval for CGI scripts

#-----------------------------------------------------------------------------
# bdlogs  - Creates a better description file by adding to it:
#		  a.) Commonly Asked Queries
#		  b.) Embedded Objects - most commonly returned
#		  c.) The list of words indexed by Glimpse, minus the stop
#		      list of all brokers.
# Author : Satya Venneti
# Date   : Jun 30, '95
# Version: 1.1
#          c.) of above still needs to be done.
#-----------------------------------------------------------------------------
# Fall back to the default install location when HARVEST_HOME is not set
# in the environment.
$ENV{'HARVEST_HOME'} = "/usr/local/harvest"
	if (!defined($ENV{'HARVEST_HOME'}));
# HSR_HOME is always derived from HARVEST_HOME; any inherited value is
# overwritten.
$ENV{"HSR_HOME"} = "$ENV{'HARVEST_HOME'}/Admin";
# Extend the library search path before the requires below are executed.
push(@INC,$ENV{"HSR_HOME"} . "/lib");
require 'bdlibrary.pl';
require 'cgi.pl';
require 'hsrlibrary.pl';

#-----------------------------------------------------------------------------
# Globals
#-----------------------------------------------------------------------------
# "count|query" strings, filled in by ProcessLog from SortArr's result.
undef @SortedQ;
# "count|object" strings, filled in by ProcessLog from SortArr's result.
undef @SortedO;
# Broker directory; set by Main from the third command-line argument.
undef $brokerdir;

#-----------------------------------------------------------------------------
#   Subroutines
#-----------------------------------------------------------------------------



#-----------------------------------------------------------------------------
# SortArr - order the entries of an associative array by their counts
#
# Arguments: a flattened (key, count, key, count, ...) list, i.e. an
#            associative array passed by value.
# Returns:   a list of "count|key" strings in ascending order of count;
#            entries with equal counts are ordered by key.
#
# The counts are compared numerically.  A plain string sort of the
# "count|key" strings would place "10|x" ahead of "9|y", and the callers
# take the most frequent entries from the end of the returned list.
#-----------------------------------------------------------------------------

sub SortArr{
    my(%count) = @_;
    my(@vals);

    @vals = map { "$count{$_}|$_" }
	    sort { $count{$a} <=> $count{$b} || $a cmp $b } keys %count;
    return @vals;
}

#-----------------------------------------------------------------------------
# ProcessLog - tally the queries and returned objects recorded in a log
#
# Reads lines from the already-open filehandle IFILE until end of file.
#   * A line starting with "QUERY" followed by whitespace counts as one
#     query.  Its query text is what follows the last "#END" marker and one
#     whitespace character.
#   * A line starting with "QUERY-RETURN" names a returned object.  The
#     object id is the number that follows "<word>-<word> <digits>:: ".
#
# Side effects: sets the globals @SortedQ and @SortedO to the results of
#               &SortArr.  Each entry looks like "frequency|query" or
#               "frequency|object"; callers read the most frequent entries
#               from the end of these arrays.
# Returns:      the total number of QUERY lines seen.
#-----------------------------------------------------------------------------

sub ProcessLog{
    my(%Queries);	# query text -> number of times it was asked
    my(%Objects);	# object id  -> number of times it was returned
    my($line, $pos, $query);
    my($totalqs) = 0;

    # A named line variable is used so that the caller's $_ is not clobbered
    # and so that the second test below always sees the unmodified line.
    while (defined($line = <IFILE>))
    {
	if ($line =~ /^QUERY\s/)
	{
	    $totalqs++;

	    # chomp, unlike chop, leaves a final line without a trailing
	    # newline intact.
	    chomp($line);

	    # Without an "#END" marker there is no query text to record:
	    # rindex returns -1, and substr would then pick up the last
	    # character of the line instead.
	    $pos = rindex($line, "#END");
	    next if ($pos < 0);

	    $query = substr($line, $pos);
	    $query =~ s/#END\s//;
	    $Queries{$query}++;
	}
	elsif ($line =~ /^QUERY\-RETURN/)
	{
	    # Only tally lines from which an object id can be extracted;
	    # anything else would otherwise be counted whole as an "object".
	    if ($line =~ /\w+\-\w+\s+\d+::\s+(\d+)/)
	    {
		$Objects{$1}++;
	    }
	}
    }

    @SortedQ = &SortArr (%Queries);
    @SortedO = &SortArr (%Objects);
    return ($totalqs);
}

#-----------------------------------------------------------------------------
# GetData - read a log file and tally its contents
#
# Arguments: $filename - path of the log file to read.
# Returns:   the total number of queries, as returned by &ProcessLog.
#
# The file is opened on the global filehandle IFILE, which &ProcessLog reads.
# If the file cannot be opened, &CGIError is called with a message.
#-----------------------------------------------------------------------------
sub GetData {
    local ($filename) = @_;

    # Three-argument open: the name is always treated as a file to read,
    # even if it starts or ends with characters such as ">", "|" or
    # whitespace that two-argument open would interpret as a mode.
    open(IFILE, '<', $filename) || &CGIError("Unable to open $filename ");
    local ($totalqs) = &ProcessLog();
    close (IFILE);
    return ($totalqs);
}

#-----------------------------------------------------------------------------
# EmbedObject - embed the most frequently returned objects into the
#               description
#
# Arguments: *obj - typeglob of the description's associative array; the
#                   new fields are stored in %obj.
#
# Walks the global @SortedO from its end (the most frequent objects) and,
# for each object file that can be opened under
# $brokerdir/objects/<dir>/OBJ<id>, stores every attribute parsed from it as
# "Embed<N>-<attribute>", where N is the object's rank starting at 1.
#
# The loop runs for ranks 1 .. $max - 1, so two objects are embedded by
# default ($max is 3 unless $Config{"Objects"} is defined).
# NOTE(review): it is not clear whether $Config{"Objects"} is meant to be
# the number of objects or one more than that - confirm before changing.
#-----------------------------------------------------------------------------
sub EmbedObject {
    local(*obj) = @_;
    local($cnt,$max,$object,$dir,$filename,$k);
    local($type,$url,%SOIF);

    $max = 3;
    $max = $Config{"Objects"} if (defined $Config{"Objects"});

    for ($cnt = 1; $cnt < $max; $cnt++) {
	# Stop once every distinct object has been used.  Without this
	# check the index below goes negative, wraps around to the end of
	# the array and embeds the same object a second time.
	last if ($cnt > @SortedO);

	$object = $SortedO[$#SortedO - $cnt + 1];

	# Drop the "frequency|" prefix to leave just the object id.
	$object =~ s/^\d+\|//;

	# The object's directory is the last two characters of its id with
	# one leading zero removed.
	$dir = substr($object, -2, 2);
	$dir =~ s/^0//;
	$filename = "$brokerdir/objects/$dir/OBJ$object";

	# An object that cannot be opened has been deleted since it was
	# logged; skip it.
	next unless (open(IFILE, '<', $filename));
	# Presumably returns the object's type, its URL and its
	# attribute/value pairs - verify against hsrlibrary.pl.
	($type, $url , %SOIF) = &HSR_ParseSOIF('IFILE');
	close(IFILE);

	foreach $k (keys %SOIF)	{
	    $obj{"Embed<$cnt>-$k"} = $SOIF{$k};
	}
    }

}

#-----------------------------------------------------------------------------
# EmbedQuery - embed the most commonly asked queries into the description
#
# Arguments: *obj - typeglob of the description's associative array; the
#                   result is stored in $obj{"Common-Queries"}.
#
# Takes the queries from the end of the global @SortedQ (the most frequent
# ones), most frequent first, and joins them one per line.  The number of
# queries is $Config{"Queries"} if defined, otherwise 2, and never more than
# the number of distinct queries that were seen.
#-----------------------------------------------------------------------------
sub EmbedQuery {
    local(*obj) = @_;
    my($qstring,$query,$cnt,$max);

    $max = 2;
    $max = $Config{"Queries"} if (defined $Config{"Queries"});

    # Never ask for more queries than exist.  Otherwise the index below goes
    # negative and wraps around, repeating a query (or, with no queries at
    # all, producing blank lines).
    $max = @SortedQ if ($max > @SortedQ);

    $qstring = "";

    for ($cnt = 0; $cnt < $max; $cnt++) {
	$query = $SortedQ[$#SortedQ - $cnt];
	# Drop the "frequency|" prefix to leave just the query text.
	$query =~ s/^\d+\|//;
	$qstring .= $query . "\n";
    }

    $obj{"Common-Queries"}  = $qstring;
}

#-----------------------------------------------------------------------------
# AppendFields - add the summary fields to the description
#
# Arguments: $totalqs - total number of queries found in the log.
#            *obj     - typeglob of the description's associative array.
#
# Sets these fields in %obj:
#   Total-Number-of-Queries   $totalqs
#   Number-of-Unique-Queries  number of entries in the global @SortedQ
#   Number-of-Objects         number of entries in the global @SortedO
#   Last-Update-Time          current local time as "HH:MM <month> D, YYYY",
#                             with the month name taken from @HSR_Month
#   Last-Update               the fixed text "Update Logs"
#-----------------------------------------------------------------------------
sub AppendFields {

    local($totalqs,*obj) = @_;
    my($sec,$min,$hour,$md,$mo,$yr) = localtime(time);

    $obj{"Total-Number-of-Queries"} = $totalqs;
    $obj{"Number-of-Unique-Queries"} = scalar(@SortedQ);
    $obj{"Number-of-Objects"} = scalar(@SortedO);

    # localtime returns the year as an offset from 1900, so it has to be
    # added rather than prefixed with "19" (which breaks from 2000 onwards).
    $obj{"Last-Update-Time"} = sprintf("%02d:%02d %s %d, %d",
	$hour, $min, $HSR_Month[$mo], $md, $yr + 1900);
    $obj{"Last-Update"} = "Update Logs";
}

#-----------------------------------------------------------------------------
# Main - entry point
#
# Command line: bdlogs <type> <descfile> <brokerdirectory>
#
# Parses the description file, checks that its type matches <type>, gathers
# statistics from <brokerdirectory>/admin/LOG, adds the embedded objects,
# common queries and summary fields to the description, writes it back to
# <descfile>, displays it and exits with status 0.
#-----------------------------------------------------------------------------
sub Main {
    local($type,$descfile,$bdir) = @ARGV;
    # %obj has to be a dynamically scoped package variable: it is handed to
    # the Embed*/AppendFields routines as the typeglob *obj.
    local($ntype,$url,%obj,$totalqs);

    &HSR_Initialize;

    # All three command-line arguments are required.
    unless (defined $type && defined $descfile && defined $bdir) {
	&CGIError("Usage : bdlogs <type> <descfile> <brokerdirectory>");
    }

    # Load the existing description and make sure it is of the expected type.
    ($ntype,$url,%obj) = &HSR_ParseDescription($descfile);
    if ($ntype ne $type) {
	&CGIError("Type mismatch in description file $ntype");
    }

    # The global $brokerdir is read by EmbedObject.
    $brokerdir = $bdir;

    # Gather the log statistics first; the Embed* routines depend on the
    # sorted arrays that this step fills in.
    $totalqs = &GetData("$brokerdir/admin/LOG");
    &EmbedObject(*obj);
    &EmbedQuery(*obj);
    &AppendFields($totalqs,*obj);

    # Save the updated description and show the result.
    &HSR_WriteDescription ($descfile,$type,$url,%obj);
    &HSR_DisplayDescription($descfile);
    exit(0);
}

# -----------------------------------------------------------------
# Run the script; Main ends with exit(0).
&Main;





