#!/usr/local/bin/perl

#-----------------------------------------------------------------------------
# bdstop
#
# Author     : Satya Venneti
# Date       : Jul 07, 1995
# Version    : 1.1
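# Description: This program parses the .glimpse_index file in the broker
#              directory and extracts keywords from it.  It then updates
#              the shared stoplist with those keywords and prints, as HTML,
#              the keywords that are not in the stoplist.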
#-----------------------------------------------------------------------------

# Fall back to a default Harvest installation root when the environment
# does not define HARVEST_HOME.
$ENV{'HARVEST_HOME'} = "/usr/local/harvest"
	if (!defined($ENV{'HARVEST_HOME'}));
# HSR_HOME is always derived from HARVEST_HOME; any value inherited from the
# environment is overwritten.
$ENV{"HSR_HOME"} = "$ENV{'HARVEST_HOME'}/Admin";
# Append the HSR library directory to the search path used by the require
# statements below.
push(@INC,$ENV{"HSR_HOME"} . "/lib");
# NOTE(review): &CGIError and &HSR_Initialize are called by this script but
# are not defined in it -- presumably they come from these libraries; confirm.
require 'bdlibrary.pl';
require 'cgi.pl';
require 'hsrlibrary.pl';

#-----------------------------------------------------------------------------
# subroutines
#-----------------------------------------------------------------------------

#-----------------------------------------------------------------------------
# ParseIndex - reads <brokerdir>/.glimpse_index and returns its keywords
#
# Argument : $brokerdir - directory that contains the .glimpse_index file
# Returns  : a list with one entry per line of the index file, in file
#            order.  Each entry is the text in front of the first \002 byte
#            of the line with its first character (the leading \001 marker)
#            removed.  A line without any \002 byte is kept whole, minus its
#            first character.
# Errors   : calls &CGIError when the index file cannot be opened.
#-----------------------------------------------------------------------------

sub ParseIndex {

    local($brokerdir) = @_;

    # Scratch variables are localized so that every call starts with an
    # empty keyword list (repeated calls must not accumulate results) and
    # so that nothing leaks into same-named variables of the caller.
    local($word, $junk, @words);

    local($filename) = $brokerdir . "/.glimpse_index";

    # Explicit read mode: a directory argument that starts with '>' or '|'
    # must never be interpreted as an open mode.
    open (IFILE, "< $filename") || &CGIError("Unable to open $filename");
    while (<IFILE>)
    {
        # Only the field in front of the first \002 separator is of interest.
        ($word, $junk) = split (/\002/, $_);
        # Remove the leading 001 character before each word
        $word = substr($word, 1);
        push(@words, $word);
    }
    close IFILE;
    return (@words);
}

#-----------------------------------------------------------------------------
# ModifyStopList - reconciles a broker's keyword list with the stoplist file
#
# Arguments: the list of keywords of one broker
# Behaviour:
#   * stoplist file cannot be opened: the keywords are written out as the
#     new stoplist and the keyword list is returned unchanged.
#   * otherwise: the stoplist is rewritten as the intersection of the old
#     stoplist and the keywords, and the keywords that are NOT in that new
#     stoplist are returned.
# Returns  : list of keywords as described above
#
# NOTE(review): any open failure (not only a missing file) takes the first
# branch and overwrites the stoplist -- confirm this is intended.
# NOTE(review): the stoplist path is hard-coded and is not derived from
# $ENV{'HSR_HOME'} set at the top of this script -- confirm the location.
#-----------------------------------------------------------------------------

sub ModifyStopList {

    local(@newarray) = @_;
    # These arrays are handed to &diff as typeglobs, so they have to be
    # package variables; local keeps them from leaking out of this call.
    local(@stoplist);
    local(@InStopNotinNew);
    # name of the file in which stop list is stored
    local($stoplistname) =
        "/local/harvest/Admin/descriptions/stop.list";

    unless (open (SLIST, $stoplistname))
    {
        &WriteStop(@newarray);
        return (@newarray);
    }
    while (<SLIST>)
    {
        # Strip the line terminator only when there is one; a final line
        # without a newline must not lose the last character of its word.
        chop if (/\n$/);
        push(@stoplist, $_);
    }
    # Release the read handle before &WriteStop truncates the same file.
    close SLIST;

    # stoplist words that do not occur among the new keywords
    @InStopNotinNew = &diff(*newarray, *stoplist);
    # modify stoplist to intersection of stoplist and newarray
    @stoplist = &diff(*InStopNotinNew, *stoplist);
    &WriteStop(@stoplist);
    # modify new array to difference of newarray and modified stoplist
    @newarray = &diff(*stoplist, *newarray);
    return (@newarray);
}

#-----------------------------------------------------------------------------
# diff - order-preserving difference of two arrays
#
# Arguments: two typeglobs naming package arrays, e.g. &diff(*skip, *all)
# Returns  : every element of the second array that does not occur in the
#            first array, in the order of the second array.  Neither input
#            array is modified.
#-----------------------------------------------------------------------------

sub diff {
    local(%excluded, @kept);
    # Alias the caller's arrays as @array1 / @array2 for the duration of
    # this call.
    local(*array1, *array2) = @_;

    # Pass 1: remember every element of the first array.
    for (@array1)
    {
        $excluded{$_}++;
    }

    # Pass 2: keep the elements of the second array never seen in pass 1.
    for (@array2)
    {
        push(@kept, $_) unless ($excluded{$_});
    }

    return (@kept);
}
     

#-----------------------------------------------------------------------------
# WriteArray - prints a list of keywords to STDOUT as a complete HTML page
#
# Arguments: the keywords to list
# Output   : a fixed page header and introduction, then every keyword on a
#            line of its own followed by <P>, then the closing BODY and HTML
#            tags.
#-----------------------------------------------------------------------------

sub WriteArray {

    local(@keywords) = @_;

    # Fixed page prologue, one list element per output line.  The trailing
    # blanks on two of the lines are part of the emitted text.
    local($prologue) = join("\n",
        '<HTML>',
        '<HEAD>',
        '<TITLE>List of Unique Keywords in this broker</TITLE>',
        '</HEAD>',
        '<BODY>',
        '<H1>List of Unique Keywords in this broker</H1>',
        '<P> This is a list of keywords unique to this broker.  It is computed by ',
        'taking the difference of the set of all keywords of this broker and the ',
        'Stoplist.  The StopList consists of words common to all brokers in the HSR.</P>',
        '');
    print STDOUT $prologue;

    # One keyword per line, each followed by a paragraph tag.
    foreach $keyword (@keywords)
    {
        print STDOUT $keyword, "<P>\n";
    }

    print STDOUT "</BODY>\n</HTML>\n";
}

#-----------------------------------------------------------------------------
# WriteStop - replaces the contents of the stoplist file with a word list
#
# Arguments: the words to store, written one per line
# Errors   : calls &CGIError when the file cannot be opened for writing or
#            when the data cannot be flushed to it.
#
# NOTE(review): the stoplist path is hard-coded and is not derived from
# $ENV{'HSR_HOME'} set at the top of this script -- confirm the location.
#-----------------------------------------------------------------------------

sub WriteStop {

    local(@array) = @_;
    # name of the file in which stop list is stored
    local($stoplistname) =
        "/local/harvest/Admin/descriptions/stop.list";

    unless (open (OFILE, ">$stoplistname"))
    {
        &CGIError("Unable to open $stoplistname");
        # Only reached if &CGIError returns; there is nothing to write to.
        return;
    }
    for (@array)
    {
        print OFILE $_, "\n";
    }
    # Output is buffered, so a full disk or quota error may only show up
    # when the handle is closed.  Report it instead of silently leaving a
    # truncated stoplist behind.
    close (OFILE) || &CGIError("Unable to write $stoplistname");
}



#-----------------------------------------------------------------------------
# Main - program entry point
#
# Command line: bdstop <type> <description file> <broker directory>
# Steps       : initialise the HSR library, validate the argument count,
#               read the broker's keywords, reconcile them with the
#               stoplist and print the remaining keywords as HTML.
# Only the broker directory is used by this script itself; the other two
# arguments are merely required to be present.
#-----------------------------------------------------------------------------
sub Main{
    local($type, $descfile, $bdir) = @ARGV;
    local(@uniquearray);
    local(@words);

    &HSR_Initialize;

    # All three arguments are mandatory.
    unless (defined($type) && defined($descfile) && defined($bdir))
    {
        &CGIError("Usage: bdstop <type> <description file> <broker directory>");
    }

    @words       = &ParseIndex($bdir);
    @uniquearray = &ModifyStopList(@words);
    &WriteArray(@uniquearray);

    exit(0);
}

&Main;












