########################## -*- Mode: Perl -*- ##########################
##
## File             : search_index.pl
##
## Description      : dispatcher, main program
##
#
# Copyright (C) 1995, 1996 Ulrich Pfeifer, Norbert Goevert
#
# This file is part of SFgate.
#
# SFgate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# SFgate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with SFgate; see the file COPYING.  If not, write to
# the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
#
##
## Author           : Ulrich Pfeifer
## Created On       : Tue Feb 15 11:57:03 1994
##
## Last Modified By : Norbert Goevert
## Last Modified On : Mon Mar  3 16:31:04 1997
##
## $State: Exp $
##
## $Id: search_index.pl,v 5.1.1.6 1997/04/04 17:31:43 goevert Exp goevert $
##
## $Log: search_index.pl,v $
## Revision 5.1.1.6  1997/04/04 17:31:43  goevert
## patch11: clean up form
##
## Revision 5.1.1.5  1997/02/17 12:58:33  goevert
## patch10: conversion for WAIT databases
##
## Revision 5.1.1.4  1997/01/10 10:37:40  goevert
## patch9: changed parsing of _lines parameter for WAIT
## patch9: changed relative links
##
## Revision 5.1.1.3  1996/12/23 12:55:31  goevert
## patch6: use of the SFgate::Databases::{Search|Retrieve} functions
##
## Revision 5.1.1.2  1996/11/07 08:17:07  goevert
## patch5: highlight and directget bug
##
## Revision 5.1.1.1  1996/11/06 11:41:28  goevert
## patch4: revision number 5.1
##
## Revision 5.1  1996/11/05 16:56:13  goevert
## *** empty log message ***
##
## Revision 5.0.1.9  1996/11/04 13:11:26  goevert
## patch21: cons instead of MakeMaker
##
## Revision 5.0.1.8  1996/07/03 13:29:47  goevert
## patch19: encoding of multiple queries disabled when printing
##
## Revision 5.0.1.7  1996/06/04 16:18:47  goevert
## patch17: handling of <HR>
##
## Revision 5.0.1.6  1996/05/31 15:48:17  goevert
## patch14: SFgate-server
## patch14: request method handling
## patch14: minor fixings
##
## Revision 5.0.1.5  1996/05/23 17:10:11  goevert
## patch13: conversion for headlines
##
## Revision 5.0.1.4  1996/05/21 15:53:15  goevert
## patch12: changed LINK to new site
##
## Revision 5.0.1.3  1996/05/15 17:08:18  goevert
## patch10:
##
## Revision 5.0.1.2  1996/05/13 11:32:21  goevert
## patch1:
##
########################################################################


#-----------------------------------------------------------------
# Main dispatcher customize to your needs
# $query $ENV{'QUERY_STRING'}
#-----------------------------------------------------------------
sub do_request
{
    my($query) = @_;
    ## local variables
    local($_);
    my($tmp);
    my(@headlines, @descriptor, @tags, %tags);
    my($text, $docid, $url, $http_failure);
    my($further_query, $prior_query);
    my(%table, %values, $tquery, $tqueries, %databases, $databases, @databases);

    &log("$ENV{'REMOTE_HOST'}: " . &decode($query));
    
    %table = &parse_query_string($query);
    # save query table
    %values = %table;

    # just print external form for query refinement?
    $tmp = delete($table{$printform_field});
    if ($tmp) {
        $formextern = delete($table{$formextern_field});
        require "SFgate/Forms/$formextern.pm";
        $formextern = "SFgate::Forms::$formextern";
        $refinequery = $table{$refinequery_field};
        $form = new $formextern (&parse_query_string($refinequery));
        print "Content-Type: text/html\n\n";
        print $form->as_HTML;
        $form->delete;
        return;
    }
    
    # detex has been handled already in parse_query_string
    delete($table{$detex_field});

    # request method
    $tmp = delete($table{$requestm_field});
    if ($tmp =~ /GET|POST/i) {
        $requestm = $tmp;
    }
    else {
        $requestm = $request_method;
    }
    
    # language for SFgate output defaults to 'english'
    $tmp = delete($table{$language_field});
    $language = "\u$tmp" if $tmp;
    if ($actual_language ne $language) {
        $not_regexp = $not_regexp; # make perl -w happy
        eval { require "SFgate/Language/$language.pm"; };
        if ($@) {
            $language = 'English';
            require SFgate::Language::English;
        }

        no strict 'refs';
        my $prefix = 'SFgate::Language::' . $language . '::';
        my $language_var;
        
        $language_var = $prefix . 'and_regexp'; 
        $and_regexp = $$language_var;
        $language_var = $prefix . 'or_regexp'; 
        $or_regexp = $$language_var;
        $language_var = $prefix . 'not_regexp'; 
        $not_regexp = $$language_var;
        $language_var = $prefix . 'plaintext_regexp'; 
        $plaintext_regexp = $$language_var;
        $language_var = $prefix . 'soundex_regexp'; 
        $soundex_regexp = $$language_var;
        $language_var = $prefix . 'phonix_regexp'; 
        $phonix_regexp = $$language_var;
        $language_var = $prefix . 'wildcard_regexp'; 
        $wildcard_regexp = $$language_var;

        $language_var = $prefix . 'language';
        %language = %$language_var;
        
        $actual_language = $language;
    }
    
    # verbose headlines, defaults to 0
    $tmp = delete($table{$verbose_field});
    $verbose_headlines = 1 if $tmp =~ /^(yes|1)$/i;
    $verbose_headlines = 0 if $tmp =~ /^(no|0)$/i;

    # translated query
    $tmp = delete($table{$transquery_field});
    $transquery = &decode($tmp) if $tmp;
    
    # listenvironment defaults to description list
    $tmp = delete($table{$listenv_field});
    $listenv = $tmp if $tmp =~ /^(TABLE|DL|PRE)$/i;

    # application field defaults to ''
    $tmp = delete($table{$application_field});
    $application = $tmp if $tmp;

    # conversion of documents, defaults to ''
    $tmp = delete($table{$convert_field});
    $convert = $tmp if $tmp;

    # conversion of headlines, defaults to ''
    $tmp = delete($table{$converthl_field});
    if ($tmp) {
        $converthl = $tmp;
        require "SFgate/Converter/$converthl.pm";
        $converterhl = 'SFgate::Converter::' . $converthl . '::convert_headline';
    }

    # conversion of documents with multiple choice headline menus, defaults to ''
    $tmp = delete($table{$convertm_field});
    $convertm = $tmp if $tmp;

    # mapping of attributes
    $tmp = delete($table{$attributes_field});
    $attributes = 1 if $tmp =~ /^(yes|1)$/i;
    $attributes = 0 if $tmp =~ /^(no|0)$/i;

    # determine file with lattice
    $tmp = delete($table{$lattice_field});
    $latticefile = $tmp if -r $tmp;
    $latticefile = "$application_dir/$tmp" if -r "$application_dir/$tmp";

    # print debug messages, defaults to 0
    $tmp = delete($table{$debug_field});
    $debug = 1 if $tmp =~ /^(on|yes|1)$/i;
    $debug = 0 if $tmp =~ /^(off|no|0)$/i;
    if ($debug) {
        # '_do_request4' = Debug
        &print_header("$program $language{'do_request4'}");
    }

    # are there internal/external forms to refine query
    $tmp = delete($table{$formintern_field});
    $formintern = $tmp if $tmp;
    $tmp = delete($table{$formextern_field});
    $formextern = $tmp if $tmp;
    if ($formextern || $formintern) {
        if ($tmp = delete($table{$refinequery_field})) {
            $refinequery = $tmp;
        }
        else {
            $refinequery = $query;
        }
    }

    # do highlighting within documents retrieved?
    $tmp = delete($table{$highlight_field});
    $highlight = $tmp if defined($tmp);

    # WAIT: print how many lines around match
    $tmp = delete($table{$_lines_field});
    $_lines = $tmp if $tmp =~ /^\d+$/;
    
    # extract conditions for highlighting
    $tmp = delete($table{$conditions_field});
    if ($tmp) {
        $tmp = &decode($tmp);
        my $db_cond;
        foreach $db_cond (split(/$seperatorr{3}/, $tmp)) {
            my($db, @cond) = split(/$seperatorr{2}/, $db_cond);
            my $cond;
            my @new_cond;
            foreach $cond (@cond) {
                push(@new_cond, [split(/$seperatorr/, $cond)]);
            }
            if ($db) {
                $databases->set_conditions($db, \@new_cond);
            }
            else {
                $conditions = \@new_cond;
            }
        }
    }
    
    # multiple choice headlines, defaults to 0
    if ($table{$multiple_field} == 2) {
        $multiple_choice = 2;
        # get documents

        # give a direct http url if it fails to fetch a http url
        $directhttp = 1;

        foreach (@{$table{$document_field}}) {
            $tags{(split("$seperatorr", $_))[0]} = 1;
        }

        @tags = keys %tags;
        $databases = new SFgate::Databases::Databases (\@tags,
                                                       $default_wais_dir,
                                                       $application_dir,
                                                       0);
        require Wais;
        foreach (@{$table{$document_field}}) {
            @descriptor = split("$seperatorr", $_);
            $docid = new Wais::Docid ($descriptor[5],
                                      $descriptor[6],
                                      $descriptor[7]);
            ($text, $http_failure) = $databases->Retrieve($descriptor[0],
                                                          $docid,
                                                          $descriptor[4],
                                                          $transquery,
                                                          $_lines);
            if ($http_failure) {
                $url = (split(' ', $descriptor[7]))[2];
                # '_do_request1' = couldn't retrieve the document:
                # '_do_request3' = Try to get it directly!
                $text = '<A HREF="' . $url . '">' . $me . ' ' .
                    $language{'do_request1'} . ' ' . $http_failure .
                        "\n" . $language{'do_request3'} . "</A>\n";
            }

            push(@headlines, [[$descriptor[0],
                               $descriptor[1],
                               '',
                               $descriptor[2],
                               $descriptor[3],
                               [$descriptor[4]],
                               $docid],
                              {$descriptor[4] => $text}]);
        }

        # open html-page
        # '_do_request2' = WAIS documents
        &print_header($language{'do_request2'}) unless $debug;

        &print_queries($transquery);

        if ($formintern || $formextern) {
            %values = &parse_query_string($refinequery);
            &print_forms($formintern, $formextern, %values);
        }

        &print_diagnostics($databases->get_diagnostics);
        &print_headlines($databases, \@headlines, '', $conditions);
        # close html-page
        &print_footer('');
        
        return;
    }

    $tmp = delete($table{$multiple_field});
    $multiple_choice = 1 if $tmp =~ /^(yes|1)$/i;
    $multiple_choice = 0 if $tmp =~ /^(no|0)$/i;
    
    # begin displaying results with which document?
    $tmp = delete($table{$range_field});
    $range = $tmp if $tmp > 0 && $tmp == int($tmp);

    # maximum number of wais hits returned, defaults to 40
    $tmp = delete($table{$maxhits_field});
    $maxhits = $tmp if $tmp > 0 && $tmp == int($tmp);
    $maxhits += $range;
    if ($range) {
        $further_query = $query;
        $further_query =~ s/(&?)$range_field=\d+(&?)/$1$range_field=$maxhits$2/;
        $further_query .= "&$requestm_field=$requestm"
            if $further_query !~ /&$requestm_field=$request/;
        $prior = $range + $range - $maxhits;
        if ($prior >= 1) {
            $prior_query = $query;
            $prior_query =~ s/(&?)$range_field=\d+(&?)/$1$range_field=$prior$2/;
            $prior_query .= "&$requestm_field=$requestm"
                if $prior_query !~ /&$requestm_field=$request/;
        }
        elsif ($range > 1) {
            $prior_query = $query;
            $prior_query =~ s/(&?)$range_field=\d+(&?)/$1$range_field=$prior$2/;
            $prior_query .= "&$requestm_field=$requestm"
                if $prior_query !~ /&$requestm_field=$request/;
        }
    }

    # fetch documents by http, not by SFgate, defaults to 1
    $tmp = delete($table{$directhttp_field});
    $directhttp = 1 if $tmp =~ /^(yes|1)$/i;
    $directhttp = 0 if $tmp =~ /^(no|0)$/i;

    # connection of fields, defaults to ''
    $tmp = delete($table{$tie_field});
    $tie = 'and' if $tmp =~ /^($and_regexp)$/i;
    $tie = ''  if $tmp =~ /^($or_regexp)$/i;

    # connection in fields, defaults to ''
    $tmp = delete($table{$tieinternal_field});
    $tieinternal = 'and' if $tmp =~ /^($and_regexp)$/i;
    $tieinternal = ''  if $tmp =~ /^($or_regexp)$/i;

    # skip headline menu? defaults to 0
    $tmp = delete($table{$directget_field});
    if ($tmp =~ /^(yes|1)$/i) {
        $directget = 1;
        # if retrieving documents via http fails,
        # give direct http urls instead
        $directhttp = 0;
    }
    else {
        $directget = 0;
    }

    # databases to query
    $tmp = delete($table{$database_field});
    $databases = new SFgate::Databases::Databases ($tmp,
                                                   $default_wais_dir,
                                                   $application_dir,
                                                   $attributes);

    # dump environement? defaults to 0
    $tmp = delete($table{$dmpenv_field});
    if ($tmp =~ /^(yes|1)$/i) {
        delete($table{$debug_field});
        ($tquery, $tqueries) = &translate_query($databases, %table);
        &dumpenv($query, $tquery, $databases);
        return;
    }

    if (defined($table{$descriptor_field})) {
        # get single document
        @descriptor = split(/$seperatorr/, $table{$descriptor_field});
        &dprint("<B>getting single document:</B>\n ", join("\n ", @descriptor), "\n");

        $databases = new SFgate::Databases::Databases ([$descriptor[0]],
                                                       $default_wais_dir,
                                                       $application_dir,
                                                       0);
        
        require Wais;
        $docid = new Wais::Docid ($databases->get_server($descriptor[0]),
                                  $descriptor[6],
                                  $descriptor[7]);

        ($text, $http_failure) = $databases->Retrieve($descriptor[0],
                                                      $docid,
                                                      $descriptor[4],
                                                      $transquery,
                                                      $_lines);
        if ($http_failure) {
            $url = (split(' ', $descriptor[7]))[2];
            # '_do_request1' = couldn't retrieve the document:
            # '_do_request3' = Try to get it directly!
            $text = '<A HREF="' . $url . '">' . $me . ' ' .
                $language{'do_request1'} . ' ' . $http_failure .
                    "\n" . $language{'do_request3'} . "</A>\n";
        }

        my %positions;
        if ($conditions) {
            # do highlighting
            my %highlight;
            my $database_file = $databases->get_database_file($descriptor[0]);
            if (-e "$database_file.doc") {
                # highlighting only possible with local datbases
                push(@{$highlight{$database_file}->[0]}, $docid);
                $highlight{$database_file}->[1] = $conditions;
                my $highlight = new SFgate::Highlight (%highlight);
                %positions = $highlight->highlight;
            }
        }

        &print_text($descriptor[4], $text, $descriptor[3], 0, $databases, $descriptor[0],
                    $positions{$docid});
        &print_footer('');
    }
    else {
        # start the query
        &dprint("<B>databases to query:</B>\n " .
                join("\n ", $databases->get_databases) .
                "\n");
        @databases = $databases->get_descriptions;
        
        # parse query
        my $conditions;
        ($tquery, $tqueries, $conditions) = &translate_query($databases, %table);
        $transquery = &encode($tquery);
        $conditions = '' unless $highlight;
        if ($errno) {
            # error occured while translating
            # '_do_sorry1' = Syntax error in query
            &print_header($language{'do_sorry1'}) unless $debug;
            &print_queries($tquery, $tqueries);
            &print_forms($formintern, $formextern, %values);
            &do_sorry;
            return;
        }
        elsif ($tquery =~ /^\s*$/) {
            &print_header("@databases") unless $debug;
            &print_forms($formintern, $formextern, %values);
            # '_do_request5' = You didn\'t specify a query.
            print "\n\n$language{'do_request5'}\n\n <HR>\n";
            $hr = 1;
            &print_footer('');
            return;
        }

        if (!@databases) {
            ## error: no database specified
            # '_do_request6' = in
            &print_header("$tquery $language{'do_request6'} @databases")
                unless $debug;
            &print_queries($tquery, $tqueries);
            &print_forms($formintern, $formextern, %values);
            # '_do_request7' = You didn't select any databases.
            print "\n$language{'do_request7'}\n\n <HR>\n";
            $hr = 1;
            &print_footer('');
        }
        else {
            # do query, results are in @headlines
            @headlines = $databases->Search($tquery, $maxhits, $directget, $_lines, $convert);

            if ($range) {
                if (@headlines == $maxhits) {
                    pop(@headlines);
                }
                else {
                    $further_query = '';
                }
                splice(@headlines, 0, $range - 1);
            }

            # do output of headlines
            # '_do_request6' = in
            &print_header("$tquery $language{'do_request6'} @databases")
                unless $debug;
            &print_queries($tquery, $tqueries);
            
            &print_forms($formintern, $formextern, %values);

            &print_diagnostics($databases->get_diagnostics);
            @headlines = &print_wsrc(@headlines);
            &print_headlines($databases, \@headlines, $prior_query, $conditions);
            &print_footer($further_query);
        }
    }
}


sub print_header 
{
    my($title) = @_;
    ## local variable
    local($_);

    $title = &encode_entities($title);
    
    print <<"EOF";
Content-Type: text/html

<HTML>

  <HEAD>
    <TITLE>$title</TITLE>
    <LINK HREF="http://ls6-www.informatik.uni-dortmund.de/ir/projects/SFgate/" REV="generated" TITLE="$program$revision">
  </HEAD>

  <BODY>
EOF
    ;
    
    if ($application
        && (open (HEADER, "< ${application_dir}/${application}_header_$language")
            || open (HEADER, "< ${application_dir}/${application}_header"))) {
        while (<HEADER>) {
            next if /^#/;
            print;
        }
        close(HEADER);
    }
    else {
        print <<"EOF";
        
<A HREF="$virt_docdir/author.html">
<IMG SRC="$virt_docdir/$program-small.gif" ALT="SFgate"></A>

 <P>
EOF
    ;
    }
}


sub print_queries
{
    my($query, $queries) = @_;

    $query   = &encode_entities($query);

    # 'print_queries1' = Your query was:
    print "\n\n<DL>\n<DT> $language{'print_queries1'}\n<DD> <B>$query</B>\n";
    # 'print_queries2' = Queries in Databases:
    print "<DT> $language{'print_queries2'}\n<DD> $queries\n" if $attributes;
    print "</DL>\n";
}


sub print_forms
{
    my($formintern, $formextern, %values) = @_;
    ## local variable
    my($form);
    
    return unless $formintern || $formextern; 

    if ($formintern) {
        ($formintern) = $formintern =~ /^([A-z_]\w*)/;
        require "SFgate/Forms/$formintern.pm";
        $formintern = "SFgate::Forms::$formintern";
        $form = new $formintern (%values);
        print "\n <HR>\n\n";
    }
    
    if ($formextern) {
        print "<A HREF=\"$htbin/$me?" .
            "$printform_field=1&$formextern_field=$formextern&$refinequery_field=" .
            &encode($refinequery) . '">';
    }
    
    # 'print_forms' = Refine your query
    print $language{'print_forms'} if $formintern || $formextern;
    
    print '</A>' if $formextern;
    
    if ($formintern) {
        print ": <P>\n\n"; 
        $form->print_form;
        print "\n <HR>\n\n";
        $form->delete;
        $hr = 1;
    }
    elsif ($formextern) {
        print "<P>";
    }

    return $hr;
}


sub do_sorry
{
    $perror =~ s/^\d*: //;
    $perror =~ s/\n\d*: //;

    print "\n <HR>\n\n" unless $hr;
    
    # '_do_sorry1' = Syntax error in query
    # '_do_sorry2' = Sorry! Can\'t parse your query:
    print <<"EOF"
<H2>$language{'do_sorry1'}</H2>

<DL>
<DT> $language{'do_sorry2'}
<DD> $perror
</DL>

 <HR>
EOF
    ;
    $hr = 1;
    &print_footer('');

    &log("parse error: $perror");
}


sub print_footer
{
    my($further_query) = @_;
    ## local variable
    local($_);
    
    if ($listenv_open) {
        print "</$listenv>\n";
        $listenv_open = 0;
    }
    print "\n <HR>\n\n" unless $hr;
    
    # '_print_footer1' = Further documents...
    if ($further_query) {
        print "<A HREF=\"$htbin/$me?$further_query\">" .
            $language{'print_footer1'}, "</A>\n\n <HR>\n\n";
    }

    if ($application
        && (open (FOOTER, "< ${application_dir}/${application}_footer_$language")
            || open (FOOTER, "< ${application_dir}/${application}_footer"))) {
        while (<FOOTER>) {
            next if /^#/;
            print;
        }
        close(FOOTER);
    }
    else {
        # '_print_footer2' = This page was created by
        print <<"EOF"

    <ADDRESS>
      $language{'print_footer2'}
      <A HREF="$virt_docdir/author.html">$program$revision</A>.
    </ADDRESS>

EOF
    ;
    }

    print <<"EOF";
  </BODY>

</HTML>
EOF
    ;
}


sub dumpenv
{
    my($query, $tquery, $databases) = @_;

    for (keys %ENV) {
        $ENV .= "<DT> <B>$_</B>\n<DD> $ENV{$_}\n";
    }
    
    # '_dumpenv' = Dump Environement
    &print_header("$program $language{'dumpenv'}") unless $debug;

    print <<"EOF";

 <HR>

     
<H2>Debug</H2>

<DL>
<DT> <B>ARGV[0]</B>
<DD> $ARGV[0]                
<DT> <B>ARGV[1]</B>
<DD> $ARGV[1]                 
<DT> <B>REQUEST_METHOD</B>
<DD> $ENV{'REQUEST_METHOD'}    
<DT> <B>QUERY_STRING</B>
<DD> $ENV{'QUERY_STRING'}     
<DT> <B>PATH_INFO</B>
<DD> $ENV{'PATH_INFO'}        
<DT> <B>GATEWAY_INTERFACE</B>
<DD> $ENV{'GATEWAY_INTERFACE'} 
<DT> <B>query</B>
<DD> $query                   
<DT> <B>translated query</B>
<DD> $tquery
</DL>

<B>Databases</B>

<PRE>
EOF
    ;
    $databases->display;

    print <<"EOF";
</PRE>
    
 <HR>
     
<H2>Environment</H2>

<DL>
$ENV
</DL>
    
 <HR>
EOF
    ;
    &print_footer('');
}



