########################## -*- Mode: Perl -*- ##########################
##
## File             : query.pl
##
## Description      : query processor
##
#
# Copyright (C) 1995, 1996 Ulrich Pfeifer, Norbert Goevert
#
# This file is part of SFgate.
#
# SFgate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# SFgate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with SFgate; see the file COPYING.  If not, write to
# the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
#
##
## Author           : Ulrich Pfeifer
## Created On       : Thu Mar 31 13:48:36 1994
##
## Last Modified By : Norbert Goevert
## Last Modified On : Tue Jan 21 13:15:21 1997
##
## $State: Exp $
##
## $Id: query.pl,v 5.1.1.3 1997/04/04 17:31:21 goevert Exp goevert $
##
## $Log: query.pl,v $
## Revision 5.1.1.3  1997/04/04 17:31:21  goevert
## patch11: bug in format_date
##
## Revision 5.1.1.2  1997/02/17 12:58:08  goevert
## patch10: fixing for date parsing
##
## Revision 5.1.1.1  1996/12/23 12:54:41  goevert
## patch6: wildcards as index type
##
## Revision 5.1  1996/11/05 16:56:08  goevert
## *** empty log message ***
##
## Revision 5.0.1.7  1996/11/05 15:56:09  goevert
## patch26: ordering of query conditions when groups are in use
##
## Revision 5.0.1.6  1996/11/04 13:11:23  goevert
## patch21: cons instead of MakeMaker
##
## Revision 5.0.1.5  1996/07/03 13:29:16  goevert
## patch19: bug with date field
##
## Revision 5.0.1.4  1996/06/04 16:18:37  goevert
## patch17: bu fix with indextypes in languages other than english
##
## Revision 5.0.1.3  1996/05/15 17:08:12  goevert
## patch10:
##
## Revision 5.0.1.2  1996/05/13 11:32:18  goevert
## patch1:
##
########################################################################


sub error
{
    local($_) = @_;
    
    $perror .= "$_\n";
    $errno++;
}


sub decode
{
    local($_) = @_;
    
    s/\+/ /g;
    s/%([\dA-F]{2})/pack('C',hex($1))/eig;
    s/\s+/ /g;
        
    $_;
}


sub encode
{
    local($_) = @_;

    s/([^-.:\w])/sprintf("%%%02x", ord($1))/eg; 

    $_;
}


## #################################################################
## detex($_)
## #################################################################
## Convert german LaTeX Umlaute to iso and downcase them
##
## (string) $_: string to detex
##
## returns:
## - string: the detexed string
##
sub detex
{
    local($_) = @_;

    s/\"a/\344/g; # 
    s/\"A/\344/g; # 
    s/\"o/\366/g; # 
    s/\"O/\366/g; # 
    s/\"u/\374/g; # 
    s/\"U/\374/g; # 
    s/\"s/\337/g; # 

    $_;
}


sub parse_query_string
{
    my($url) = @_;
    ## local variables
    local($_);
    my($name, $value, $tmp, $pattern);
    ## return value
    my(%table);

    # detex german special characters?
    $pattern = '(^|&)' . $detex_field . '=([^&]*)';
    $url =~ s/$pattern//;
    $tmp = $2;
    $detex = 1 if $tmp =~ /^(yes|1)$/i;
    $detex = 0 if $tmp =~ /^(no|0)$/i;

    $table{$detex_field} = $detex;
    
    for (split(/&/,$url)) {
        
        if (/=/) {                                 
            ($name, $value) = split(/=/,$_,2);
        }
        else {
            $name = $freetext_field;
            $value = $_;
        }

        $value = &decode($value);
        $value = &detex($value) if $detex;
        
        next if $value =~ /^\s*$/; # whitespace only

        if ($name eq $document_field) {
            # save in array; $vale is encoded twice
            push(@{$table{$name}}, &decode($value));
        }
        elsif ($name eq $database_field) {
            # save in array
            push(@{$table{$name}}, $value);
        }
        elsif ($table{$name}) {
            $table{$name} .= ' '.$value;
        }
        else {
            $table{$name} = $value;
        }
    }

    %table;
}


sub translate_query
{
    local($databases, %table) = @_;
    ## local variables
    local($_);
    local($date_fields, @date_fields, $and_fields, $groups, $date, $field, $query_part);
    local(%groups, %grouptie, $groupname, $grouptie);
    local($query, $query_and, @group, @group_and, $group, $group_and);
    ## return value
    local($query);

    ## determine grouping of fields
    ($groups, %table) = &group_table(%table);
    
    foreach (split('\|', $groups)) {
        ($groupname, $fields, $grouptie) = split(':');
        foreach $field (split(',', $fields)) {
            $groups{$field} = $groupname;
        }
        $grouptie{$groupname} = $grouptie;
    }
            
    ## resolve fieldselection fields
    %table = &fieldsel_table(%table);

    ## get field options (_and, _[year|month|day])
    ($date_fields, $and_fields, %table) = &translate_table(%table);
    
    @date_fields = split(':', $date_fields);
    $date_fields = ":$date_fields:";
    $and_fields  = ":$and_fields:";
    
    ## handle date fields
    foreach (@date_fields) {
        $field = $_;
        $field =~ s/_\d+$//;
        $date = &format_date($field,
                             $table{$_ . '_year'},
                             $table{$_ . '_month'},
                             $table{$_ . '_day'},
                             $table{$_ . '_p'});
        delete($table{$_ . '_year'});
        delete($table{$_ . '_month'});
        delete($table{$_ . '_day'});
        delete($table{$_ . '_p'});
        if (!$date) {
            # error occured
            # '_translate_query' = Couldn't parse date field
            &error("$language{'translate_query'} $field.");
            next;
        }
        $table{$_} = $date;
    }

    foreach (keys %table) {

        # predicate fields and indextypes are handled below
        next if /_[pi]$/ || /_[pi]_/;

        ($field) = /^([^_]*)/;
        $query_part = $table{$_};
        
        # handle predicates
        if ($predicate = $table{$_.'_p'}) {
            $predicate =~ s/\s//g;
        }
        else {
            $predicate = '=';
        }

        # handle indextypes
        $indextypelist = '';
        if ($table{$_.'_i'}) {
            $tmp = $table{$_.'_i'};
            # downcase: tr/A-Z/a-z/;
            $tmp =~ tr/A-Z\304\326\334/a-z\344\366\374/;
            if ($tmp =~ /($plaintext_regexp)/) {
                $indextypelist .= "$1,";
            } 
            if ($tmp =~ /($soundex_regexp)/) {
                $indextypelist .= "$1,";
            }
            if ($tmp =~ /($phonix_regexp)/) {
                $indextypelist .= "$1,";
            }
            if ($tmp =~ /($wildcard_regexp)/) {
                $indextypelist .= "$1,";
            }
            $indextypelist =~ s/,$//;
        }

        # process query part
        $query_part = "($query_part)" if $query_part =~ /\s/;

        if ($indextypelist) {
            $query_part = $indextypelist."{$query_part}";
        }

        if ($field ne $freetext_field && $date_fields !~ /:$_:/) {
            $query_part = $field.$predicate.$query_part;
        }
        elsif ($predicate ne '=') {
            $query_part = $predicate.$query_part;
        }

        # handle connection between query parts
        if ($groupname = $groups{$_}) {
            ($groupno) = $groupname =~ /_(\d+)/;
            if (defined($groupno)) {
                if ($and_fields =~ /:$_:/) {
                    $group_and[$groupno] .= "$query_part and ";
                }
                elsif ($tie =~ /^($and_regexp)$/i) {
                    $group[$groupno] .= "$query_part and ";
                }
                else {
                    $group[$groupno] .= "$query_part or ";
                }
            }
            else {
                if ($and_fields =~ /:$_:/) {
                    $group_and .= "$query_part and ";
                }
                elsif ($tie =~ /^($and_regexp)$/i) {
                    $group .= "$query_part and ";
                }
                else {
                    $group .= "$query_part or ";
                }
            }
        }
        else {
            if ($and_fields =~ /:$_:/) {
                $query_and .= "$query_part and ";
            }
            elsif ($tie =~ /^($and_regexp)$/i) {
                $query .= "$query_part and ";
            }
            else {
                $query .= "$query_part or ";
            }
        }
    }
    
    $group     =~ s/ (and |or )?$//;
    $group_and =~ s/ (and |or )?$//;
    if ($group_and && $group) {
        $group = "($group) and ($group_and)";
    }
    elsif ($group_and) {
        $group = $group_and;
    }
    if ($group) {
        if ($grouptie{$group_field} =~ /^($and_regexp)$/
            || $tie =~ /^($and_regexp)$/) {
            $query_and .= "($group) and ";
        }
        else {
            $query .= "($group) or ";
        }
    }
    
    foreach (keys %grouptie) {
        ($groupno) = /_(\d+)/;
        next unless defined($groupno);
        $group[$groupno]     =~ s/ (and |or )?$//;
        $group_and[$groupno] =~ s/ (and |or )?$//;
        if ($group_and[$groupno] && $group[$groupno]) {
            $group[$groupno] = "($group[$groupno]) and ($group_and[$groupno])";
        }
        elsif ($group_and[$groupno]) {
            $group[$groupno] = $group_and[$groupno];
        }
        if ($group[$groupno]) {
            if ($grouptie{$_} =~ /^($and_regexp)$/
                || $tie =~ /^($and_regexp)$/) {
                $query_and .= "($group[$groupno]) and ";
            }
            else {
                $query .= "($group[$groupno]) or ";
            }
        }
    }

    $query     =~ s/ (and |or )?$//;
    $query_and =~ s/ (and |or )?$//;
    if ($query_and && $query) {
        $query = "($query) and ($query_and)";
    }
    elsif ($query_and) {
        $query = $query_and;
    }

    &dprint("<B>translated query:</B>\n $query\n");
    my($conditions);
    ($query, $error, $conditions) = &parse($databases, $query, $tieinternal);
    &dprint("<B>parsed query:</B>\n $query\n");

    $queries = $databases->get_queries;
    &dprint($queries);

    &error($error) if $error;

    return($query, $queries, $conditions);
}


sub group_table
{
    my(%table) = @_;
    ## local_variables
    local($_);
    my($fields, $groupmember, $indextypes);
    ## return values
    my($groups);

    foreach (keys %table) {

        ($field, $modifier) = split(/_/, $_, 2);

        next unless $field    eq $group_field;
        next unless $modifier =~ /^(\d+)?$/;

        $modifier = '_' . $modifier if $modifier ne "";
        
        $fields =  delete($table{$_});
        $fields =~ s/\s//g;
                
        $groups .= '|' . $group_field . $modifier . ':' . $fields;

        # determine indextypes
        if (($indextypes = $table{$group_field . $modifier . '_i'})
            || ($indextypes = $table{$group_field . '_i' . $modifier})) {
            # indextypes for all group members
            foreach $groupmember (split(',', $fields)) {
                $table{$groupmember . '_i'} = $indextypes;
            }
        }
        delete($table{$group_field . $modifier . '_i'});
        delete($table{$group_field . '_i' . $modifier});
                
        # determine boolean connector for group
        if ($table{$group_field . $modifier . '_tie'} =~ /^($and_regexp)$/
            || $table{$group_field . '_tie' . $modifier} =~ /^($and_regexp)$/) {
            $groups .= ':and';
        }
        else {
            $groups .= ':or';
        }
        delete($table{$group_field . $modifier . '_tie'});
        delete($table{$group_field . '_tie' . $modifier});
    }

    $groups      =~ s/^\|//g if $groups;

    return($groups, %table);
}


sub fieldsel_table
{
    my(%table) = @_;
    # !!!Note: this subroutine also uses the global %groups hash!!!
    ## local variables
    local($_);
    my($match, $number, $content, $field, %description, $i);
    my($predicate, $indextype, $bool, $fieldsel_id, $group);

    # get fielddescriptions from the form
    $match = $fieldsel_field . '_name_(.+)';
    foreach (keys %table) {
        if (/^$match$/) {
            $description{$table{$_}} = $1;
        }
    }

    # get field content together with the field
    # (over field description)
    $match = $fieldsel_field . '(_\d+)?_description';
    foreach (keys %table) {
        if (/^$match$/) {
            $number = $1;
            next unless $field   = $description{$table{$_}};
            next unless $content = $table{$fieldsel_field . $number . '_content'};

            $indextype = $table{$fieldsel_field . $number . '_i'};
            $predicate = $table{$fieldsel_field . $number . '_p'};
            
            $bool = '';
            if ($table{$fieldsel_field . $number . '_tie'} =~ /^($and_regexp)$/) {
                $bool = '_and';
            }

            $i++;
            while (defined($groups{$field . '_' . $i})) {
                $i++
            }
            $table{$field . '_' . $i . $bool} = $content;
            $table{$field . '_' . $i . '_i'}  = $indextype if $indextype;
            $table{$field . '_' . $i . '_p'}  = $predicate if $predicate;
            $fieldsel_id = $_;
            $fieldsel_id =~ s/_description$//;
            if ($group = delete($groups{$fieldsel_id})) {
                $groups{$field . '_' . $i} = $group;
            }
        }
    }

    # get field content together with the field
    # (The fieldselector holds the name of the field name already,
    # not only the description)
    $match = $fieldsel_field . '(_\d+)?_name';
    foreach (keys %table) {
        if (/^$match$/) {
            $number = $1;
            next unless $field   = $table{$_};
            next unless $content = $table{$fieldsel_field . $number . '_content'};
            
            $indextype = $table{$fieldsel_field . $number . '_i'};
            $predicate = $table{$fieldsel_field . $number . '_p'};

            $bool = '';
            if ($table{$fieldsel_field . $number . '_tie'} =~ /^($and_regexp)$/) {
                $bool = '_and';
            }

            $i++;
            while (defined($groups{$field . '_' . $i})) {
                $i++
            }
            $table{$field . '_' . $i . $bool} = $content;
            $table{$field . '_' . $i . '_i'}  = $indextype if $indextype;
            $table{$field . '_' . $i . '_p'}  = $predicate if $predicate;
            $fieldsel_id = $_;
            $fieldsel_id =~ s/_name$//;
            if ($group = delete($groups{$fieldsel_id})) {
                $groups{$field . '_' . $i} = $group;
            }
        }
    }

    # delete 'fieldsel_...' entries in %table
    $match = $fieldsel_field . '_';
    foreach (keys %table) {
        if (/^$match/) {
            delete $table{$_};
        }
    }

    return(%table);
}


sub translate_table
{
    local(%table) = @_;
    ## local_variables
    local($_, $field, $modifier);
    ## return values
    local($date_fields, $and_fields, %new_table);

    foreach (keys %table) {
        ($field, $modifier) = split(/_/, $_, 2);

        if (!$modifier) {
            $new_table{$field} = $table{$_};
        }
        else {
            $modifier = "_$modifier";
            if ($modifier =~ /_(\d+)/) {
                $field .= "_$1";
            }
            if ($modifier =~ /_([ip])/) {
                $field .= "_$1";
            }
            if ($modifier =~ /_and/) {
                $and_fields .= ":$field:" if $and_fields !~ /:$field:/;
            }
            if ($modifier =~ /_(year|month|day)/) {
                my($mod) = $1;
                $date_fields .= ":$field:" if $date_fields !~ /:$field:/;
                $field .= "_$mod";
            }
            if ($modifier =~ /_tie/) {
                if ($table{$_} =~ /^($and_regexp)$/i) {
                    $and_fields .= ":$field:" if $and_fields !~ /:$field:/;
                }
            }
            else {
                $new_table{$field} = $table{$_};
            }
        }
    }

    $date_fields =~ s/:+/:/g;
    $date_fields =~ s/:$//;
    $date_fields =~ s/^://;
    
    $and_fields  =~ s/:+/:/g;
    $and_fields  =~ s/:$//;
    $and_fields  =~ s/^://;

    return($date_fields, $and_fields, %new_table);
}


sub format_date
{
    my($field, $year, $month, $day, $pred) = @_;
    ## local variables
    local($_);
    ## return value
    my($date);

    return undef unless $year || $month || $day;
    $pred = '==' if !$pred;
    
    @now = localtime(time);
    $now[4]++;

    $month = $now[4] if !$year && !$month;
    $year  = $now[5] if !$year;
    
    if (!$month && !$day) {
        # try to get info from year part
        if ($year =~ /^(\d{2}|\d{4})(\d{2})(\d{2})$/) {
            # (yy|yyyy)mmdd
            $day   = $3;
            $month = $2;
            $year  = $1;
        }
        elsif ($year =~ /^(\d{1,2})\.(\d{1,2})\.(\d{2}|\d{4})$/) {
            # (dd|d).(mm|m).(yyyy|yy) 
            $day   = $1;
            $month = $2;
            $year  = $3;
        }
        elsif ($year =~ /^(\d{1,2})\.(\d{1,2})\.$/) {
            # (dd|d).(mm|m).
            $day   = $1;
            $month = $2;
            $year  = $now[5];
        }
        elsif ($year =~ /^(\d{2}|\d{4})-(\d{1,2})-(\d{1,2})$/) {
            # (yyyy|yy)-(mm|m)-(dd|d)
            $day   = $3;
            $month = $2;
            $year  = $1;
        }
        elsif ($year =~ /^(\d{1,2})\/(\d{1,2})\/(\d{2}|\d{4})$/) {
            # (mm|m)/(dd|d)/(yyyy|yy)
            $day   = $2;
            $month = $1;
            $year  = $3;
        }
        elsif ($year !~ /^(\d{2}|\d{4})$/) {
            return;
        }
    }

    if (!$year) {
        $year  = $now[5];
        $month = $now[4] if !$month;
    }

    if ($year =~ /^\d{2}$/) {
        $year = "19$year";
    }
    elsif ($year !~ /^\d{4}$/) {
        return;
    }

    if ($day =~ /^\d{1}$/) {
        $day = "0$day";
    }
    elsif ($day && $day !~ /^\d{2}$/) {
        return;
    }

    if ($month =~ /^\d{1}$/) {
        $month = "0$month";
    }
    elsif ($month && $month !~ /^\d{2}$/) {
        return;
    }

    return if $day && !$month;

    if ($day && $month && $year) {
        $date = "$field$pred$year$month$day";
    }
    elsif (!$day && !$month) {
        if ($pred eq '<') {
            $date =  $field . '<' . $year . '0101';
        }
        elsif ($pred eq '>') {
            $date =  $field . '>' . $year . '1231';
        }
        elsif ($pred eq '<=') {
            $year++;
            $date =  $field . '<' . $year . '0101';
        }
        elsif ($pred eq '>=') {
            $year--;
            $date =  $field . '>' . $year . '1231';
        }
        elsif ($pred eq '=' || $pred eq '==') {
            $year--;
            $date = '(' . $field . '>' . $year . '1231';
            $year += 2;
            $date .= ' and ' . $field . '<' . $year . '0101)'; 
        }
    }
    elsif (!$day) {
        if ($pred eq '<') {
            $date =  $field . '<' . $year . $month . '01';
        }
        elsif ($pred eq '>') {
            $date =  $field . '>' . $year . $month . '31';
        }
        elsif ($pred eq '<=') {
            $month++;
            $date =  $field . '<' . $year . $month . '01';
        }
        elsif ($pred eq '>=') {
            $month--;
            $date =  $field . '>' . $year . $month . '31';
        }
        elsif ($pred eq '=' || $pred eq '==') {
            $month--;
            $date = '(' . $field . '>' . $year . $month . '31';
            $month += 2;
            $date .= ' and ' . $field . '<' . $year . $month . '01)'; 
        }
    }

    return $date;
}           
1;


