########################## -*- Mode: Perl -*- ##########################
##
## File             : parse_query.pl
##
## Description      : query parser
##
#
# Copyright (C) 1995, 1996 Ulrich Pfeifer, Norbert Goevert
#
# This file is part of SFgate.
#
# SFgate is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# SFgate is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with SFgate; see the file COPYING.  If not, write to
# the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
#
##
## Author           : Norbert Goevert
## Created On       : Mon May 29 16:59:08 1995
##
## Last Modified By : Norbert Goevert
## Last Modified On : Thu Nov 21 12:50:40 1996
##
## $State: Exp $
##
## $Id: parse_query.pl,v 5.1.1.1 1996/12/23 12:54:26 goevert Exp goevert $
##
## $Log: parse_query.pl,v $
## Revision 5.1.1.1  1996/12/23 12:54:26  goevert
## patch6: wildcards as index type
##
## Revision 5.1  1996/11/05 16:56:04  goevert
## *** empty log message ***
##
## Revision 5.0.1.5  1996/11/04 13:11:19  goevert
## patch21: cons instead of MakeMaker
##
## Revision 5.0.1.4  1996/07/03 13:28:55  goevert
## patch19: minor fixes: text as search term, blank between lt-operator and operand
##
## Revision 5.0.1.3  1996/05/15 17:08:09  goevert
## patch10:
##
## Revision 5.0.1.2  1996/05/13 11:32:14  goevert
## patch1:
##
########################################################################


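## #################################################################
## parse($database_list, $query, $tieinternal)
## #################################################################
## Parses a query according to the SFgate query syntax; acts as
## a front end to the parser routines below.
##
## $database_list: object the parsed query is handed to (set_query)
## (string) $query: query to parse
## (string) $tieinternal: if 'and', adjacent query parts without an
##                        explicit operator are joined with `and'
##
## returns:
## - string: the parsed query
## - string: error message (empty if no error occurred)
## - conditions collected for the default query (for highlighting)
##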
sub parse
{
    # arguments, in this order: database list object, query as string
    # (kept in $_), operator tying adjacent query parts ('and' is the
    # only value treated specially, see sub query)
    local($database_list, $_, $tieinternal) = @_;

    ## here's a list of variables which are global within the parser
    ## (dynamically scoped: the subs called from here read and write them)
    # array of chars: query as array of chars
    local(@query) = split(//, $_); 
    # integer: points to next char to read in query
    local($pointer);
    # string: error causing the parser to interrupt parsing
    local($error);
    # integer: set if working on fieldexpression
    local($fieldexpression);
    # array of indextypes: indextypes to apply to terms
    local(@curr_indextypes);
    # string: holds the token to work on
    local($token);
    # string: holds the type of the current token
    # valid values:
    #  { } ( ) , = ' "
    #  or_operator and_operator not_operator proximity_operator
    #  end atleast_operator indextype num1 term 
    #  wildcard_term field num_predicate num_term
    #  d_literal_expression s_literal_expression
    local($curr_tok);
    # integer: 1 if the next token is read already, 0 else
    local($lookahead);
    # integer: number of opened parentheses
    local($parentheses);
    # integer: 1 if the fieldexpression currently parsed should
    #          be put in parentheses, 0 else
    local($fieldatomic);

    ## further parser global variables
    local($no_of_db);
    local(@fields);
    local($global_field);
    # The parser works on the ARRAY @databases; localize it so a call to
    # parse does not leak into (or clobber) a package global of that name.
    local(@databases);
    # NOTE(review): the scalar was localized originally although only the
    # array is used in the visible code; kept for backward compatibility.
    local($databases);
    local($form);
    local($lattice);
    local(@conditions);
    my($atomic);
    my(@wais_queries);
    my($wais_query);

    # check if attribute mapping is necessary
    if ($attributes) {
        require SFgate::Attributes::Lattice;
        require SFgate::Attributes::Form;
        @databases    = $database_list->get_map_databases;
        # arrow syntax instead of indirect object syntax: unambiguous
        # even though the classes are loaded at run time only
        if ($application) {
            $form     = SFgate::Attributes::Form->new("$application_dir/$application");
        }
        else {
            $form     = SFgate::Attributes::Form->new;
        }
        $lattice      = SFgate::Attributes::Lattice->new("$latticefile");
        $global_field = 'global';
        $no_of_db     = @databases;
    }
    else {
        $global_field = $freetext_field;
        $no_of_db     = 0;
    }

    # Index 0 .. $no_of_db-1 addresses the map databases, index $no_of_db
    # the default query; hence the loops below run up to $no_of_db.

    # get the 1st token to work on 
    $token = &get_token('');
    
    if ($curr_tok eq 'end') {
        # empty query: every query string is empty, this is no error
        foreach my $i (0 .. $no_of_db) {
            $wais_queries[$i]->{$global_field} = '';
        }
        $error = '';
    }
    else {
        # let's start parsing
        # set default field
        foreach my $i (0 .. $no_of_db) {
            $fields[$i] = [$global_field];
        }
        ($atomic, @wais_queries) = &query;
    }

    # set queries and conditions for map databases
    foreach my $i (0 .. $no_of_db-1) {
        $databases[$i]->set_query($wais_queries[$i]->{$global_field});
        $databases[$i]->set_conditions($conditions[$i]);
    }

    # set rest of queries
    $wais_query = $wais_queries[$no_of_db]->{$global_field};
    $database_list->set_query($wais_query);

    # end of parsing
    return ($wais_query, $error, $conditions[$no_of_db]);
} 


## #################################################################
## query
## #################################################################
## Parses a query part according to the 'query' production within
## the SFgate query syntax.
## Assumes that the next token to handle is read already.
## Returns as soon as an error occurs or when the query part has
## been parsed completely.
##
## returns (in this order):
## - integer: 1 if the parsed query part is atomic, i.e. needs no
##            parentheses when combined with other parts, 0 else
## - list: parsed query part, one hash reference (field => query
##         string) per map database plus one for the default query
##
sub query
{
    # @parsed_query/$atomic: result built so far (left operand)
    # @part2/$atomic2:       operand parsed most recently (right operand)
    local(@parsed_query, $atomic, @part2, $atomic2);

    ($atomic, @parsed_query) = &or_expression;
    return ($atomic, @parsed_query) if $error;

    while (1) {
        $token = &get_token('');

        # Tokens that close the enclosing construct are pushed back for
        # the caller; inside an unparenthesized fieldexpression
        # ($fieldexpression == 1) every token is pushed back.
        my $closes_query = $curr_tok eq 'end'
            || ($curr_tok eq '}' && @curr_indextypes)
            || ($curr_tok eq ')' && $parentheses);
        if ($closes_query || $fieldexpression == 1) {
            $lookahead = 1;
            last;
        }

        # either an explicit `or' or two operands following each other
        my $explicit_or = $curr_tok eq 'or_operator';
        if ($explicit_or) {
            $token = &get_token('');

            if ($curr_tok eq 'end') {
                # error handling: end of query after operator or
                $error = "$pointer: $language{'parse_query1'} `or'";
                return ($atomic, @parsed_query);
            }
        }

        ($atomic2, @part2) = &or_expression;

        if (!$explicit_or && $tieinternal eq 'and') {
            # implicit operator configured as `and': parenthesize non
            # atomic operands and join them explicitly
            foreach my $i (0 .. $no_of_db) {
                foreach my $field (@{$fields[$i]}) {
                    my $left = $parsed_query[$i]->{$field};
                    $left = '(' . $left . ')' if !$atomic;
                    if (!$atomic2) {
                        $part2[$i]->{$field} = '(' . $part2[$i]->{$field}. ')';
                    }
                    $parsed_query[$i]->{$field} =
                        $left . ' and ' . $part2[$i]->{$field};
                }
            }
            $atomic = 1;
        }
        else {
            # `or' (explicit or implicit) is expressed by juxtaposition
            foreach my $i (0 .. $no_of_db) {
                foreach my $field (@{$fields[$i]}) {
                    $parsed_query[$i]->{$field} .= ' ' . $part2[$i]->{$field};
                }
            }
            $atomic = 0;
        }
        $fieldatomic = 0;

        # the operand is merged even on error, so the caller gets what
        # has been parsed so far
        return ($atomic, @parsed_query) if $error;
    }

    return ($atomic, @parsed_query);
}


## #################################################################
## or_expression
## #################################################################
## Parses a query part according to the 'or_expression' production
## within the SFgate query syntax.
## Assumes that the next token to handle is read already.
## Returns as soon as an error occurs or when the query part has
## been parsed completely.
##
## returns (in this order):
## - integer: 1 if the parsed query part is atomic, i.e. needs no
##            parentheses when combined with other parts, 0 else
## - list: parsed query part, one hash reference (field => query
##         string) per map database plus one for the default query
##
sub or_expression
{
    # @parsed_query/$atomic: result built so far (left operand)
    local(@parsed_query, $atomic);
    # string: the and/not operator as written in the query
    local($and_operator);
    # right operand; localized here so that the variables of the same
    # name owned by the calling sub query are not overwritten
    local(@part2, $atomic2);
    my($atomic1);
    
    ($atomic, @parsed_query) = &and_expression;
    if ($error) {
        return ($atomic, @parsed_query);
    }
    
    for (;;) {
        $token = &get_token('');
        
        if ($curr_tok eq 'end') {
            $lookahead = 1;
            last;
        }
        elsif ($curr_tok eq 'and_operator'
               || $curr_tok eq 'not_operator') {
            if ($fieldexpression == 1) {
                # inside an unparenthesized fieldexpression the operator
                # is pushed back for the caller
                $lookahead = 1;
                last;
            }
            
            $and_operator = $token;
            
            $token = &get_token('');
            
            if ($curr_tok eq 'end') {
                # error handling: end of query after operator and/not
                $error = "$pointer: $language{'parse_query1'} `$and_operator'";
                return ($atomic, @parsed_query);
            }

            ($atomic2, @part2) = &and_expression;

            # the combination counts as atomic unless some field got no
            # right operand; then the left operand's state is kept
            $atomic1 = 1;
            foreach my $i (0 .. $no_of_db) {
                foreach my $field (@{$fields[$i]}) {

                    # compare against the empty string instead of testing
                    # truth: a right operand consisting of "0" is a valid
                    # query part and must not be dropped
                    if (defined($part2[$i]->{$field})
                        && $part2[$i]->{$field} ne '') {

                        if (!$atomic) {
                            $parsed_query[$i]->{$field}
                            = '(' . $parsed_query[$i]->{$field} . ')';
                        }

                        if (!$atomic2) {
                            $part2[$i]->{$field} =
                                '(' . $part2[$i]->{$field} . ')';
                        }

                        $parsed_query[$i]->{$field} .=
                            ' ' . $and_operator . ' ' . $part2[$i]->{$field};
                    }
                    else {
                        $atomic1 = $atomic;
                    }
                }
            }
            # an and/not combination always resets $fieldatomic
            $fieldatomic = 0;
            $atomic = $atomic1;
            
            if ($error) {
                return ($atomic, @parsed_query);
            }
        }
        else {
            # any other token belongs to the caller
            $lookahead = 1;
            last;
        }
    }
    
    return ($atomic, @parsed_query);
}


## #################################################################
## and_expression
## #################################################################
## Parses a query part according to the 'and_expression' production
## within the SFgate query syntax.
## Assumes that the next token to handle is read already.
## Returns as soon as an error occurs or when the query part has
## been parsed completely.
##
## returns (in this order):
## - integer: 1 if the parsed query part is atomic, i.e. needs no
##            parentheses when combined with other parts, 0 else
## - list: parsed query part, one hash reference (field => query
##         string) per map database plus one for the default query
##
sub and_expression
{
    local(@parsed_query, $atomic);

    ## first part: which tokens are not allowed at the beginning
    ##             of an and_expression?

    if ($curr_tok eq ')') {
        # error handling: unexpected closing parenthesis
        $error = "$pointer: $language{'parse_query2'}";
        foreach $i (0 .. $no_of_db) {
            foreach $field (@{$fields[$i]}) {
                $parsed_query[$i]->{$field} = '';
            }
        }
        return (1, @parsed_query);
    }

    if ($curr_tok eq '}') {
        # error handling: unexpected closing bracket
        $error = "$pointer: $language{'parse_query3'}";
        foreach $i (0 .. $no_of_db) {
            foreach $field (@{$fields[$i]}) {
                $parsed_query[$i]->{$field} = '';
            }
        }
        return (1, @parsed_query);
    }
    
    if ($curr_tok eq 'or_operator'
        || $curr_tok eq 'not_operator'
        || $curr_tok eq 'and_operator') {
        # error handling: unexpected operator `$token'
        $error = "$pointer: $language{'parse_query4'} `$token'";
        foreach $i (0 .. $no_of_db) {
            foreach $field (@{$fields[$i]}) {
                $parsed_query[$i]->{$field} = $token;
            }
        }
        return (1, @parsed_query);
    }
    
    if ($curr_tok eq 'num_predicate') {
        # error handling: unexpected numeric predicate `$token'
        $error = "$pointer: $language{'parse_query5'} `$token'";
        foreach $i (0 .. $no_of_db) {
            foreach $field (@{$fields[$i]}) {
                $parsed_query[$i]->{$field} = '';
            }
        }
        return (1, @parsed_query);
    }
    
    if ($curr_tok eq '=') {
        # error handling: unexpected =
        $error = "$pointer: $language{'parse_query6'} =";
        foreach $i (0 .. $no_of_db) {
            foreach $field (@{$fields[$i]}) {
                $parsed_query[$i]->{$field} = '';
            }
        }
        return (1, @parsed_query);
    }

    ## second part: start parsing an and_expression
    
    if ($curr_tok eq '(') {
        # parse query in parentheses
        
        $token = &get_token('');
        if ($curr_tok eq 'end') {
            # error handling: missing query after opening parenthesis
            $error = "$pointer: $language{'parse_query7'}";
            foreach $i (0 .. $no_of_db) {
                foreach $field (@{$fields[$i]}) {
                    $parsed_query[$i]->{$field} = '';
                }
            }
            return (1, @parsed_query);
        }

        $parentheses++;
        $fieldexpression++ if $fieldexpression;
        
        ($atomic, @parsed_query) = &query;
        if ($error) {
            return ($atomic, @parsed_query);
        }

        $token = &get_token('');
        if ($curr_tok ne ')') {
            # error handling: missing closing parenthesis
            $error = "$pointer: $language{'parse_query8'}";
            foreach $i (0 .. $no_of_db) {
                foreach $field (@{$fields[$i]}) {
                    $parsed_query[$i]->{$field} = '(' . $parsed_query[$i]->{$field};
                }
            }
            return (1, @parsed_query);
        }
        
        $fieldexpression-- if $fieldexpression > 1;
        $parentheses--;
    }
    elsif ($curr_tok eq "'") {
        # parse s_literal_expression
        
        $s_literal_expression = &get_token('s_literal_expression');

        $token = &get_token('');
        if ($curr_tok eq 'end') {
            # error handling: no literal search delimiter `''
            $error = "$pointer: $language{'parse_query9'} `''";
            foreach $i (0 .. $no_of_db) {
                foreach $field (@{$fields[$i]}) {
                    $parsed_query[$i]->{$field} = "'" . $s_literal_expression;
                }
            }
            return (1, @parsed_query);
        }
        
        foreach $i (0 .. $no_of_db) {
            foreach $field (@{$fields[$i]}) {
                $parsed_query[$i]->{$field} = "'" . $s_literal_expression . "'";
                # compute query conditions for highlighting
                my $lit_part;
                foreach $lit_part (split(' ', $s_literal_expression)) {
                    push(@{$conditions[$i]}, [$field, '=', $lit_part]);
                } 
            }
        }
        $atomic = 1;
    }
    elsif ($curr_tok eq '"') {
        # parse d_literal_expression
        
        $d_literal_expression = &get_token('d_literal_expression');

        $token = &get_token('');
        if ($curr_tok eq 'end') {
            # error handling: no literal search delimiter `"'
            $error = "$pointer: $language{'parse_query9'} `\"'";
            foreach $i (0 .. $no_of_db) {
                foreach $field (@{$fields[$i]}) {
                    $parsed_query[$i]->{$field} = '"' . $d_literal_expression;
                }
            }
            return (1, @parsed_query);
        }

        foreach $i (0 .. $no_of_db) {
            foreach $field (@{$fields[$i]}) {
                $parsed_query[$i]->{$field} = '"' . $d_literal_expression . '"';
                # compute query conditions for highlighting
                my $lit_part;
                foreach $lit_part (split(' ', $d_literal_expression)) {
                    push(@{$conditions[$i]}, [$field, '=', $lit_part]);
                } 
            }
        }
        $atomic = 1;
    }
    elsif ($curr_tok eq 'atleast_operator') {
        $token = &get_token('');
        if ($curr_tok ne 'num1') {
            # error handling: atleast operator without numeric argument
            $error = "$pointer: $language{'parse_query10'}";
            foreach $i (0 .. $no_of_db) {
                foreach $field (@{$fields[$i]}) {
                    $parsed_query[$i]->{$field} = 'atleast/';
                }
            }
            return (1, @parsed_query);
        }

        $num1 = $token;

        $token = &get_token('');
        if ($curr_tok ne 'term'
            && $curr_tok ne 'num_term'
            && $curr_tok ne 'num1') {
            # error handling: atleast operator without term
            $error = "$pointer: $language{'parse_query11'}";
            foreach $i (0 .. $no_of_db) {
                foreach $field (@{$fields[$i]}) {
                    $parsed_query[$i]->{$field} = 'atleast/' . $num1;
                }
            }
            return (1, @parsed_query);
        }

        foreach $i (0 .. $no_of_db) {
            foreach $field (@{$fields[$i]}) {
                $parsed_query[$i]->{$field} = "(atleast/$num1 $token)";
                # compute query conditions for highlighting
                push(@{$conditions[$i]}, [$field, '=', $token]);
            }
        }
        $fieldatomic = 1;
        $atomic = 1;
    }
    elsif ($curr_tok eq 'indextype') {
        # look ahead: what comes next?
        $indextype = $token;

        $token = &get_token('');
        
        if ($curr_tok eq 'term'
            || $curr_tok eq 'num_term'
            || $curr_tok eq 'num1') {
            foreach $i (0 .. $no_of_db-1) {
                foreach $field (@{$fields[$i]}) {
                    if ($indextype eq 'text'
                        || !$databases[$i]->test_type($field, $indextype)) {
                        $parsed_query[$i]->{$field} = $token;
                        # compute query conditions for highlighting
                        push(@{$conditions[$i]}, [$field, '=', $token]);
                    }
                    elsif ($indextype eq 'wildcard') {
                        $parsed_query[$i]->{$field} = $token . '*';
                        push(@{$conditions[$i]}, [$field, '*', $token]);
                    }
                    else {
                        $parsed_query[$i]->{$field} = $indextype . ' ' . $token;
                        # compute query conditions for highlighting
                        push(@{$conditions[$i]}, [$field, $indextype, $token]);
                    }
                }
            }
            foreach $field (@{$fields[$no_of_db]}) {
                if ($indextype eq 'text') {
                    $parsed_query[$no_of_db]->{$field} = $token;
                    # compute query conditions for highlighting
                    push(@{$conditions[$no_of_db]}, [$field, '=', $token]);
                }
                elsif ($indextype eq 'wildcard') {
                    $parsed_query[$no_of_db]->{$field} = $token . '*';
                    push(@{$conditions[$no_of_db]}, [$field, '*', $token]);
                }
                else {
                    $parsed_query[$no_of_db]->{$field} = $indextype . ' ' . $token;
                    # compute query conditions for highlighting
                    push(@{$conditions[$no_of_db]}, [$field, $indextype, $token]);
                }
            }

            $atomic = 1;
        }
        elsif ($curr_tok eq ',' || $curr_tok eq '{') {
            if (@curr_indextypes) {
                # error handling: nested indextypelists not possible
                $error = "$pointer: $language{'parse_query12'}";
                foreach $i (0 .. $no_of_db) {
                    foreach $field (@{$fields[$i]}) {
                        $parsed_query[$i]->{$field} = '';
                    }
                }
                return (1, @parsed_query);
            }

            push(@curr_indextypes, $indextype);
                
            if ($curr_tok eq ',') {
                $token = &get_token('');
                while ($curr_tok eq 'indextype') {
                    push(@curr_indextypes, $token);
                    $token = &get_token('');
                    last if $curr_tok ne ',';
                    $token = &get_token('');
                }
                
                if ($curr_tok ne '{') {
                    # error handling: missing opening bracket after indextypelist
                    $error = "$pointer: $language{'parse_query13'}";
                    foreach $i (0 .. $no_of_db) {
                        foreach $field (@{$fields[$i]}) {
                            $parsed_query[$i]->{$field} = '';
                        }
                    }
                    return (1, @parsed_query);
                }
            }

            # go on parsing, but use @curr_indextypes
            $token = &get_token('');
            if ($curr_tok eq 'end') {
                # error handling: missing query after indextypelist
                $error = "$pointer: $language{'parse_query14'}";
                foreach $i (0 .. $no_of_db) {
                    foreach $field (@{$fields[$i]}) {
                        $parsed_query[$i]->{$field} = '';
                    }
                }
                return (1, @parsed_query);
            }

            ($atomic, @parsed_query) = &query;
            if ($error) {
                return ($atomic, @parsed_query);
            }
            
            $token = &get_token('');
            if ($curr_tok ne '}') {
                # error handling: missing closing bracket after indextypelist
                $error = "$pointer: $language{'parse_query15'}";
                return (1, @parsed_query);
            }
            @curr_indextypes = ();
        }
        else {
            # error handling: no term with indextype`$indextype'
            $error = "$pointer: $language{'parse_query16'} `$indextype'";
            foreach $i (0 .. $no_of_db) {
                foreach $field (@{$fields[$i]}) {
                    $parsed_query[$i]->{$field} = $indextype;
                }
            }
            return (1, @parsed_query);
        }
    }
    elsif ($curr_tok eq 'term'
           || $curr_tok eq 'num_term'
           || $curr_tok eq 'num1') {
        # look ahead: what comes next?
        $term = $token;
        # downcase: tr/A-Z/a-z/;
        $term =~ tr/A-Z\304\326\334/a-z\344\366\374/;
        
        $token = &get_token('');
        
        if ($curr_tok eq 'proximity_operator') {
            $proximity_operator = $token;
            
            $token = &get_token('');
            if ($curr_tok ne 'num1') {
                # error handling: proximity operator `$proximity_operator' without
                #                 numeric argument greater than zero
                $error = "$pointer: $language{'parse_query17'} " .
                    "`$proximity_operator' $language{'parse_query18'}";
                foreach $i (0 .. $no_of_db) {
                    foreach $field (@{$fields[$i]}) {
                        $parsed_query[$i]->{$field} =
                            $term . ' ' . $proximity_operator . $token;
                    }
                }
                return (1, @parsed_query);
            }
            
            $num1 = $token;
            
            $token = &get_token('');
            if ($curr_tok ne 'term'
                && $curr_tok ne 'num_term'
                && $curr_tok ne 'num1') {
                # error handling: proximity operator `$proximity_operator$num1'
                #                 without 2nd term
                $error = "$pointer: $language{'parse_query17'} " .
                    "`$proximity_operator$num1' $language{'parse_query19'}";
                foreach $i (0 .. $no_of_db) {
                    foreach $field (@{$fields[$i]}) {
                        $parsed_query[$i]->{$field} =
                            $term . ' ' . $proximity_operator . $num1 . $token;
                    }
                }
                return (1, @parsed_query);
            }

            foreach $i (0 .. $no_of_db) {
                foreach $field (@{$fields[$i]}) {
                    $parsed_query[$i]->{$field} =
                        "($term $proximity_operator$num1 $token)";
                    # compute query conditions for highlighting
                    push(@{$conditions[$no_of_db]}, [$field, '=', $term]);
                    push(@{$conditions[$no_of_db]}, [$field, '=', $token]);
                }
            }
            $atomic = 1;
            $fieldatomic = 1;
        }
        else {
            $lookahead = 1;
            # ',' not in context with indextypes: interpret it as or
            $lookahead = 0 if $curr_tok eq ',';
            if (@curr_indextypes) {
                foreach $i (0 .. $no_of_db) {
                    foreach $field (@{$fields[$i]}) {
                        $parsed_query[$i]->{$field} = '';
                        $type_text = 0;
                        foreach $itype (@curr_indextypes) {
                            if ($itype eq 'text' && !$type_text) {
                                $parsed_query[$i]->{$field} .=
                                    $term . ' ';
                                $type_text = 1;
                                # compute query conditions for highlighting
                                push(@{$conditions[$i]}, [$field, '=', $term]);
                            }
                            elsif ($itype eq 'wildcard') {
                                $parsed_query[$i]->{$field} .=
                                    $term . '* ';
                                # compute query conditions for highlighting
                                push(@{$conditions[$i]}, [$field, '*', $term]);
                            }
                            else {
                                if ($i == $no_of_db
                                    || $databases[$i]->test_type($field, $itype)) {
                                    # do not call test_type for the default query
                                    $parsed_query[$i]->{$field} .=
                                        $itype . ' ' . $term . ' ';
                                    # compute query conditions for highlighting
                                    push(@{$conditions[$i]}, [$field, $itype, $term]);
                                }
                                elsif (!$type_text) {
                                    $parsed_query[$i]->{$field} .=
                                        $term . ' ';
                                    $type_text = 1;
                                    # compute query conditions for highlighting
                                    push(@{$conditions[$i]}, [$field, '=', $term]);
                                }
                            }
                        }
                        $parsed_query[$i]->{$field} =~ s/ $//;
                    }
                }

                $atomic = 1;
                $atomic = 0 if @curr_indextypes >= 2;
            }
            else {
                foreach $i (0 .. $no_of_db) {
                    foreach $field (@{$fields[$i]}) {
                        $parsed_query[$i]->{$field} = $term;
                        # compute query conditions for highlighting
                        push(@{$conditions[$i]}, [$field, '=', $term]);
                    }
                }
                $atomic = 1;
            }
        }
    }
    elsif ($curr_tok eq 'wildcard_term') {
        # downcase: tr/A-Z/a-z/;
        $token =~ tr/A-Z\304\326\334/a-z\344\366\374/;
        foreach $i (0 .. $no_of_db) {
            foreach $field (@{$fields[$i]}) {
                $parsed_query[$i]->{$field} = $token;
                # compute query conditions for highlighting
                my $cond_token = $token;
                $cond_token =~ s/\*$//;
                push(@{$conditions[$i]}, [$field, '*', $cond_token]);
            }
        }
        $atomic = 1;
    }
    elsif ($curr_tok eq 'field') {
        
        if ($fieldexpression) {
            # error handling: nested fieldexpressions not possible
            $error = "$pointer: $language{'parse_query20'}";
            foreach $i (0 .. $no_of_db) {
                foreach $field (@{$fields[$i]}) {
                    $parsed_query[$i]->{$field} = '';
                }
            }
            return (1, @parsed_query);
        }
        
        $field = $token;
        # get corresponding attributes within the various databases
        foreach $i (0 .. $no_of_db-1) {
            $fields[$i] = $databases[$i]->match_attribute($field, $form, $lattice);
        }
        $fields[$no_of_db] = [$field];
        
        $token = &get_token('');

        if ($curr_tok eq 'num_predicate') {
            $num_predicate = $token;

            $token = &get_token('');
            
            if ($curr_tok ne 'num_term'
                && $curr_tok ne 'num1') {
                # error handling: non numeric argument with numeric
                #                 predicate `$num_predicate'
                $error = "$pointer: $language{'parse_query21'} `$num_predicate'";
                $atomic = 1;
                foreach $i (0 .. $no_of_db) {
                    $parsed_query[$i]->{$global_field} = '';
                    foreach $field (@{$fields[$i]}) {
                        if ($field ne $global_field
                            && ($i == $no_of_db
                                || $databases[$i]->test_type($field, 'numeric'))) {
                            $parsed_query[$i]->{$global_field} .=
                                $field . $num_predicate . $token . ' ';
                        }
                    }
                    $fields[$i] = [$global_field];
                    $parsed_query[$i]->{$global_field} =~ s/ $//;
                    $atomic = 0 if $parsed_query[$i]->{$global_field} =~ / /;
                }
                return ($atomic, @parsed_query);
            }

            if ($num_predicate eq '<'
                || $num_predicate eq '>'
                || $num_predicate eq '==') {
                $atomic = 1;
                my($neg_token) = $token;
                $neg_token = " $token" if $num_predicate eq '<' && $token < 0;
                foreach $i (0 .. $no_of_db) {
                    $parsed_query[$i]->{$global_field} = '';
                    foreach $field (@{$fields[$i]}) {
                        $atomic = 0 if $parsed_query[$i]->{$global_field};
                        if ($field ne $global_field
                            && ($i == $no_of_db
                                || $databases[$i]->test_type($field, 'numeric'))) {
                            $parsed_query[$i]->{$global_field} .=
                                $field . $num_predicate . $neg_token . ' ';
                            # compute query conditions for highlighting
                            push(@{$conditions[$i]}, [$field, $num_predicate, $token]);
                        }
                        elsif ($num_predicate eq '==') {
                            if ($field eq $global_field) {
                                $parsed_query[$i]->{$global_field} .= $token . ' ';
                                # compute query conditions for highlighting
                                push(@{$conditions[$i]}, ['text', '=', $token]);
                            }
                            else {
                                $parsed_query[$i]->{$global_field} .=
                                    $field . '=' . $token . ' ';
                                # compute query conditions for highlighting
                                push(@{$conditions[$i]}, [$field, '=', $token]);
                            }
                        }
                    }
                    $fields[$i] = [$global_field];
                    $parsed_query[$i]->{$global_field} =~ s/ $//;
                }
            }
            elsif ($num_predicate eq '<=') {
                my($neg_token) = $token;
                $neg_token = " $token" if $token < 0;
                foreach $i (0 .. $no_of_db) {
                    $parsed_query[$i]->{$global_field} = '';
                    foreach $field (@{$fields[$i]}) {
                        if ($i == $no_of_db
                            || $databases[$i]->test_type($field, 'numeric')) {
                            $parsed_query[$i]->{$global_field} .=
                                $field . '<' . $neg_token . ' ' . $field . '==' . $token . ' ';
                            # compute query conditions for highlighting
                            push(@{$conditions[$i]}, [$field, '<=', $token]);
                        }
                    }
                    $fields[$i] = [$global_field];
                    $parsed_query[$i]->{$global_field} =~ s/ $//;
                }
                $atomic = 0;
            }
            elsif ($num_predicate eq '>=') {
                foreach $i (0 .. $no_of_db) {
                    $parsed_query[$i]->{$global_field} = '';
                    foreach $field (@{$fields[$i]}) {
                        if ($i == $no_of_db
                            || $databases[$i]->test_type($field, 'numeric')) {
                            $parsed_query[$i]->{$global_field} .=
                                $field . '>' . $token . ' ' . $field . '==' . $token . ' ';
                            # compute query conditions for highlighting
                            push(@{$conditions[$i]}, [$field, '>=', $token]);
                        }
                    }
                    $fields[$i] = [$global_field];
                    $parsed_query[$i]->{$global_field} =~ s/ $//;
                }
                $atomic = 0;
            }
        }
        else {
            $fieldexpression = 1;
            $fieldatomic = 1;
            
            $token = &get_token('');
            if ($curr_tok eq 'end') {
                # error handling: missing query in fieldexpression
                $error = "$pointer: $language{'parse_query22'}";
                $atomic = 1;
                foreach $i (0 .. $no_of_db) {
                    $parsed_query[$i]->{$global_field} = '';
                    foreach $field (@{$fields[$i]}) {
                        $parsed_query[$i]->{$global_field} .= $field . '= '
                            if $field ne $global_field;
                    }
                    $fields[$i] = [$global_field];
                    $parsed_query[$i]->{$global_field} =~ s/ $//;
                    $atomic = 0 if $parsed_query[$i]->{$global_field} =~ / /;
                }
                return ($atomic, @parsed_query);
            }

            ($atomic, @fieldexpression_query) = &query;

            $atomic2 = 1;
            foreach $i (0 .. $no_of_db) {
                $parsed_query[$i]->{$global_field} = '';
                foreach $field (@{$fields[$i]}) {
                    $atomic2 = 0 if $parsed_query[$i]->{$global_field};
                    if ($fieldexpression_query[$i]->{$field}) {
                        if ($field eq $global_field) {
                            if ($atomic) {
                                $parsed_query[$i]->{$global_field} .=
                                    $fieldexpression_query[$i]->{$field} . ' ';
                            }
                            else {
                                $parsed_query[$i]->{$global_field} .=
                                    '(' . $fieldexpression_query[$i]->{$field} . ') ';
                            }
                        }
                        elsif ($atomic && $fieldatomic) {
                            $parsed_query[$i]->{$global_field} .=
                                $field . '=' . $fieldexpression_query[$i]->{$field} . ' ';
                        }
                        else {
                            $parsed_query[$i]->{$global_field} .=
                                $field . '=(' . $fieldexpression_query[$i]->{$field} . ') ';
                        }
                    }
                }
                $fields[$i] = [$global_field];
                $parsed_query[$i]->{$global_field} =~ s/ $//;
            }
            $fieldexpression = 0;
            $atomic = $atomic2;

            if ($error) {
                return ($atomic, @parsed_query);
            }
        }
    }

    return ($atomic, @parsed_query);
}


## #################################################################
## get_token($type)
## #################################################################
## Returns next token, sets global var $curr_tok. $pointer points
## to the next char to read in global @query
##
## (string) $type: set to type of token to return (empty if no
##                 special token wished)
##
## returns:
## - string: next token
##
sub get_token
{
    # Tokenizer of the query parser.  Works on the global character
    # array @query, starting at index $pointer, and communicates through
    # globals: the token class goes to $curr_tok, the token text to
    # $token (which is also the return value).  $lookahead, when set,
    # makes the next call hand out the previously read token again.
    # assumes every element of @query is a single character -- TODO confirm
    local($type) = @_;
    local($char);

    if ($type) {
        # the caller wants a special token
        $curr_tok = $type;

        if ($type eq 's_literal_expression') {
            # collect everything up to (not including) the closing "'"
            $token = '';
            while ($pointer <= $#query
                   && ($char = $query[$pointer++]) ne "'") {
                $token .= $char;
            }
            # leave the closing quote unread for the next call
            $pointer-- if $char eq "'";
        }
        elsif ($type eq 'd_literal_expression') {
            # collect everything up to (not including) the closing '"'
            $token = '';
            while ($pointer <= $#query
                   && ($char = $query[$pointer++]) ne '"') {
                $token .= $char;
            }
            # leave the closing quote unread for the next call
            $pointer-- if $char eq '"';
        }

        # for any other $type the current $token is returned unchanged
        return $token;
    }

    if ($lookahead) {
        # use last token read
        $lookahead = 0;

        return $token;
    }

    # Discard whitespace.  Whether a token character was found is
    # tracked explicitly: testing the truth of $char would mistake a
    # final '0' (false in Perl) for the end of the query, and would
    # mistake trailing whitespace (a true string) for a token start.
    my($seen) = 0;
    while ($pointer <= $#query) {
        $char = $query[$pointer++];
        if ($char !~ /\s/) {
            $seen = 1;
            last;
        }
    }

    if (!$seen) {
        # end of query (after discarding whitespace)
        $curr_tok = 'end';

        return '';
    }

    $single = '[\'"(){},]';
    if ($char =~ /$single/) {
        # single character token
        $curr_tok = $char;
        $token = $char;
    }
    elsif ($char eq '=' && $query[$pointer] eq '=') {
        # num_predicate '=='
        $pointer++;
        $curr_tok = 'num_predicate';
        $token = '==';
    }
    elsif ($char =~ /[<>]/) {
        # num_predicate '>', '<', '>=', '<='
        $token = $char;
        if ($query[$pointer] eq '=') {
            $pointer++;
            $token .= '=';
        }
        $curr_tok = 'num_predicate';
    }
    elsif ($char eq '=') {
        # single character token '='
        $curr_tok = $char;
        $token = $char;
    }
    elsif (&look_forward(substr(join('', @query), $pointer-1)) eq 'num_term') {
        # numerals: the rest of the query, starting at the current
        # character, begins with a complete number

        # integer part (first character may be the sign)
        $token = $char;
        while (($char = $query[$pointer++]) =~ /[0-9]/) {
            $token .= $char;
        }
        # step back onto the first character that is not a digit
        $pointer--;

        if ($char =~ /[,.]/
            && $query[$pointer+1] =~ /[0-9]/) {
            # num_term with decimalpoint; ',' and '.' are both accepted
            # and normalized to '.'
            $curr_tok = 'num_term';
            $pointer++;
            $token .= '.';
            while (($char = $query[$pointer++]) =~ /[0-9]/) {
                $token .= $char;
            }
            $pointer--;
        }

        if ($token == int($token) && $token > 0) {
            # num1: positive number with integral value (> 0)
            $curr_tok = 'num1';
        }
        else {
            # numterm (maybe zero, negative or fractional)
            $curr_tok = 'num_term';
        }
    }
    elsif ($char =~ /[^\"\'*<>=(){}\/\s,]/) {
        # parsing literals: read up to the next delimiter character
        $token = $char;
        while (($char = $query[$pointer++]) =~ /[^\"\'*<>=(){}\/\s,]/) {
            $token .= $char;
        }
        # step back onto the delimiter; it decides the token class
        $pointer--;

        if ($char eq '*') {
            # trailing '*' is consumed and becomes part of the token
            $curr_tok = 'wildcard_term';
            $token .= '*';
            $pointer++;
        }
        elsif ($char eq '/') {
            # NOTE(review): both patterns are unanchored, so any word
            # containing "pre" or "w" matches here -- confirm intended
            if ($token =~ /pre/i || $token =~ /w/i) {
                $curr_tok = 'proximity_operator';
                $token .= '/';
                $pointer++;
            }
            elsif ($token eq 'atleast') {
                $curr_tok = 'atleast_operator';
                $token .= $char;
                $pointer++;
            }
            # NOTE(review): any other word followed by '/' leaves
            # $curr_tok at its previous value
        }
        elsif ($char eq ',') {
            # word followed by ',': only index type names are classified
            $tmp = $token;
            # downcase: tr/A-Z/a-z/;
            # (octal 304/326/334 -> 344/366/374 are the ISO-8859-1
            # codes of upper/lower case A, O, U umlaut)
            $tmp =~ tr/A-Z\304\326\334/a-z\344\366\374/;
            if ($tmp =~ /^($plaintext_regexp)$/) {
                $curr_tok = 'indextype';
                $token = 'text';
            }
            elsif ($tmp =~ /^($soundex_regexp)$/) {
                $curr_tok = 'indextype';
                $token = 'soundex';
            }
            elsif ($tmp =~ /^($phonix_regexp)$/) {
                $curr_tok = 'indextype';
                $token = 'phonix';
            }
            elsif ($tmp =~ /^($wildcard_regexp)$/) {
                $curr_tok = 'indextype';
                $token = 'wildcard';
            }
            # NOTE(review): a word matching none of the index type
            # patterns leaves $curr_tok at its previous value
        }
        elsif ($char =~ /[=<>]/) {
            # word directly followed by a predicate is a field name
            $curr_tok = 'field';
        }
        else {
            # keyword lookup on the downcased word; the canonical
            # keyword replaces the token text
            $tmp = $token;
            # downcase: tr/A-Z/a-z/;
            $tmp =~ tr/A-Z\304\326\334/a-z\344\366\374/;
            if ($tmp =~ /^($plaintext_regexp)$/ && $char =~ /[,{]/) {
                $curr_tok = 'indextype';
                $token = 'text';
            }
            elsif ($tmp =~ /^($soundex_regexp)$/) {
                $curr_tok = 'indextype';
                $token = 'soundex';
            }
            elsif ($tmp =~ /^($phonix_regexp)$/) {
                $curr_tok = 'indextype';
                $token = 'phonix';
            }
            elsif ($tmp =~ /^($wildcard_regexp)$/) {
                $curr_tok = 'indextype';
                $token = 'wildcard';
            }
            elsif ($tmp =~ /^($and_regexp)$/i) {
                $curr_tok = 'and_operator';
                $token = 'and';
            }
            elsif ($tmp =~ /^($or_regexp)$/i) {
                $curr_tok = 'or_operator';
                $token = 'or';
            }
            elsif ($tmp =~ /^($not_regexp)$/) {
                $curr_tok = 'not_operator';
                $token = 'not';
            }
            else {
                # ordinary search term
                $curr_tok = 'term';
            }
        }
    }

    return $token;
}


## #################################################################
## look_forward($text)
## #################################################################
## Checks whether $text starts with a complete number: an optional
## minus sign, digits, an optional fraction introduced by '.' or ','
## -- followed by whitespace, a parenthesis or the end of the string.
##
## (string) $text: text to inspect
##
## returns:
## - string: 'num_term' if $text starts with such a number,
##           a false value otherwise
##
sub look_forward
{
    my($text) = @_;

    return 'num_term'
        if $text =~ /^-?[0-9]+(?:[.,][0-9]+)?(?:[\s()]|$)/;

    return '';
}


1;


