/**************************************************************************
 *
 * QueryParser.cpp -- Query parser for a simple query language
 * Copyright (C) 2000  Rodger McNab
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * $Id: GSDLQueryParser.cpp,v 1.3 2000/12/07 22:36:32 kjm18 Exp $
 *
 **************************************************************************/

#include "GSDLQueryParser.h"
#include "GSDLQueryLex.h"


static QueryNode *ParseExpression (UCArray::const_iterator &here,
				   UCArray::const_iterator end, 
				   int defaultBoolCOmbine,
				   int defaultStemMethod);

static QueryNode *AndAdd (QueryNode *t1, QueryNode *t2) {
  if (t1 == NULL) return t2;
  if (t2 == NULL) return t1;

  AndQueryNode *andNode = new AndQueryNode;
  andNode->leftNode = t1;
  andNode->rightNode = t2;
  return andNode;
}

static QueryNode *OrAdd (QueryNode *t1, QueryNode *t2) {
  if (t1 == NULL) return t2;
  if (t2 == NULL) return t1;

  OrQueryNode *orNode = new OrQueryNode;
  orNode->leftNode = t1;
  orNode->rightNode = t2;
  return orNode;
}

static QueryNode *NotAdd (QueryNode *t1, QueryNode *t2) {
  if (t1 == NULL) return t2;
  if (t2 == NULL) return t1;

  NotQueryNode *notNode = new NotQueryNode;
  notNode->queryNode = t1;
  notNode->notNode = t2;
  return notNode;
}

// expects the opening bracket to have already been parsed
// and discarded
static QueryNode *ParseBracketExpression (UCArray::const_iterator &here,
					  UCArray::const_iterator end, 
					  int defaultBoolCombine,
					  int defaultStemMethod) {
  // get everything in the expression
  QueryNode *curTree = ParseExpression (here, end, defaultBoolCombine,
					defaultStemMethod);
  
  // gobble up tokens until a closing bracket is found
  // or the end of the string
  LexEl el;
  while (ParseLexEl (here, end, el)) {
    if (el.lexType == CloseBracketE) break;
  }

  return curTree;
}

static int ParseInt (UCArray::const_iterator &here,
		     UCArray::const_iterator end) {
  LexEl el;
  UCArray::const_iterator oldHere = here;
  if (ParseLexEl (here, end, el) && el.lexType == IntegerE)
    return el.num;
  
  here = oldHere; // not an integer
  return 0;
}

// default is within 20 words
static void SetRangeValues (TermNode &termNode,
			    UCArray &near) {
  UCArray NEAR; SetCStr(NEAR, "NEAR");
  if (near == NEAR) { // no modifier
    termNode.startRange = -21;
    termNode.endRange = 20;
  }
  else { // extract number
    UCArray::const_iterator here = near.begin()+4;
    UCArray::const_iterator end = near.end();
    int size=0;
    while (here != end) {
      size = size*10 + (*here-'0');
      here++;
    }
    cerr <<"size= "<<size<<endl;
    termNode.startRange = -1 * (size+1);
    termNode.endRange = size;
  }


}
static unsigned long GetStemMethod(LexEl &el, int defaultStemMethod) {
  // here expect el to contain some of c,s,i,u
  // stem method 0 = c,u  00
  // stem method 1 = i,u  01  - default for DL
  // stem method 2 = c, s  10
  // stem method 3 = i,s  11

  unsigned long stem = (unsigned long)defaultStemMethod;
      
  UCArray::const_iterator here = el.text.begin();
  UCArray::const_iterator end = el.text.end();

  unsigned char c1 = *here; 
  if (!(c1 == 'c'| c1 == 'i' | c1 == 'u' | c1 == 's')) 
    return 4; // incorrect format
  
  here++;
  unsigned char c2 = 'a';
  if (here !=end) {
    c2 = *here;
    if (!(c2 == 'c'| c2 == 'i' | c2 == 'u' | c2 == 's')) 
      return 4; // incorrect format
  }
  
  if (c1 == 'i'|| c2=='i') stem |= 1; // set bit 0 to 1
  if (c1 == 'c' || c2 == 'c') stem &=0xe; //set bit 0 to 0
  if (c1 == 's'|| c2 == 's') stem |= 2; // set bit 1 to 1
  if (c1 == 'u' || c2 =='u') stem &=0xd; // set bit 1 to 0
  cerr << "stem is "<<stem<<endl;
  return stem;
}
  

static void ParseTermModifiers (UCArray::const_iterator &here,
				UCArray::const_iterator end,
				TermNode &termNode,
				int defaultStemMethod) {
 
  termNode.stemMethod = defaultStemMethod;

  LexEl el;
  UCArray::const_iterator oldHere = here;
  while (ParseLexEl (here, end, el)) {
    if (el.lexType == TermWeightE) {
      termNode.termWeight = ParseInt (here, end);
      
    } else if (el.lexType == StemMethodE) {
      oldHere = here;
      LexEl stem;
      if (ParseLexEl (here, end, stem) && stem.lexType == TermE) {
	termNode.stemMethod = GetStemMethod(stem, defaultStemMethod);
	if (termNode.stemMethod == 4) { // error so backtrack
	  here = oldHere;
	  termNode.stemMethod = (unsigned long)defaultStemMethod;
	} 
      }else here = oldHere; //ignore - wrong syntax

    } else if (el.lexType == RangeE) {
      termNode.startRange = ParseInt (here, end);
      termNode.endRange = ParseInt (here, end);
      
    } else if (el.lexType == AtE) {
      termNode.startRange = termNode.endRange = ParseInt (here, end);
      
    } else {
      // no term modifiers
      here = oldHere;
      break;
    }
    
    oldHere = here;
  }  
}

static void ParseProxModifiers (UCArray::const_iterator &here,
				UCArray::const_iterator end,
				ProxMatchQueryNode *proxNode) {
  // so far only have one - the tag stuff
  LexEl el;
  UCArray::const_iterator oldHere = here;
  while (ParseLexEl (here, end, el)) {
    if (el.lexType == TagE) {
      oldHere = here; // don't backtrack past here
      if (ParseLexEl (here, end, el) && el.lexType == TermE) {
	proxNode->tagNodePtr = new TagNode;
	proxNode->tagNodePtr->tagName = el.text;
	
      }
      else { // error in tag
	here = oldHere;
      }
    } // TagE
    // add in other cases here
    else {
      // no modifiers
      here = oldHere;
      break;
    }
    oldHere = here;
  }//while


}

// expects starting brackets to have been parsed
static void ParseSquareBrackets(UCArray::const_iterator &here,
				UCArray::const_iterator end,
				ProxMatchQueryNode *proxNode,
				int defaultStemMethod) {

  LexEl el;
  while (ParseLexEl (here, end, el)) {
    if (el.lexType == TermE || el.lexType == IntegerE) {
      TermNode termNode;
      termNode.term = el.text;
      ParseTermModifiers (here, end, termNode, defaultStemMethod);
      proxNode->terms.push_back(termNode);
    }
    else if (el.lexType == CloseSquareBracketE) {
      break;
    }
    else if (el.lexType == AndOpE) {
      // ignore, the words are AND'ed anyway
      cerr << "and inside []\n";
    }
    else if (el.lexType == OrOpE) {
      cerr << "or inside []\n";
    }
    else {
      //error
      // just ignore for now
      cerr <<"bad syntax inside []\n";
    }
  } // while

}
// expects the starting quote to have been parsed
// and discarded
// is now for exact phrases
static void ParsePhrase (UCArray::const_iterator &here,
			 UCArray::const_iterator end,
			 ProxMatchQueryNode &proxNode) {
  LexEl el;
  bool first = true;
  while (ParseLexEl (here, end, el)) {
    if (el.lexType == TermE || el.lexType == IntegerE) {
      TermNode termNode;
      termNode.term = el.text;
      //ParseTermModifiers (here, end, termNode);
      if (first) {
	first = false;
      }
      else {
	termNode.startRange = -2;
	termNode.endRange = -1;
      }
      proxNode.terms.push_back (termNode);
      
    } else if (el.lexType == QuoteE) {
      break;
      
    } else {
      // error
      break;
    }
  }
}

static QueryNode *ParseTerm (UCArray::const_iterator &here,
			     UCArray::const_iterator end,
			     int defaultBoolCombine,
			     int defaultStemMethod) {
  LexEl el;

  UCArray::const_iterator oldHere = here;
  if (!ParseLexEl (here, end, el)) return NULL;

  if (el.lexType == OpenBracketE)
    return ParseBracketExpression (here, end, defaultBoolCombine,
				   defaultStemMethod);

  ProxMatchQueryNode *proxNode = new ProxMatchQueryNode;

  if (el.lexType == TermE || el.lexType == IntegerE) {
    TermNode termNode;
    termNode.term = el.text;
    ParseTermModifiers (here, end, termNode, defaultStemMethod);
    oldHere = here;  // dont backtrack past here
    if (ParseLexEl(here, end, el) && el.lexType == NearOpE) {
      delete proxNode;
      proxNode = (ProxMatchQueryNode *)ParseTerm(here, end, defaultBoolCombine,
						 defaultStemMethod);
      SetRangeValues(termNode, el.text);
      proxNode->terms.push_back (termNode);
      return proxNode;
    }
    else {
      here = oldHere; // backtrack
      proxNode->terms.push_back (termNode);
      ParseProxModifiers(here, end, proxNode);
      return proxNode;
    }
  } else if (el.lexType == QuoteE) {
    ParsePhrase (here, end, *proxNode);
    return proxNode;
  }
  else if (el.lexType == OpenSquareBracketE) {
    ParseSquareBrackets (here, end, proxNode, defaultStemMethod);
    ParseProxModifiers (here, end, proxNode);
    return proxNode;
  }

  // not a term
  here = oldHere;
  delete proxNode;
  return NULL;
}


static QueryNode *ParseExpression (UCArray::const_iterator &here,
				   UCArray::const_iterator end,
				   int defaultBoolCombine,
				   int defaultStemMethod) {
  LexEl el;
  QueryNode *curTree = NULL;

  UCArray::const_iterator oldHere = here;
  while (ParseLexEl (here, end, el)) {
    if (el.lexType == OpenSquareBracketE ||
	el.lexType == OpenBracketE ||
	el.lexType == TermE ||
	el.lexType == QuoteE ||
	el.lexType == IntegerE ) {
	//	el.lexType == TagE) { //tag at end of term now
      // some type of term, back track and parse it
      here = oldHere;
      // if default==1, AND, else if==0, OR
      if (defaultBoolCombine) {
	curTree = AndAdd (curTree, ParseTerm (here, end, defaultBoolCombine, 
					      defaultStemMethod));
      }
      else {
	curTree = OrAdd (curTree, ParseTerm (here, end, defaultBoolCombine,
					     defaultStemMethod));
      }

    } else if (el.lexType == AndOpE) {
      curTree = AndAdd (curTree, ParseTerm (here, end, defaultBoolCombine,
					    defaultStemMethod));
      
    } else if (el.lexType == OrOpE) {
      curTree = OrAdd (curTree, ParseTerm (here, end, defaultBoolCombine,
					   defaultStemMethod));
      
    } else if (el.lexType == NotOpE) {
      curTree = NotAdd (curTree, ParseTerm (here, end, defaultBoolCombine,
					    defaultStemMethod));
      
    } else if (el.lexType == CloseBracketE) {
      // parsebracketexpression is waiting for the last bracket, so put it back
      here = oldHere;
      break;
    
    } else break;
    
    oldHere = here;
  }

  return curTree;
}

QueryNode *ParseQuery (const UCArray &queryStr, int defaultBoolCombine,
		       int defaultStemMethod) {
  UCArray::const_iterator here = queryStr.begin();
  UCArray::const_iterator end = queryStr.end();
  return ParseExpression (here, end, defaultBoolCombine, defaultStemMethod);
}
