/**********************************************************************
 *
 * querytools.cpp -- 
 * Copyright (C) 1999  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/

#include "querytools.h"
#include <ctype.h>
#include "unitool.h" // for is_unicode_letdig

// Sets request up as a "QueryFilter" request for a single query string,
// translating the CGI arguments into filter options:
//   t          - non-zero gives a "ranked" query matching "some" terms,
//                zero a "boolean" query matching "all" terms
//   k          - non-zero switches Casefold on
//   s          - non-zero switches Stem on
//   h, j, n, g - when non-empty, passed on as Index, Subcollection,
//                Language and Level (granularity for mgpp)
// request.filterResultOptions and request.fields (if required) should
// be set from the calling code
void set_queryfilter_options (FilterRequest_t &request, const text_t &querystring,
			      cgiargsclass &args) {

  request.filterName = "QueryFilter";

  const bool ranked = (args.getintarg("t") != 0);
  const bool casefold = (args.getintarg("k") != 0);
  const bool stem = (args.getintarg("s") != 0);

  OptionValue_t opt;

  // the query itself
  opt.name = "Term";
  opt.value = querystring;
  request.filterOptions.push_back (opt);

  // options that are always sent
  opt.name = "QueryType";
  if (ranked) opt.value = "ranked";
  else opt.value = "boolean";
  request.filterOptions.push_back (opt);

  opt.name = "MatchMode";
  if (ranked) opt.value = "some";
  else opt.value = "all";
  request.filterOptions.push_back (opt);

  opt.name = "Casefold";
  if (casefold) opt.value = "true";
  else opt.value = "false";
  request.filterOptions.push_back (opt);

  opt.name = "Stem";
  if (stem) opt.value = "true";
  else opt.value = "false";
  request.filterOptions.push_back (opt);

  // options that are only sent when the matching argument is set
  const text_t &index = args["h"];
  if (!index.empty()) {
    opt.name = "Index";
    opt.value = index;
    request.filterOptions.push_back (opt);
  }

  const text_t &subcollection = args["j"];
  if (!subcollection.empty()) {
    opt.name = "Subcollection";
    opt.value = subcollection;
    request.filterOptions.push_back (opt);
  }

  const text_t &language = args["n"];
  if (!language.empty()) {
    opt.name = "Language";
    opt.value = language;
    request.filterOptions.push_back (opt);
  }

  const text_t &level = args["g"]; // granularity for mgpp
  if (!level.empty()) {
    opt.name = "Level";
    opt.value = level;
    request.filterOptions.push_back (opt);
  }

  // finish with the options shared by every query (Maxdocs)
  set_more_queryfilter_options (request, args);
}

// Two-query version: sets up the request for querystring1 exactly as the
// single-query overload does and then, if the "cq2" argument is set,
// appends a "CombineQuery" option followed by the options describing
// querystring2. The second query shares the t, k and s arguments with
// the first but takes its index, subcollection and language from
// h2, j2 and n2.
void set_queryfilter_options (FilterRequest_t &request, const text_t &querystring1,
			      const text_t &querystring2, cgiargsclass &args) {

  // first query (this also appends Maxdocs for it)
  set_queryfilter_options (request, querystring1, args);

  // second query, only when a combine operator was supplied
  const text_t &combine = args["cq2"];
  if (!combine.empty()) {
    OptionValue_t opt;

    opt.name = "CombineQuery";
    opt.value = combine;
    request.filterOptions.push_back (opt);

    opt.name = "Term";
    opt.value = querystring2;
    request.filterOptions.push_back (opt);

    opt.name = "QueryType";
    if (args.getintarg("t")) opt.value = "ranked";
    else opt.value = "boolean";
    request.filterOptions.push_back (opt);

    opt.name = "Casefold";
    if (args.getintarg("k")) opt.value = "true";
    else opt.value = "false";
    request.filterOptions.push_back (opt);

    opt.name = "Stem";
    if (args.getintarg("s")) opt.value = "true";
    else opt.value = "false";
    request.filterOptions.push_back (opt);

    const text_t &index2 = args["h2"];
    if (!index2.empty()) {
      opt.name = "Index";
      opt.value = index2;
      request.filterOptions.push_back (opt);
    }

    const text_t &subcollection2 = args["j2"];
    if (!subcollection2.empty()) {
      opt.name = "Subcollection";
      opt.value = subcollection2;
      request.filterOptions.push_back (opt);
    }

    const text_t &language2 = args["n2"];
    if (!language2.empty()) {
      opt.name = "Language";
      opt.value = language2;
      request.filterOptions.push_back (opt);
    }
  }

  // Maxdocs is appended a second time here (the single-query overload
  // called above has already appended it once).
  // NOTE(review): presumably the filter reads Maxdocs separately for the
  // query that follows CombineQuery, so the repeat looks deliberate --
  // confirm before removing it.
  set_more_queryfilter_options (request, args);
}

// Appends the options that are common to every query. At present that
// is only "Maxdocs", taken from the integer CGI argument "m".
void set_more_queryfilter_options (FilterRequest_t &request, cgiargsclass &args) {

  const int max_docs = args.getintarg("m");

  OptionValue_t opt;
  opt.name = "Maxdocs";
  opt.value = max_docs;
  request.filterOptions.push_back (opt);

  // StartResults / EndResults are currently not sent (code kept for
  // reference):
  //
  //  opt.name = "StartResults";
  //  opt.value = args["r"];
  //  request.filterOptions.push_back (opt);
  //
  //  opt.name = "EndResults";
  //  int endresults = args.getintarg("o") + (args.getintarg("r") - 1);
  //  if ((endresults > max_docs) && (max_docs != -1)) endresults = max_docs;
  //  opt.value = endresults;
  //  request.filterOptions.push_back (opt);
}

// Rewrites querystring in place:
//  - when querymode is 0 each of the characters ( ) | ! & is replaced by
//    a space, so boolean queries are just "all the words" queries
//    (any other querymode, i.e. advanced, leaves them alone)
//  - every Chinese character (0x4e00-0x9fa5 or 0xf900-0xfa2d) is followed
//    by 0x200b, and is also preceded by 0x200b when the previously copied
//    character was Chinese too
void format_querystring (text_t &querystring, int querymode) {
  const bool strip_operators = (querymode == 0);

  // true when the last character copied through was Chinese; replacing
  // an operator with a space leaves this flag untouched
  // NOTE(review): as written, two adjacent Chinese characters end up with
  // two 0x200b between them while a Chinese character directly after a
  // non-Chinese one gets none in front of it -- confirm this is intended.
  bool after_chinese = false;

  text_t formatted;
  text_t::const_iterator last = querystring.end();
  for (text_t::const_iterator pos = querystring.begin(); pos != last; ++pos) {

    const bool is_operator = (*pos == '(' || *pos == ')' || *pos == '|' ||
			      *pos == '!' || *pos == '&');
    if (strip_operators && is_operator) {
      formatted.push_back(' ');
      continue;
    }

    const bool is_chinese = ((*pos >= 0x4e00 && *pos <= 0x9fa5) ||
			     (*pos >= 0xf900 && *pos <= 0xfa2d));
    if (!is_chinese) {
      formatted.push_back (*pos);
      after_chinese = false;
      continue;
    }

    if (after_chinese) formatted.push_back (0x200b);
    formatted.push_back (*pos);
    formatted.push_back (0x200b);
    after_chinese = true;
  }

  querystring = formatted;
}


	
void add_dates(text_t &querystring, int startdate, int enddate, 
	       int startbc, int endbc)
{
  if(startdate)
    {
      int querystringis = 0;
      text_t::const_iterator here = querystring.begin();
      text_t::const_iterator end = querystring.end();
      while(here!=end)
	{
	  if(!(isspace((*here)))){
	    here = end;
	    querystringis = 1;
	  }
	  else
	    here++;
	}
      //converting BCE dates
      if(startbc && startdate > 0)
	{
	  startdate *= -1;
	}
      if(endbc && enddate > 0)
	{
	  enddate *= -1;
	}
       if(enddate != 0 && enddate<startdate)
	{
	  cout<<"enddate too small"<<endl;
	  return;
	}
       if(querystringis)
	 querystring.appendcstr(" AND");
       if(!enddate)
	 {
	   querystring.appendcstr(" [");
	   if(startdate<0)
	     {
	       querystring.appendcstr("bc");
	       querystring.appendint((startdate*-1));
	     }
	   else 
	     querystring.appendint(startdate);
	   querystring.appendcstr("]:CO");
	 }
       else{
	 int nextdate = startdate;
	 querystring.appendcstr(" (");
	 while(nextdate<=enddate)
	   {
	     if(nextdate!=0)
	       {
		 querystring.appendcstr(" [");
		 if(nextdate<0)
		   {
		     querystring.appendcstr("bc");
		     querystring.appendint((nextdate*-1));
		   }
		 else 
		   querystring.appendint(nextdate);
		 querystring.appendcstr("]:Coverage");
	       }
	     nextdate++;
	   }
	 querystring.appendcstr(" )");
       }
    }

}

void get_phrases (const text_t &querystring, text_tarray &phrases) {

  phrases.erase (phrases.begin(), phrases.end());
  if (!querystring.empty()) {
    
    text_t::const_iterator end = querystring.end();
    text_t::const_iterator here = findchar (querystring.begin(), end, '"');
    if (here != end) {
      text_t tmptext;
      bool foundquote = false;
      while (here != end) {
	if (*here == '"') {
	  if (foundquote) {
	    if (!tmptext.empty()) {
	      phrases.push_back(tmptext);
	      tmptext.clear();
	    }
	    foundquote = false;
	  } else foundquote = true;
	} else {
	  if (foundquote) tmptext.push_back (*here);
	}
	here ++;
      }
    }
  }
}

// search history tool
// Returns a copy of querystring in which every single or double quote is
// preceded by two backslash characters and every newline or carriage
// return is replaced by a space. All other characters are copied as is.
text_t escape_quotes(const text_t &querystring) {

  text_t escaped = "";
  text_t::const_iterator last = querystring.end();
  for (text_t::const_iterator pos = querystring.begin(); pos != last; ++pos) {
    if (*pos == '\n' || *pos == '\r') {
      escaped.push_back(' ');
      continue;
    }

    if (*pos == '\'' || *pos == '\"') {
      escaped +="\\\\";
    }
    escaped.push_back(*pos);
  }
  return escaped;

}

// some query form parsing functions for use with mgpp

// Builds querystring from the regular (simple) query form.
// The "fqf" and "fqv" arguments are comma separated lists of field tags
// and of the values typed in for them; they are paired up by position.
// Every non-empty value is added with addqueryelem, joined by "&" when
// the "t" argument is 0 and by "|" otherwise.
// querystring is left empty if either list is missing. Values that have
// no matching field (more values than fields) are ignored.
void parse_reg_query_form(text_t &querystring, cgiargsclass &args){

  querystring.clear();

  // t=0 - and, t=1 - or
  text_t combine;
  if (args.getintarg("t") == 0) combine = "&";
  else combine = "|";
  
  text_t field = args["fqf"];
  if (field.empty()) return; // no query
  text_tarray fields;
  splitchar(field.begin(), field.end(), ',', fields); 
  
  text_t value = args["fqv"];
  if (value.empty()) return; // something's wrong
  text_tarray values;
  splitchar(value.begin(), value.end(), ',', values);

  // never index past the end of the shorter list
  text_tarray::size_type count = values.size();
  if (fields.size() < count) count = fields.size();

  for (text_tarray::size_type i = 0; i < count; ++i) {
    if (values[i].empty()) continue;

    // NOTE(review): this loop used to compute formatelem(values[i]) and
    // then throw the result away, passing the raw value on instead. The
    // raw value is still what gets passed; presumably the cleaned-up
    // text was meant to be used -- confirm before changing it.
    addqueryelem(querystring, fields[i], values[i], combine);
  }
  
}

// Returns a copy of text reduced to its letters and digits (as judged by
// is_unicode_letdig). Each run of other characters that follows a letter
// or digit is collapsed to a single space; such characters at the very
// start are dropped. text itself is not modified.
text_t formatelem(text_t &text) {

  text_t cleaned = "";
  bool in_word = false; // true while the last character seen was a letter/digit

  text_t::iterator last = text.end();
  for (text_t::iterator pos = text.begin(); pos != last; ++pos) {
    if (!is_unicode_letdig(*pos)) {
      // first separator after a word: emit one space for the whole run
      if (in_word) {
	cleaned.push_back(' ');
	in_word = false;
      }
      continue;
    }

    cleaned.push_back(*pos);
    in_word = true;
  }

  return cleaned;
}

// Builds querystring from the advanced query form.
// Five comma separated list arguments are read and paired up by position:
//   fqf - field tags            fqv - values typed in
//   fqs - stem flags ("1" = on) fqk - casefold flags ("1" = on)
//   fqc - combiners ("and", "or", "not") where entry i-1 joins value i
//         to what came before it
// Every non-empty value is run through addstemcase and added with
// addqueryelem. The combiner starts out as "&" and keeps its previous
// setting whenever the matching fqc entry is missing or not recognised.
// querystring is left empty if any of the five arguments is missing.
// Values without a matching field are ignored; a missing stem or
// casefold flag counts as off.
void parse_adv_query_form(text_t &querystring, cgiargsclass &args){

  querystring.clear();
  text_t combine = "&";

  text_t field = args["fqf"];
  if (field.empty()) return; // no query
  text_tarray fields;
  splitchar(field.begin(), field.end(), ',', fields); 
  
  text_t value = args["fqv"];
  if (value.empty()) return; // something's wrong
  text_tarray values;
  splitchar(value.begin(), value.end(), ',', values);

  text_t stem = args["fqs"];
  if (stem.empty()) return; // something's wrong
  text_tarray stems;
  splitchar(stem.begin(), stem.end(), ',', stems);

  text_t fold = args["fqk"];
  if (fold.empty()) return; // something's wrong
  text_tarray folds;
  splitchar(fold.begin(), fold.end(), ',', folds);

  text_t comb = args["fqc"];
  if (comb.empty()) return; // something's wrong
  text_tarray combs;
  splitchar(comb.begin(), comb.end(), ',', combs);

  // the lists come from separate arguments and need not be the same
  // length, so every index below is checked before it is used
  text_tarray::size_type count = values.size();
  if (fields.size() < count) count = fields.size();

  for (text_tarray::size_type i = 0; i < count; ++i) {
    if (values[i].empty()) continue;

    if (i != 0 && (i-1) < combs.size()) {
      if (combs[i-1]=="and") combine = "&";
      else if (combs[i-1]=="or") combine = "|";
      else if (combs[i-1]=="not") combine = "!";
    }

    // an absent flag is left empty, which addstemcase treats as off
    text_t stem_flag;
    if (i < stems.size()) stem_flag = stems[i];
    text_t fold_flag;
    if (i < folds.size()) fold_flag = folds[i];

    text_t term = values[i];
    term = addstemcase(term, stem_flag, fold_flag);
    addqueryelem(querystring, fields[i], term, combine);
  }
}

// Splits terms into words (runs of characters accepted by
// is_unicode_letdig) and returns them joined back together, each word
// followed by an optional modifier and one space. The modifier is "#"
// plus "s" when stem is "1" and plus "i" when fold is "1"; when neither
// is "1" no modifier is added. Characters between words are dropped.
text_t addstemcase(text_t &terms, text_t &stem, text_t &fold) {

  const bool want_stem = (stem == "1");
  const bool want_fold = (fold == "1");

  // what follows every word is the same, so build it just once
  text_t word_tail;
  if (want_stem || want_fold) {
    word_tail += "#";
    if (want_stem) word_tail += "s";
    if (want_fold) word_tail += "i";
  }
  word_tail += " ";

  text_t result;
  text_t current; // word being collected

  text_t::iterator last = terms.end();
  for (text_t::iterator pos = terms.begin(); pos != last; ++pos) {
    if (is_unicode_letdig(*pos)) {
      // still inside a word
      current.push_back(*pos);
      continue;
    }

    // word boundary: flush the word collected so far, if any
    if (current.empty()) continue;
    result += current;
    result += word_tail;
    current.clear();
  }

  // the input may end in the middle of a word
  if (!current.empty()) {
    result += current;
    result += word_tail;
  }
  return result;
}



// Appends one element to querystring. If querystring already has content
// the element is first joined on with " <combine> ". When tag is "ZZ"
// query is appended as it stands, otherwise it is appended in the
// fielded form "[<query>]:<tag>".
void addqueryelem(text_t &querystring, text_t &tag, 
				  text_t &query, text_t combine) {
  // something is there already, so the and/or operator is needed
  if (!querystring.empty()) {
    querystring += " ";
    querystring += combine;
    querystring += " ";
  }

  if (tag=="ZZ") {
    querystring += query;
    return;
  }

  // put the fielded form together first, then append it in one go
  text_t element = "[";
  element += query;
  element += "]:";
  element += tag;
  querystring += element;

}



