/**********************************************************************
 *
 * mgppsearch.cpp -- 
 * Copyright (C) 1999  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/


#include "gsdlconf.h"
#include "mgppsearch.h"
#include "fileutil.h"
#include "GSDLQueryParser.h"
#include "MGQuery.h"
#include "TextGet.h"
#include "queryinfo.h"



// Builds the path suffix of the index files for a query:
// "index" / <index><subcollection><language> / <collection>,
// joined with filename_cat.
static text_t getindexsuffix(const queryparamclass &qp) {
  text_t root = "index";

  // the three selector codes are concatenated into one directory name
  text_t indexpart = qp.index;
  text_t subcollectionpart = qp.subcollection;
  text_t languagepart = qp.language;

  return filename_cat(root,
		      indexpart + subcollectionpart + languagepart,
		      qp.collection);
}

////////////////////
// mgppsearch class //
////////////////////

// Default constructor: results are reported at "Document" level until
// set_gdbm_level() says otherwise.
mgppsearchclass::mgppsearchclass ()
  : searchclass()
{
  this->gdbm_level = "Document";
}

// Destructor: releases the cache object, if one is attached, and clears
// the pointer afterwards.
mgppsearchclass::~mgppsearchclass () {
  if (cache == NULL) {
    return;  // nothing to release
  }

  delete cache;
  cache = NULL;
}

// Sets the level name stored in gdbm_level; search() and
// docTargetDocument() pass this level on to the mgpp calls.
void mgppsearchclass::set_gdbm_level(text_t &level)
{
  this->gdbm_level = level;
}

bool mgppsearchclass::search(const queryparamclass &queryparams, 
			     queryresultsclass &queryresult) {
  
  char *basepath = collectdir.getcstr(); //like ...gsdl/collect/demo
  char *indexname = (getindexsuffix(queryparams)).getcstr(); // like ...demo/mt/demo

  // load index data
  IndexData indexData;
  if (!indexData.LoadData (basepath, indexname)) {
    cerr<<"couldn't load index data\n"<<endl;
    return false;
  }

  // set default stem method from values originally set on prefs page
  int defaultStemMethod = 0;
  if (queryparams.casefolding) {
    defaultStemMethod |= 1;
  }
  if (queryparams.stemming) {
    defaultStemMethod |= 2;
  }
  
  // set default Boolean combiner from all/some setting
  // if match_mode == 1, ie all, default=1 ie AND
  // if match_mode == 0, ie some, default=0, ie OR
  int defaultBoolCombine = 0;
  if (queryparams.match_mode){
    defaultBoolCombine = 1;
  }

  // use default query info settings - change to reflect user preferences??
  QueryInfo queryInfo;
  SetCStr (queryInfo.docLevel, (queryparams.level.getcstr())); 
  queryInfo.maxDocs = (unsigned long)queryparams.maxdocs;
  queryInfo.sortByRank = (queryparams.search_type == 1);
  queryInfo.exactWeights = false;
  queryInfo.needRankInfo = true; // used for overall term freq as well as ranking
  queryInfo.needTermFreqs = true;
  
  ExtQueryResult queryResult;
  
  UCArray queryArray;
  SetCStr(queryArray, (queryparams.querystring.getcstr()));
  
  // create the mgpp query tree
  QueryNode *queryTree = NULL;
  queryTree = ParseQuery(queryArray, defaultBoolCombine, defaultStemMethod);
    
  UCArray level;
  UCArrayClear(level);
  
  //set the level for results
  SetCStr(level, gdbm_level.getcstr());

  
  // do the query
  MGQuery(indexData, queryInfo, queryTree, queryResult, level);
  
 
  // convert ExtQueryResult to queryresultclass
  
  queryresult.docs_matched = (int)queryResult.docs.size();

  if (queryresult.docs_matched == (int)queryResult.actualNumDocs) {
    queryresult.is_approx = Exact; 
  }
  else if (queryresult.docs_matched < (int)queryResult.actualNumDocs) {
    queryresult.is_approx = MoreThan;
  }
  else {
    queryresult.is_approx = Approximate;
  }

  docresultclass doc;
  for (int i=0; i<(int)queryResult.docs.size(); i++) {
    doc.clear();
    doc.docnum = (int)queryResult.levels[i];
    doc.docweight = queryResult.ranks[i];
    queryresult.docs.docset[doc.docnum] = doc;
    queryresult.docs.docorder.push_back(doc.docnum);

  }

  // term info
  termfreqclass term;
  for (int i=0; i<(int)queryResult.termFreqs.size(); i++) {
    term.clear();
    term.termstr = GetCStr(queryResult.termFreqs[i].term);
    term.termstemstr = term.termstr;
    term.termfreq = queryResult.termFreqs[i].termFreq;
    queryresult.terms.push_back(term);
    queryresult.orgterms.push_back(term); // should this change??
    
    for (int j=0; j<(int)queryResult.termFreqs[i].equivTerms.size(); j++) {
      queryresult.termvariants.insert(GetCStr(queryResult.termFreqs[i].equivTerms[j]));
    }

  }
  // clean up
  indexData.UnloadData();
  delete indexname;
  return true;

}


// Browses the term list of the index selected by getindexsuffix(): asks mgpp
// for 'numDocs' terms around queryparams.querystring, beginning at position
// 'start', and appends each term with its frequency to queryresult.terms and
// queryresult.orgterms.
//
// Returns false (after writing a message to cerr) when the index data cannot
// be loaded, and true otherwise.
//
// Memory: the C strings handed out by text_t::getcstr() are treated as
// caller-owned arrays and released with delete [] on every exit path.
bool mgppsearchclass::browse_search(const queryparamclass &queryparams, int start, int numDocs,
				    queryresultsclass &queryresult) {

  char *basepath = collectdir.getcstr(); //like ...gsdl/collect/demo
  char *indexname = (getindexsuffix(queryparams)).getcstr();
 
  IndexData indexData;
  if (!indexData.LoadData (basepath, indexname)) {
    cerr<<"couldn't load index data\n"<<endl;
    delete [] basepath;
    delete [] indexname;
    return false;
  }

  UCArray level;
  UCArrayClear(level);
  
  //browse always at top level
  SetCStr(level, "Document");
  

  BrowseQueryNode browseNode;
  browseNode.startPosition = start;
  browseNode.numTerms = numDocs;

  BrowseQueryResult browseResult;

  // SetCStr is expected to copy the characters into the UCArray, so the
  // temporary C string can be released straight away
  UCArrayClear(browseNode.term);
  char *query_cstr = queryparams.querystring.getcstr();
  SetCStr(browseNode.term, query_cstr);
  delete [] query_cstr;

  // do the actual query
  MGBrowseQuery(indexData, level, browseNode, browseResult);

  // load results into term info
  // NOTE(review): if GetCStr returns a caller-owned buffer, the result used
  // below is leaked - confirm against the mgpp UCArray declarations.
  termfreqclass term;
  for (int i=0; i<(int)browseResult.termFreqs.size(); i++) {
    term.clear();
    term.termstr = GetCStr(browseResult.termFreqs[i].term);
    term.termstemstr = term.termstr;
    term.termfreq = browseResult.termFreqs[i].termFreq;
    queryresult.terms.push_back(term);
    queryresult.orgterms.push_back(term);

  }
  // clean up; the path strings are kept alive until the index is unloaded
  indexData.UnloadData();
  delete [] basepath;
  delete [] indexname;

  return true;
}

// the document text for 'docnum' is placed in 'output'
// docTargetDocument returns 'true' if it was able to
// try to get a document
// collection is needed to see if an index from the 
// collection is loaded. THe default index bits are just there cos
// the mg version needs them

bool mgppsearchclass::docTargetDocument(const text_t &/*defaultindex*/,
					const text_t &/*defaultsubcollection*/,
					const text_t &/*defaultlanguage*/,
					const text_t &collection,
					int docnum,
					text_t &output) {
  
  char *basepath = collectdir.getcstr(); //like ...gsdl/collect/demo

  text_t textfilename = "/index/text/"+collection;
  TextData textdata;
  if(!textdata.LoadData(basepath, textfilename.getcstr())) {
    //error
    return false;
  }
  UCArray doctext;
  UCArray level;
  //SetCStr(level, "Section");
  SetCStr(level, gdbm_level.getcstr());
  if (!GetDocText(textdata, level, (unsigned long)docnum, doctext)) {
    //error
    return false;
  }

  // convert UCArray to text_t
  output.clear();
  output = GetCStr(doctext);

  // here need to remove the <Document>, <Section>, <Paragraph> tags

  // mg converts to unicode, this may need to be added here???

  //clean up
  textdata.UnloadData ();
  delete basepath;

  return true;

}






