#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stdlib.h>

#include <savant.h>
#include <savutil.h>
#include <savantio.h>

/* Forward declaration: wvtree_insert_ddv() calls this before it is defined. */
int wvtree_insert_one(DB_UINT *wordcode, DB_UINT docweight);

/* %%% THESE ARE FAKE DECLARATIONS!! */
/* NOTE(review): presumably stand-ins for globals that are meant to live
   elsewhere in the project -- confirm before relying on them. */

/* Running total of the byte counts returned by wvtree_insert_one(). */
int Memory_Usage;
/* Id packed into every docweight; advanced once per inserted DenseDocVec. */
int Current_IDnum;
/* Number of DenseDocVecs passed through wvtree_insert_ddv(). */
int Global_Num_Docs;
/* Bumped on every insert of a DATE_FIELD/TIME_FIELD wordcode, duplicates included. */
int Global_Num_Dates;
/* Bumped when a new tree node is created for a non-date wordcode. */
int Global_Num_Words;
/* Not touched by the live code in this file; only the commented-out
   write_wvtree_node() refers to it. */
int Global_WVoffset;
/* Root of the binary tree searched and extended by wvtree_insert_one(). */
WV_Tree *Global_Tree;


int
wvtree_insert_ddv(DenseDocVec *ddv)
{
  /* Feed every entry of a DenseDocVec into Global_Tree.

     For entry k, the wordcode starting at index WORD_ENCODE_WIDTH*k
     of ddv->wordcodes is handed to wvtree_insert_one() together with
     WV_PACK(Current_IDnum, ddv->weights[k]).  The byte count each
     insert reports is accumulated into Memory_Usage.

     Afterwards Global_Num_Docs and Current_IDnum are each advanced
     by one.

     Returns the value Current_IDnum had on entry, i.e. the id that
     was packed into this document's entries.  */
  int assigned_id;
  int entry = 0;

  while (entry < ddv->num_entries) {
    int bytes_added =
      wvtree_insert_one(ddv->wordcodes + WORD_ENCODE_WIDTH*entry,
			WV_PACK(Current_IDnum, ddv->weights[entry]));

    Memory_Usage += bytes_added;
    entry++;
  }

  Global_Num_Docs += 1;

  /* hand back the id just used, then move on to the next one */
  assigned_id = Current_IDnum;
  Current_IDnum = assigned_id + 1;
  return assigned_id;
}

int 
wvtree_insert_one(DB_UINT *wordcode,
		  DB_UINT docweight)
{
  /* Given a wordcode and a docweight (document-id/term-frequency
     pair), prepend the docweight to the list of the Global_Tree node
     whose wordcode compares equal, creating that node first if no
     such node exists.  Increments Global_Num_Words or
     Global_Num_Dates as appropriate.

     wordcode must point at WORD_ENCODE_WIDTH elements; they are
     copied into a newly created node.

     Returns the number of bytes newly malloced by the addition.
     This is used by the auto-split-and-merge.

     If malloc fails, nothing is inserted: Global_Tree and all
     counters are left exactly as they were and 0 is returned.
     */
  int cmp, i;
  WV_Tree **treeptr = &Global_Tree;
  WV_Tree *node;
  WV_List *cell;

  /* binary search for the proper wordcode node */
  while (*treeptr != NULL) {
    cmp = wordcode_cmp((*treeptr)->wordcode, wordcode);
    if(cmp < 0) {
      treeptr = &((*treeptr)->right);
    }
    else if(cmp > 0) {
      treeptr = &((*treeptr)->left);
    }
    else { /* proper node found */
      node = *treeptr;
      /* Allocate into a local first so that a failed malloc cannot
	 clobber the head of the existing list. */
      cell = malloc(sizeof *cell);
      if (cell == NULL) {
	return(0);
      }
      cell->docweight = docweight;
      cell->next = node->wvlist;
      node->wvlist = cell;
      node->num_entries++;
      if ((word_type(wordcode[0]) == DATE_FIELD) ||
	  (word_type(wordcode[0]) == TIME_FIELD)) {
	Global_Num_Dates++;  /* Even equal dates are considered "unique" */
      }
      return((int)sizeof(WV_List));
    }
  }

  /* the search ended on an empty link (*treeptr == NULL): build a new
     node off to the side and only hook it into the tree once both
     allocations have succeeded */
  node = malloc(sizeof *node);
  cell = malloc(sizeof *cell);
  if (node == NULL || cell == NULL) {
    free(node);   /* free(NULL) is a no-op */
    free(cell);
    return(0);
  }

  for(i=0; i<WORD_ENCODE_WIDTH; i++) {
    node->wordcode[i] = wordcode[i];
  }
  cell->docweight = docweight;
  cell->next = NULL;
  node->num_entries = 1;
  node->wvlist = cell;
  node->right = node->left = NULL;
  *treeptr = node;

  if ((word_type(wordcode[0]) == DATE_FIELD) ||
      (word_type(wordcode[0]) == TIME_FIELD)) {
    Global_Num_Dates++;
  }
  else {
    Global_Num_Words++;
  }
  return((int)(sizeof(WV_Tree) + sizeof(WV_List)));
}
/*
int
write_wvtree_node(WV_Tree *node)
{*/
  /* Given a pointer to a single, non-NULL (WV_Tree *),
     write_wvtree_node writes out the info for that node only (no tree
     traversal) to the global files WORDVEC_FILE, WVOFF_FILE, and DATE_FILE.
     Uses Global_WVoffset.
     
     This function used to be part of the loop in write_global_tree.
     The file-writing parts moved to here, and the tfidf magnitude
     stuff moved to writewv_ra.c:accum_dvmags_from_wvtree_node_tfidf().
     
     File Formats:
     
     Format for WORDVEC_FILE:
     (DB_INT)   (width*DB_UINT) (DB_UINT)       (DB_UINT),(DB_UINT),..., (DB_UINT)
     NUM_WORDS, WORDCODE-1,     NUM_WEIGHTS=N1, WEIGHT-1, WEIGHT-2, ..., WEIGHT-N1,
     -          WORDCODE-2,     NUM_WEIGHTS=N2, WEIGHT-1, WEIGHT-2, ..., WEIGHT-N2,
     etc.
   
     Format for WVOFF_FILE:
     (width*DB_UINT)   (DB_UINT)
     WORDCODE-1,       OFFSET-1,
     WORDCODE-2,       OFFSET-2,
     etc.
   
     Format for DATE_FILE:
     (DB_INT)  (width*DB_UINT) (DB_UINT)
     NUM_DATES, WORDCODE-1,     WEIGHT-1,
     -          WORDCODE-2,     WEIGHT-2,
     etc.
   
   */
/*
  enum Field_Types type;
  WV_List *list;
*/
  /* If this is a date, write it to the date file instead of the 
     wordvec file.  Even if a date is exactly the same, it
     gets written multiple times.  (It's expected that most will be
     different, if only by a few seconds) */
/*  type = word_type(node->wordcode[0]);
  if ((type == DATE_FIELD) ||
      (type == TIME_FIELD)) {
    for(list=node->wvlist; list!=NULL; list=list->next) {
      fwrite_big(node->wordcode, sizeof(DB_INT), WORD_ENCODE_WIDTH, DATE_FILE);*/
      /* Set where?  Is this right?  Is it packed with docnum like it should be? */
/*      fwrite_big(&(list->docweight), sizeof(DB_INT), 1, DATE_FILE);
    }
  }
  else {*/  /* not a date */
    /* Into the WV offsets file goes the current offset and wordcode: */
/*    fwrite_big(node->wordcode, sizeof(DB_INT), WORD_ENCODE_WIDTH, WVOFF_FILE);
    fwrite_big(&Global_WVoffset, sizeof(DB_INT), 1, WVOFF_FILE);*/
    /* Now write the wordvec info, keeping track of the offset and 
       freeing list cells as we go */
/*    fwrite_big(node->wordcode, sizeof(DB_INT), WORD_ENCODE_WIDTH, WORDVEC_FILE);
    fwrite_big(&(node->num_entries), sizeof(DB_INT), 1, WORDVEC_FILE);
    
    Global_WVoffset += (WORD_ENCODE_WIDTH+1)*sizeof(DB_INT);
    for(list=node->wvlist; list!=NULL; list=list->next) {
      fwrite_big(&(list->docweight), sizeof(DB_INT), 1, WORDVEC_FILE);
      Global_WVoffset += sizeof(DB_INT);
    }
  }

  return(0);*/  /* temporary... make this return something useful */
/*}*/
