/**************************************************************************
 *
 * ivf.pass1.cpp -- Memory efficient pass 1 inversion
 * Copyright (C) 1999  Rodger McNab
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * $Id: ivf.pass1.cpp,v 1.1 2000/01/14 02:26:07 sjboddie Exp $
 *
 **************************************************************************/

#include "sysfuncs.h"

#include "mg_files.h"
#include "invf.h"
#include "mg.h"
#include "build.h"
#include "locallib.h"
#include "UCArray.h"
#include "bitio_m_stdio.h"
#include "bitio_gen.h"
#include <stdio.h>
#include "words.h"
#include "messages.h"
#include "netorder.h"
#include "FIvfLevelInfo.h"

#include "longlong.h"

#if defined(GSDL_USE_OBJECTSPACE)
#  include <ospace\std\map>
#elif defined(GSDL_USE_STL_H)
#  include <map.h>
#else
#  include <map>
#endif


/*
   $Log: ivf.pass1.cpp,v $
   Revision 1.1  2000/01/14 02:26:07  sjboddie
   Rodgers new C++ mg

   *
 */



// a fragment corresponds to the index level (word-level is the
// minimum index level)


// structure to determine level information
struct LevelWorker {
  unsigned long lastLevelDocNum;
  unsigned long count;

  LevelWorker () {
    lastLevelDocNum = count = 0;
  }
};


// note: the word is stored in the map
struct IvfWordInfo {
  unsigned long wordCount;      // word frequency
  unsigned long fragCount;      // number of fragments that contain the word
  unsigned long lastFragNum;    // last fragment to contain the word
  unsigned long chunkWordCount; // word frequency within this chunk
  unsigned long chunkFragCount; // number of fragments within this chunk that
                                //   contain the word

  LevelWorker *levels;          // level info for this word
  
  IvfWordInfo ();
  ~IvfWordInfo ();
  void Clear ();                // will delete levels
};

typedef map<UCArray, IvfWordInfo, DictLTUCArray> IvfWordInfoMap;
typedef vector<IvfWordInfoMap::iterator> IvfWordInfoItArray;

// tags don't require as much information
struct IvfTagInfo {
  unsigned long tagCount;       // tag frequency
  unsigned long fragCount;      // number of fragments that contain the tag
  unsigned long lastFragNum;    // last fragment to contain the tag
  unsigned long chunkFragCount; // number of fragments within this chunk that
                                //   contain the tag

  IvfTagInfo ();
  void Clear ();
};

typedef map<UCArray, IvfTagInfo, DictLTUCArray> IvfTagInfoMap;
typedef vector<IvfTagInfoMap::iterator> IvfTagInfoItArray;


#define INIT_CHECK_FRAC 0.10
#define CHECK_FRAC 0.75
#define CHECK_CLOSE 0.999
#define CHECK_DIV 1.5


static FILE *ic;		// the invf chunk file
static stdio_bitio_buffer icb;

IvfWordInfoMap ivfWordInfo;
IvfWordInfoItArray ivfWordInfoOccurOrder;
IvfTagInfoMap ivfTagInfo;
IvfTagInfoItArray ivfTagInfoOccurOrder;

static unsigned long chunksWritten;
static unsigned long maxMemNeeded;

static unsigned long numDocs;
static unsigned long numChunkDocs;

static unsigned long numFrags;
static unsigned long numChunkFrags;

static unsigned long numWords;

// the number of document numbers in the inverted file
static unsigned long numChunkEntries;

// next entry in the inverted file to check memory
// requirements for the current chunk
static unsigned long entryCheckPoint;

// information about all the different levels
static FIvfLevel ivfLevel;



IvfWordInfo::IvfWordInfo () {
  levels = NULL;
  Clear();
}

IvfWordInfo::~IvfWordInfo () {
  if (levels != NULL) delete [] levels;
}

void IvfWordInfo::Clear () {
  wordCount = 0;
  chunkWordCount = 0;
  lastFragNum = 0;
  fragCount = 0;
  chunkFragCount = 0;

  if (levels != NULL) {
    delete [] levels;
    levels = NULL;
  }
}


IvfTagInfo::IvfTagInfo () {
  Clear();
}

void IvfTagInfo::Clear () {
  tagCount = 0;
  lastFragNum = 0;
  fragCount = 0;
  chunkFragCount = 0;
}



int init_ivf_1 (const TagInfo &tagInfo, char *file_name) {
  // set up the chunk file
  if (!(ic = create_file (file_name, INVF_CHUNK_SUFFIX, "wb",
			  MAGIC_CHUNK, MG_MESSAGE)))
    return COMPERROR;
  fwrite ("    ", sizeof (u_long), 1, ic);	// Space for the maxmem
  icb.attachFile (ic);
  icb.encodeStart();

  // reset global variables
  ivfWordInfo.erase (ivfWordInfo.begin(), ivfWordInfo.end());
  ivfWordInfoOccurOrder.erase (ivfWordInfoOccurOrder.begin(), ivfWordInfoOccurOrder.end());
  ivfTagInfo.erase (ivfTagInfo.begin(), ivfTagInfo.end());
  ivfTagInfoOccurOrder.erase (ivfTagInfoOccurOrder.begin(), ivfTagInfoOccurOrder.end());
  
  chunksWritten = 0;
  maxMemNeeded = 0;
 
  numDocs = 0;
  numChunkDocs = 0;

  numFrags = 0;
  numChunkFrags = 0;

  numWords = 0;

  numChunkEntries = 0;
  entryCheckPoint = (unsigned long) ((invf_buffer_size * INIT_CHECK_FRAC) / CHECK_DIV);

  // init the level information
  ivfLevel.Clear();
  ivfLevel.docTag = tagInfo.docTag;
  ivfLevel.indexLevel = tagInfo.indexLevel;
  IvfLevelInfo blankLevel;
  UCArraySet::const_iterator levelHere = tagInfo.levelTags.begin();
  UCArraySet::const_iterator levelEnd = tagInfo.levelTags.end();
  while (levelHere != levelEnd) {
    blankLevel.levelTag = *levelHere;
    ivfLevel.levelInfo[*levelHere] = blankLevel;
    levelHere++;
  }
  
  return COMPALLOK;
}

static void ProcessOpenTag (const TagInfo &tagInfo, const TextEl &el,
			    bool &inFrag) {
  bool wordLevelIndex = tagInfo.indexLevel.empty();

  // check for start of next fragment
  if (!wordLevelIndex && el.tagName == tagInfo.indexLevel) {
    numFrags++;
    numChunkFrags++;
    inFrag = true;
  }
  
  // update tag stats
  IvfTagInfo &i = ivfTagInfo[el.tagName];
  if (i.tagCount == 0) {
    // new tag, add to list of iterators
    IvfTagInfoMap::iterator iIt = ivfTagInfo.find (el.tagName);
    ivfTagInfoOccurOrder.push_back (iIt);
  }
  i.tagCount++;

  // all open tags count as new tags
  numChunkEntries++;
  i.fragCount++;
  i.chunkFragCount++;
  i.lastFragNum = numFrags;
  
  // update level information
  IvfLevelInfoMap::iterator levelIt = ivfLevel.levelInfo.find (el.tagName);
  if (levelIt != ivfLevel.levelInfo.end()) {
    // is a level tag
    (*levelIt).second.numEntries++;
    (*levelIt).second.workInLevel = true;
  }
}

static void ProcessCloseTag (const TagInfo &tagInfo, const TextEl &el,
			     bool &inFrag) {
  bool wordLevelIndex = tagInfo.indexLevel.empty();

  // check for end of fragment
  if (!wordLevelIndex && el.tagName == tagInfo.indexLevel) {
    inFrag = false;
  }

  // update level information
  IvfLevelInfoMap::iterator levelIt = ivfLevel.levelInfo.find (el.tagName);
  if (levelIt != ivfLevel.levelInfo.end()) {
    // is a level tag
    (*levelIt).second.workInLevel = false;
  }
}

static void ProcessText (const TagInfo &tagInfo, const TextEl &el,
			 bool &inFrag) {
  bool wordLevelIndex = tagInfo.indexLevel.empty();

  // make sure this text is to be indexed
  if (!wordLevelIndex && !inFrag) return;

  const unsigned char *textHere = el.text.begin();
  const unsigned char *textEnd = el.text.end() - 1;
  UCArray word;
  
  if (!inaword (textHere, textEnd))
    ParseNonindexWord (textHere, textEnd);

  
  // Alternately parse off words and non-words from the input
  // Each token is then inserted into the set if it does
  // not exist or has it's frequency count incremented if it does.

  while (textHere <= textEnd) {
    textHere = ParseIndexWord (textHere, textEnd, word);
    textHere = ParseNonindexWord (textHere, textEnd);

    if (!word.empty()) {
      numWords++;

      if (wordLevelIndex) {
	numFrags++;
	numChunkFrags++;
      }
      
      // update word stats

      IvfWordInfo &i = ivfWordInfo[word];
      if (i.wordCount == 0) {
	// new word
	// add to list of iterators
	IvfWordInfoMap::iterator iIt = ivfWordInfo.find (word);
	ivfWordInfoOccurOrder.push_back (iIt);

	// add level information array
	if (ivfLevel.levelInfo.size() > 0) 
	  i.levels = new LevelWorker [ivfLevel.levelInfo.size()];
      }

      i.wordCount++;
      i.chunkWordCount++;
      if (numFrags > i.lastFragNum) {
	numChunkEntries++;
	i.fragCount++;
	i.chunkFragCount++;
	i.lastFragNum = numFrags;
      }

      // update level information for this word
      if (i.levels != NULL) {
	IvfLevelInfoMap::iterator levelHere = ivfLevel.levelInfo.begin();
	IvfLevelInfoMap::iterator levelEnd = ivfLevel.levelInfo.end();
	LevelWorker *levelWorkerPtr = i.levels;
	while (levelHere != levelEnd) {

	  // check to make sure the level encompases this fragment
	  if (!(*levelHere).second.workInLevel) {
	    cerr << "Level tag <" << (*levelHere).first
		 << "> does not encompass all fragments\n";
	    exit (1);
	  }
	  
	  if ((*levelHere).second.numEntries > (*levelWorkerPtr).lastLevelDocNum) {
	    (*levelWorkerPtr).lastLevelDocNum = (*levelHere).second.numEntries;
	    (*levelWorkerPtr).count ++;
	  }
	  
	  levelHere++;
	  levelWorkerPtr++;
	}
      }
    }
  }
}


static unsigned long MemoryRequired (bool wordLevelIndex) {
  register unsigned long total = 0;

  // add memory required for word entries
  IvfWordInfoMap::const_iterator wordHere = ivfWordInfo.begin();
  IvfWordInfoMap::const_iterator wordEnd = ivfWordInfo.end();
  while (wordHere != wordEnd) {
    register const IvfWordInfo &info = (*wordHere).second;
    if (info.chunkFragCount > 0) {
      total += BIO_Bblock_Bound (numChunkFrags, info.chunkFragCount);
      if (!wordLevelIndex) {
	total += info.chunkWordCount;
      }
    }
    
    wordHere++;
  }

  // add memory required for tag entries
  IvfTagInfoMap::const_iterator tagHere = ivfTagInfo.begin();
  IvfTagInfoMap::const_iterator tagEnd = ivfTagInfo.end();
  while (tagHere != tagEnd) {
    register const IvfTagInfo &info = (*tagHere).second;
    if (info.chunkFragCount > 0) {
      // two d entries for each frag entry
      unsigned long pTag = info.chunkFragCount*2;
      total += BIO_Bblock_Bound (numChunkFrags+pTag, pTag);
    }
    
    tagHere++;
  }
  
  total = (total + 7) >> 3;
  return total;
}


/*
static void PrintChunkInfo (unsigned long mem) {
  cout << "Chunk Number: " << chunksWritten << "\n";
  cout << "numChunkDocs " << numChunkDocs << "\n";
  cout << "numChunkFrags " << numChunkFrags << "\n";
  cout << "mem " << mem << "\n";
  cout << "numWords " << ivfWordInfo.size() << "\n";
  cout << "numTags " << ivfTagInfo.size() << "\n\n";

  // output debug tag information in dictionary order
  IvfTagInfoMap::iterator tagMapHere = ivfTagInfo.begin();
  IvfTagInfoMap::iterator tagMapEnd = ivfTagInfo.end();
  unsigned long tagNum = 0;
  while (tagMapHere != tagMapEnd) {
    cout << (*tagMapHere).first << " " << tagNum
	 << " " << (*tagMapHere).second.chunkFragCount << "\n";
    tagNum++;
    tagMapHere++;
  }
}
*/

static void OutputChunkInfo (unsigned long mem, bool /*wordLevelIndex*/) {
  chunksWritten++;

  // sanity check
  if (ivfWordInfo.size() != ivfWordInfoOccurOrder.size()) {
    Message ("ERROR: Word information size mismatch: %u vs %u\n",
	     (unsigned int)ivfWordInfo.size(),
	     (unsigned int)ivfWordInfoOccurOrder.size());
    exit (1);
  }
  if (ivfTagInfo.size() != ivfTagInfoOccurOrder.size()) {
    Message ("ERROR: Tag information size mismatch: %u vs %u\n",
	     (unsigned int)ivfTagInfo.size(),
	     (unsigned int)ivfTagInfoOccurOrder.size());
    exit (1);
  }

  icb.gamma_encode (numChunkDocs + 1, NULL);
  icb.gamma_encode (numChunkFrags + 1, NULL);
  icb.gamma_encode (mem + 1, NULL);
  icb.gamma_encode (ivfWordInfo.size() + 1, NULL);
  icb.gamma_encode (ivfTagInfo.size() + 1, NULL);

  /*  PrintChunkInfo (mem);*/

  // output word information in occurance order
  IvfWordInfoItArray::iterator wordHere = ivfWordInfoOccurOrder.begin();
  IvfWordInfoItArray::iterator wordEnd = ivfWordInfoOccurOrder.end();
  while (wordHere != wordEnd) {
    register IvfWordInfo &ivfWordInfo = (*(*wordHere)).second;
    
    icb.gamma_encode (ivfWordInfo.chunkWordCount + 1, NULL);
    if (ivfWordInfo.chunkWordCount >= 2) {
      icb.gamma_encode (ivfWordInfo.chunkFragCount, NULL);
    }

    ivfWordInfo.lastFragNum = 0;
    ivfWordInfo.chunkWordCount = 0;
    ivfWordInfo.chunkFragCount = 0;
    
    wordHere++;
  }

  // output tag information in occurance order
  IvfTagInfoItArray::iterator tagHere = ivfTagInfoOccurOrder.begin();
  IvfTagInfoItArray::iterator tagEnd = ivfTagInfoOccurOrder.end();
  while (tagHere != tagEnd) {
    register IvfTagInfo &ivfTagInfo = (*(*tagHere)).second;

    icb.gamma_encode (ivfTagInfo.chunkFragCount + 1, NULL);

    ivfTagInfo.lastFragNum = 0;
    ivfTagInfo.chunkFragCount = 0;
    
    tagHere++;
  }

  numChunkDocs = 0;
  numChunkFrags = 0;
  numChunkEntries = 0;
}


int process_ivf_1 (const TagInfo &tagInfo, const TextElArray &doc) {
  bool wordLevelIndex = tagInfo.indexLevel.empty();
  bool inFrag = false;
  if (wordLevelIndex) inFrag = true; // unconditional
  
  numDocs++;
  numChunkDocs++;

  // process each text element in this document
  TextElArray::const_iterator here = doc.begin();
  TextElArray::const_iterator end = doc.end();
  while (here != end) {
    if ((*here).elType == OpenTagE)
      ProcessOpenTag (tagInfo, *here, inFrag);
    else if ((*here).elType == CloseTagE)
      ProcessCloseTag (tagInfo, *here, inFrag);
    else if ((*here).elType == TextE)
      ProcessText (tagInfo, *here, inFrag);
    
    here++;
  }

  // check the amount of memory needed for this chunk
  if (numChunkEntries >= entryCheckPoint) {
    unsigned long mem = MemoryRequired (wordLevelIndex);
    if (mem >= invf_buffer_size * CHECK_CLOSE) {
      if (mem > maxMemNeeded) maxMemNeeded = mem;
      OutputChunkInfo (mem, wordLevelIndex);
      entryCheckPoint = (unsigned long)
	((invf_buffer_size * INIT_CHECK_FRAC) / CHECK_DIV);
      
    } else {
      entryCheckPoint = (unsigned long)
	(entryCheckPoint * ((CHECK_FRAC * (invf_buffer_size - mem)) / mem) +
	 entryCheckPoint);
      if (entryCheckPoint <= numChunkEntries)
	entryCheckPoint = numChunkEntries + 1;
    }
  }
  
  return COMPALLOK;
}


static void CalcInvfDictSize (unsigned long &totalBytes,
			      unsigned long &indexStringBytes) {
  totalBytes = 0;	// The sum of the length of all words, including 
			// the length byte
  indexStringBytes = 0; // The amount of space required to store the 
			// words in the diction, this takes into account 
			// the prefixes
  const UCArray *lastWord = NULL;
  
  // calculate size of word information
  IvfWordInfoMap::iterator wordHere = ivfWordInfo.begin();
  IvfWordInfoMap::iterator wordEnd = ivfWordInfo.end();
  while (wordHere != wordEnd) {
    unsigned long wordSize = (*wordHere).first.size();
    totalBytes += wordSize + 1;
    indexStringBytes += wordSize + 2;
    if (lastWord != NULL)
      indexStringBytes -= PrefixLen (*lastWord, (*wordHere).first);
    lastWord = &((*wordHere).first);
    
    wordHere++;
  }

  // calculate size of tag information
  lastWord = NULL;
  IvfTagInfoMap::iterator tagHere = ivfTagInfo.begin();
  IvfTagInfoMap::iterator tagEnd = ivfTagInfo.end();
  while (tagHere != tagEnd) {
    unsigned long tagSize = (*tagHere).first.size();
    totalBytes += tagSize + 1;
    indexStringBytes += tagSize + 2;
    if (lastWord != NULL)
      indexStringBytes -= PrefixLen (*lastWord, (*tagHere).first);
    lastWord = &((*tagHere).first);
    
    tagHere++;
  }
}


//      OutputInvfDict ():
//                writes out the stemmed dictionary file
//                in the following format 
//                        lookback value (int)
//                        totalbytes value (int)
//                        indexstringbytes (int)
//                        for each word 
//                          wordlen (4 bits)
//                          prefix match (4 bits)
//                          word (wordlen bytes)
//                          word frequency (int)
//                          word count (int)
//
//        Accesses outside variables:     
//
//        Return value...:                
static void OutputInvfDict (char *filename) {
  // create the dictionary header
  invf_dict_header idh;
  idh.word_dict_size = ivfWordInfo.size();
  idh.tag_dict_size = ivfTagInfo.size();
  idh.num_docs = numDocs;
  idh.num_frags = numFrags;
  idh.num_words = numWords;
  idh.num_levels = ivfLevel.levelInfo.size();
  CalcInvfDictSize (idh.total_bytes, idh.index_string_bytes);

  // create the inverted dictionary file
  FILE *sp;
  if (!(sp = create_file (filename, INVF_DICT_SUFFIX, "wb", MAGIC_STEM_BUILD,
			  MG_MESSAGE)))
    return;

  // write out the dictionary header
  if (!idh.Write (sp)) { fclose (sp); return; }

  // remember where the word dictionary starts
  idh.word_dict_start = ftell (sp);
  
  // output the word dictionary
  const UCArray *lastWord = NULL;
  IvfWordInfoMap::iterator wordHere = ivfWordInfo.begin();
  IvfWordInfoMap::iterator wordEnd = ivfWordInfo.end();
  while (wordHere != wordEnd) {
    // get the prefix and suffix lengths
    const UCArray &thisWord = (*wordHere).first;
    WritePreSufStr (sp, lastWord, thisWord);

    // output the number of fragments the word appeared in and the
    // number of times the word appeared
    WriteUL (sp, (*wordHere).second.fragCount);
    WriteUL (sp, (*wordHere).second.wordCount);

    // output frequency information for each level
    // note that we are expecting every word to have
    // level information
    LevelWorker *lwHere = (*wordHere).second.levels;
    LevelWorker *lwEnd = lwHere + idh.num_levels;
    while (lwHere != lwEnd) {
      WriteUL (sp, (*lwHere).count);
      lwHere++;
    }
    
    lastWord = &thisWord;
    wordHere++;
  }

  // remember where the tag dictionary starts
  idh.tag_dict_start = ftell (sp);
  
  // output the tag dictionary
  const UCArray *lastTag = NULL;
  IvfTagInfoMap::iterator tagHere = ivfTagInfo.begin();
  IvfTagInfoMap::iterator tagEnd = ivfTagInfo.end();
  while (tagHere != tagEnd) {
    // get the prefix and suffix lengths
    const UCArray &thisTag = (*tagHere).first;
    WritePreSufStr (sp, lastTag, thisTag);

    // output the number of fragments the tag appeared in and the
    // number of times the tag appeared
    WriteUL (sp, (*tagHere).second.fragCount);
    WriteUL (sp, (*tagHere).second.tagCount);
    
    lastTag = &thisTag;
    tagHere++;
  }

  // write out the updated header
  fseek (sp, sizeof (u_long), SEEK_SET);
  if (!idh.Write (sp)) { fclose (sp); return; }
  
  fclose (sp);
}

static void OutputLevelFile (char *filename) {
  // create the level file
  FILE *f;
  if (!(f = create_file (filename, INVF_LEVEL_SUFFIX, "wb", MAGIC_INVF_LEVELS,
			 MG_MESSAGE)))
    return;

  // write out the information
  ivfLevel.Write (f);
  
  // close the file
  fclose (f);
}

static void OutputTransFile (char *filename) {
  // save the word number (in lastFragNum :-/)
  int i = 0;
  IvfWordInfoMap::iterator wordHere = ivfWordInfo.begin();
  IvfWordInfoMap::iterator wordEnd = ivfWordInfo.end();
  while (wordHere != wordEnd) {
    (*wordHere).second.lastFragNum = i;
    i++; wordHere++;
  }

  // save the tag number (in lastFragNum :-/)
  i = 0;
  IvfTagInfoMap::iterator tagHere = ivfTagInfo.begin();
  IvfTagInfoMap::iterator tagEnd = ivfTagInfo.end();
  while (tagHere != tagEnd) {
    (*tagHere).second.lastFragNum = i;
    i++; tagHere++;
  }
  
  // create the translation file
  FILE *f;
  if (!(f = create_file (filename, INVF_CHUNK_TRANS_SUFFIX, "wb",
			 MAGIC_CHUNK_TRANS, MG_MESSAGE)))
    return;

  stdio_bitio_buffer buffer(f);
  buffer.encodeStart();

  // write out the word translation table
  unsigned long wordDictSize = ivfWordInfoOccurOrder.size();
  IvfWordInfoItArray::iterator wordItHere = ivfWordInfoOccurOrder.begin();
  IvfWordInfoItArray::iterator wordItEnd = ivfWordInfoOccurOrder.end();
  unsigned long oN = 0;
  while (wordItHere != wordItEnd) {
    register IvfWordInfo &ivfWordInfo = (*(*wordItHere)).second;
    buffer.binary_encode (ivfWordInfo.lastFragNum + 1, wordDictSize + 1, NULL);
    oN++;
    wordItHere++;
  }

  // write out the tag translation table
  unsigned long tagDictSize = ivfTagInfoOccurOrder.size();
  IvfTagInfoItArray::iterator tagItHere = ivfTagInfoOccurOrder.begin();
  IvfTagInfoItArray::iterator tagItEnd = ivfTagInfoOccurOrder.end();
  while (tagItHere != tagItEnd) {
    register IvfTagInfo &ivfTagInfo = (*(*tagItHere)).second;
    buffer.binary_encode (ivfTagInfo.lastFragNum + 1, tagDictSize + 1, NULL);
    oN++;
    tagItHere++;
  }

  // finish encoding and close the file
  buffer.encodeDone();
  fclose (f);
}

#ifndef SILENT
static void PrintStats () {
  Message ("Inverted buffer size:          %8u bytes\n", invf_buffer_size);
  Message ("Max memory needed for 1 chunk: %8u bytes\n", maxMemNeeded);

  Message ("Number of chunks written:      %8u\n", chunksWritten);
  Message ("Number of documents:           %8u\n", numDocs);
  Message ("Number of fragments:           %8u\n", numFrags);
  Message ("Number of words:               %8u\n", numWords);

  Message ("Size of word dictionary:       %8u\n", ivfWordInfo.size());
  Message ("Size of tag dictionary:        %8u\n", ivfTagInfo.size());
}
#endif

int done_ivf_1 (const TagInfo &tagInfo, char * filename) {
  bool wordLevelIndex = tagInfo.indexLevel.empty();

  char *temp_str = msg_prefix;
  msg_prefix = "ivf.pass1";

  // output the last chunk
  if (numChunkDocs > 0) {
    unsigned long mem = MemoryRequired (wordLevelIndex);
    OutputChunkInfo (mem, wordLevelIndex);
    if (mem > maxMemNeeded) maxMemNeeded = mem;
  }
  
  // write out and encoded 1 to say there are no more chunks
  icb.gamma_encode (1, NULL);
  icb.encodeDone ();

  // write out the maximum memory required and close the file
  fseek (ic, sizeof (long), 0);
  WriteUL (ic, maxMemNeeded);
  fclose (ic);

  // output the inverted dictionary
  OutputInvfDict (filename);

  // write out the translation file
  OutputTransFile (filename);

  // output the level information
  OutputLevelFile (filename);
  
  // output statistics
#ifndef SILENT
  PrintStats ();
#endif
  
  msg_prefix = temp_str;

  return COMPALLOK;
}

