/* WIDE AREA INFORMATION SERVER SOFTWARE:
   No guarantees or restrictions.  See the readme file for the full standard
   disclaimer.

   Brewster@think.com
*/

/* Copyright (c) CNIDR (see ../COPYRIGHT) */


/* Include file for the irhash.c file.
   Implements the building functions in irext.h */

#ifndef IRHASH_H
#define IRHASH_H

#include "cdialect.h"
#include "cutil.h"
#include "irlex.h"
#include "hash.h"
#include "irlex.h" /* for MAX_WORD_LENGTH */

/* The amount of memory, in bytes, for word occurrences.  Judging by the
   name this is the initial block size; the allocation code that uses it
   is not in this header -- TODO confirm against irhash.c. */
#define WORD_MEMORY_INIT_BLOCK_SIZE 10

/* This is the maximum number of occurrences that will be stored in the
 * disk table.  The number of occurrences will reflect the total number in
 * all files.  The theory is that if a word is very common, then it
 * is not very useful in discriminating between files.  Also, if it
 * is very common, then it takes up a lot of space.
 * Maybe this should be dependent on the number of documents indexed:
 * if a word is in every document, then it probably does not mean
 * much.
 * If this value is increased, the inverted file may not keep all the
 * references, because the maximum length of an index block is governed
 * by a size that can be represented in INDEX_BLOCK_SIZE_SIZE bytes.
 *
 * Builds that define BIO get the larger limit (100000L); all other
 * builds get 20000L.
 */
#ifdef BIO
#define MAX_OCCURANCES 100000L /* need 100000L, was 20000L, dgg */
#else
#define MAX_OCCURANCES 20000L  
#endif

/* This is a flag to be put in the number_of_occurances field of a
   word_entry so that it is always greater than the limit and no words
   will be collected.  The value 0x40000000 is 2^30 (1073741824), which
   is larger than either setting of MAX_OCCURANCES. */
#define STOP_WORD_FLAG 0x40000000

/* Public declarations for irhash.c.  When compiled as C++ they are given
   C linkage so the names are not mangled.  Argument lists are wrapped in
   the _AP(()) macro (presumably supplied by cdialect.h to support
   compilers without prototypes -- TODO confirm). */
#ifdef __cplusplus
/* declare these as C style functions */
extern "C"
	{
#endif /* def __cplusplus */

/* Presumably returns a block of `size` bytes for storing word
   occurrences -- verify against irhash.c.
   NOTE(review): this name is spelled "occurrance" (double r), unlike the
   "occurance" spelling used by the other declarations below.  It has to
   match the definition, so do not rename it here without checking. */
unsigned char *make_word_occurrance_block _AP((long size));

/* Presumably releases a block obtained from make_word_occurrance_block
   -- TODO confirm ownership rules in irhash.c. */
void free_word_occurance_block _AP((unsigned char *block));

/* Takes no arguments and returns nothing; what "flush" does to the word
   occurrence buffers is defined in irhash.c and is not visible here. */
void flush_word_occurance_buffers _AP((void));

/* Operates on the given word-memory hashtable; judging by the name it
   garbage-collects the word occurrence buffers -- verify in irhash.c. */
void gc_word_occurance_buffers _AP((hashtable * the_word_memory_hashtable));

/* Presumably enters the stop words into the given hashtable (likely
   marking them with STOP_WORD_FLAG) -- TODO confirm. */
void add_stop_words _AP((hashtable *the_word_memory_hashtable));

/* Presumably writes `value` as `size` bytes starting at `ptr`.  The
   meaning of the long return value is not visible here -- confirm
   against the definition. */
long write_bytes_to_memory _AP((long value,long size,unsigned char* ptr));

#ifdef __cplusplus
	}
#endif /* def __cplusplus */

#endif /* ndef IRHASH_H */
