#!/usr/local/bin/perl
#
# sizedb - calculate size of database needed
#
# usage: sizedb config [files]
#
# Algorithm is
#
# Extract all the words from all Indexed fields.
# Throw away ones of 1 or 2 characters or that begin with parens.
# Pipe through "sort | uniq | wc -l".
# This gives the number of words that will be indexed.
# Multiply that by 2 or so to get a comfortable table size.
# Print the next prime number greater than the calculated table size.
#
# @(#)$Id: sizedb,v 1.2 1992/08/21 18:50:28 paul Exp $

#
# Main script.
#
# Reads the field configuration named by the first argument, pulls the
# words out of every Indexed field in the input records (remaining
# arguments, or stdin when none are given), counts the distinct words with
# "sort | uniq | wc -l", doubles that count with dc, and prints the first
# number that "primes" reports when started from the doubled count.
#
# Dies with a message if the config file or scratch file cannot be opened,
# or if one of the helper pipelines fails.
use strict;
use warnings;
use File::Temp qw(tempfile);

my $config = shift;
die "usage: sizedb config [files]\n" unless defined $config;

#
# find the numbers of the Indexed fields
my %Dex;	# field name => field number, for every field flagged Indexed
open(my $cfg, '<', $config) or die "$config: $!\n";
while (<$cfg>)
{
	# NOTE(review): this only matches when "Indexed" is followed by another
	# ':'; a line that ends in ":Indexed" is skipped -- confirm against the
	# config file format.
	next unless /:Indexed:/;
	my ($nnum, $nname) = split(/:/);
	$Dex{$nname} = $nnum;
}
close($cfg);

# One precompiled pattern per indexed field: the field number, a colon,
# then everything up to the next tab is captured as the field's text.
my @field_res = map { qr/\b\Q$_\E:([^\t]+)/ } values(%Dex);

# Scratch file that receives the distinct-word count.  It is created with an
# unpredictable name (rather than /tmp/sizedb.$$) so nobody can pre-create or
# symlink it, and it is removed automatically when the script exits, even on
# die.  The generated name contains no shell metacharacters, so it is safe
# to interpolate into the shell commands below.
my ($tmpfh, $tmpname) = tempfile('sizedbXXXXXX', DIR => '/tmp', UNLINK => 1);
close($tmpfh);

open(my $out, '|-', "sort | uniq | wc -l > $tmpname")
	or die "sizedb: cannot start sort pipeline: $!\n";

#
# Collect all indexed words
while (my $line = <>)
{
	# chomp, not chop: a final line without a newline must keep its last char
	chomp($line);
	foreach my $re (@field_res)
	{
		next unless $line =~ $re;
		my $words = $1;
		$words =~ s/\([^ ]* //g;	# drop tokens that start with '('
		# Drop two- and one-character words that have a space on each side.
		# NOTE(review): words at the very start or end of the field, and
		# every other word in a run of short words, survive this filter.
		$words =~ s/ \w\w / /g;
		$words =~ s/ \w / /g;
		$words =~ s/ /\n/g;		# one word per line for sort | uniq
		print $out "$words\n";
	}
}

# Closing the pipe waits for sort | uniq | wc to finish writing the count.
close($out)
	or die "sizedb: sort pipeline failed: " . ($! ? $! : "exit status $?") . "\n";

# Turn the scratch file into a dc program: "<count> 2 * p" prints count * 2.
open(my $dc, '>>', $tmpname) or die "$tmpname: $!\n";
print $dc "2 * p\n";
close($dc) or die "$tmpname: $!\n";

# primes reads its starting value from stdin; head keeps only the first one.
system("dc < $tmpname | primes | head -1") == 0
	or die "sizedb: dc | primes pipeline failed (status $?)\n";
exit 0;
