head	1.2;
access;
symbols;
locks; strict;
comment	@# @;


1.2
date	92.08.21.18.50.28;	author paul;	state Exp;
branches;
next	1.1;

1.1
date	92.08.21.17.09.01;	author paul;	state Exp;
branches;
next	;


desc
@@


1.2
log
@*** empty log message ***
@
text
@#!/usr/local/bin/perl
#
# sizedb - calculate size of database needed
#
# usage: sizedb config [files]
#
# Algorithm is
#
# Extract all the words from all Indexed fields.
# Throw away ones of 2 characters or fewer, or that begin with parens.
# Pipe through "sort | uniq | wc -l".
# This gives the number of words that will be indexed.
# Multiply that by 2 or so to get a comfortable table size.
# Print the next prime number greater than the calculated table size.
#
# @@(#)$Id: sizedb,v 1.1 1992/08/21 17:09:01 paul Exp $

#
# Find the numbers of the Indexed fields: %Dex maps field name => field number.
defined($config = shift) || die "usage: sizedb config [files]\n";
# Three-argument open: the config name is never parsed for mode or pipe characters.
open(CONFIG, '<', $config) || die "$config: $!\n";
while (<CONFIG>) {
	($nnum,$nname) = (split(':'));	# first two colon-separated columns
	if (/:Indexed:/) {$Dex{$nname} = $nnum;}
}
close(CONFIG);
open(OUT, "| sort | uniq | wc -l > /tmp/sizedb.$$") || die "sizedb: cannot start sort pipeline: $!\n";
# NOTE(review): /tmp/sizedb.$$ is a predictable name in a shared directory.
# Collect all indexed words: each surviving word goes to the pipeline on its
# own line, so the pipeline's output is the count of distinct indexed words.
while (<>)
{
	chomp;		# strip only a newline; chop ate a real character on an unterminated last line
	foreach $d (values(%Dex))
	{
		if (/\b$d:([^\t]+)/)	# text after "<field number>:" up to the next tab
		{
			$foo = " $1 ";			# pad so the first and last words are space-delimited too
			$foo =~ s/ \([^ ]*(?= )//g;	# drop words that begin with a paren
			$foo =~ s/ \w\w?(?= )//g;	# drop 1- and 2-character words; lookahead keeps the shared space
			$foo =~ s/^ +| +$//g;		# remove the padding and any other edge spaces
			$foo =~ s/ +/\n/g;		# one word per line, even across repeated spaces
			print OUT "$foo\n" if $foo ne '';	# an empty line would be counted as a word
		}
	}
}
close(OUT) || warn "sizedb: sort pipeline failed (status $?)\n";
open(OUT, ">>/tmp/sizedb.$$") || die "sizedb: cannot append to /tmp/sizedb.$$ ($!)\n";
print OUT "2 * p\n";	# dc program text appended after the word count: multiply by 2 and print
close(OUT) || die "sizedb: cannot write /tmp/sizedb.$$ ($!)\n";
$status = system("dc < /tmp/sizedb.$$ | primes | head -1");	# keep only the first line primes prints
unlink("/tmp/sizedb.$$");
exit($status == 0 ? 0 : 1);	# report a failed pipeline instead of always exiting 0
@


1.1
log
@Initial revision
@
text
@d16 1
a16 1
# @@(#)$Id$
d28 1
a28 1
open(OUT, "|egrep -v '^\\(\174^..\044\174^.\044' | sort | uniq | wc -l > /tmp/sizedb.$$");
d37 1
a37 1
		if (/\b$d:/)
d39 4
a42 2
			$foo = $_;
			$foo =~ s/.*\b$d:([^	]+).*/$1/;
@
