#!/usr/bin/perl -w

###########################################################################
#
# buildcol.pl -- This program will build a particular collection
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the 
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

package buildcol;

BEGIN {
    # Refuse to start unless the Greenstone environment has been set up;
    # GSDLHOME is checked first, then GSDLOS.
    foreach my $required ('GSDLHOME', 'GSDLOS') {
	die "$required not set\n" unless defined $ENV{$required};
    }

    # Put the Greenstone library directories at the front of the module
    # search path: classify first, then plugins, then perllib itself.
    my $perllib = "$ENV{'GSDLHOME'}/perllib";
    unshift (@INC, "$perllib/classify", "$perllib/plugins", $perllib);
}

use colcfg;
use parsargv;
use util;
use FileHandle;

# Script entry point: everything is done inside main(), which dies on
# fatal errors (bad arguments, missing collection or configuration file).
&main();

# Writes the command line help for buildcol.pl to STDERR.
# Takes no arguments; the default collect directory shown for -collectdir
# is derived from $ENV{'GSDLHOME'}.
sub print_usage {
    my $default_collectdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect");

    # one entry per output line; an empty entry produces a blank line
    my @usage_lines = (
	"",
	"buildcol.pl: Builds the indexes of a Greenstone collection.",
	"",
	"  usage: $0 [options] collection-name",
	"",
	"  options:",
	"   -verbosity number     0=none, 3=lots",
	"   -archivedir directory Where the archives live",
	"   -builddir directory   Where to put the built indexes",
	"   -maxdocs number       Maximum number of documents to build",
	"   -debug                Print output to STDOUT",
	"   -mode all|compress_text|build_index|infodb",
	"   -index indexname      Index to build (will build all in",
	"                         config file if not set)",
	"   -keepold              will not destroy the current contents of the",
	"                         building directory",
	"   -allclassifications   Don't remove empty classifications",
	"   -create_images        Attempt to create default images for new",
	"                         collection. This relies on the Gimp being",
	"                         installed along with relevant perl modules",
	"                         to allow scripting from perl",
	"   -collectdir directory Collection directory (defaults to " . $default_collectdir . ")",
	"   -out                  Filename or handle to print output status to.",
	"                         The default is STDERR",
	"   -buildtype mg|mgpp    This will override the config file setting.",
	"                         (default is mg)",
	"   -no_strip_html        Do not strip the html tags from the indexed text",
	"                         (only used for mgpp collections).",
	"",
    );

    print STDERR join ("\n", @usage_lines), "\n";
}


# Entry point of buildcol.pl.
#
# Parses the command line, reads the collection's etc/collect.cfg, works
# out the archive/build directories (optionally redirected through a
# cache directory), loads the builder class and runs the build stages
# selected by -mode.
#
# Takes no arguments (reads @ARGV) and returns nothing useful. Dies when
# the arguments cannot be parsed, the collection cannot be selected, the
# configuration file is missing, the output file cannot be opened, or
# mgpp is requested on windows.
#
# NOTE: $collection, $configfilename and $collectcfg (and the other
# undeclared variables below) are package globals. create_images() reads
# $collection and $configfilename, so they must not be made lexical here.
sub main
{
    my ($verbosity, $archivedir, $cachedir, $builddir, $maxdocs, 
	$debug, $mode, $indexname, $keepold, $allclassifications,
	$create_images, $collectdir, $out, $buildtype, $textindex,
	$no_strip_html);
    if (!parsargv::parse(\@ARGV, 
			 'verbosity/\d+/2', \$verbosity,
			 'archivedir/.*/', \$archivedir,
			 'cachedir/.*/', \$cachedir,
			 'builddir/.*/', \$builddir,
			 'maxdocs/^\-?\d+/-1', \$maxdocs,
			 'debug', \$debug,
			 'mode/^(all|compress_text|build_index|infodb)$/all', \$mode,
			 'index/.*/', \$indexname,
			 'keepold', \$keepold,
			 'allclassifications', \$allclassifications,
			 'create_images', \$create_images,
			 'collectdir/.*/', \$collectdir,
			 'out/.*/STDERR', \$out,
			 'no_strip_html', \$no_strip_html,
			 'buildtype/^(mg|mgpp)$/', \$buildtype)) {
	&print_usage();
	die "\n";
    }

    $textindex = "";

    # $out holds the *name* of the filehandle that status output goes to;
    # it is used symbolically ("print $out ...") here and handed to the
    # builder as well.
    my $close_out = 0;
    if ($out !~ /^(STDERR|STDOUT)$/i) {
	open (OUT, ">$out") || die "Couldn't open output file $out\n";
	$out = "buildcol::OUT";
	$close_out = 1;
    } else {
	# The test above ignores case, but inside package buildcol only
	# the upper-case names resolve to the standard handles, so
	# normalise e.g. "-out stderr" to "STDERR".
	$out = uc ($out);
    }
    $out->autoflush(1);

    # get and check the collection
    if (($collection = &util::use_collection(@ARGV, $collectdir)) eq "") {
	&print_usage();
	die "\n";
    }

    # read the configuration file; command line values take precedence
    # over the values found there

    $configfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc", "collect.cfg");
    if (-e $configfilename) {
	$collectcfg = &colcfg::read_collect_cfg ($configfilename);

	if (defined $collectcfg->{'buildtype'} && $buildtype eq "") {
	    $buildtype = $collectcfg->{'buildtype'};
	}
	if ($buildtype eq "") {
	    $buildtype = "mg"; # mg is the default
	}
	if (defined $collectcfg->{'archivedir'} && $archivedir eq "") {
	    $archivedir = $collectcfg->{'archivedir'};
	}
	if (defined $collectcfg->{'cachedir'} && $cachedir eq "") {
	    $cachedir = $collectcfg->{'cachedir'};
	}
	if (defined $collectcfg->{'builddir'} && $builddir eq "") {
	    $builddir = $collectcfg->{'builddir'};
	}
	if ($buildtype eq "mgpp" && defined $collectcfg->{'textcompress'}) {
	    $textindex = $collectcfg->{'textcompress'};
	}
	
    } else {
	die "Couldn't find the configuration file $configfilename\n";
    }
    
    #mgpp doesn't work yet on windows
    if ($buildtype eq "mgpp" && $ENV{'GSDLOS'} =~ /^windows$/) {
	die "mgpp doesn't work on windows\n";
    }
    
    #set the text index
    if ($buildtype eq "mgpp") {
	if ($textindex eq "") {
	    $textindex = "text";
	}
    }
    else {
	$textindex = "section:text";
    }

    # create default images if required
    if ($create_images) {
	my $collection_name = $collection;
	$collection_name = $collectcfg->{'collectionmeta'}->{'collectionname'} 
	if defined $collectcfg->{'collectionmeta'}->{'collectionname'};

	# $out is lexical to this sub, so hand the handle name over
	# explicitly for any warnings that need to be printed
	&create_images ($collection_name, $out);
    }

    # fill in the default archives and building directories if none
    # were supplied, turn all \ into / and remove trailing /
    $archivedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives") if $archivedir eq "";
    $archivedir =~ s/[\\\/]+/\//g;
    $archivedir =~ s/\/$//;
    $builddir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "building") if $builddir eq "";
    $builddir =~ s/[\\\/]+/\//g;
    $builddir =~ s/\/$//;

    # update the archive cache if needed
    if ($cachedir) {
	print $out "Updating archive cache\n" if ($verbosity >= 1);

	$cachedir =~ s/[\\\/]+$//;
	# \Q...\E: the collection name is literal text, not a pattern
	$cachedir .= "/collect/$collection" unless 
	    $cachedir =~ /collect\/\Q$collection\E/;

	$realarchivedir = "$cachedir/archives";
	$realbuilddir = "$cachedir/building";
	&util::mk_all_dir ($realarchivedir);
	&util::mk_all_dir ($realbuilddir);
	&util::cachedir ($archivedir, $realarchivedir, $verbosity);

    } else {
	$realarchivedir = $archivedir;
	$realbuilddir = $builddir;
    }

    # build it in realbuilddir
    &util::mk_all_dir ($realbuilddir);


    # if a builder class has been created for this collection, use it
    # otherwise, use the mg or mgpp builder
    if (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}builder.pm") {
	$builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
	$buildertype = "${collection}builder";
    } else {
	$builderdir = "$ENV{'GSDLHOME'}/perllib";
	if ($buildtype eq "mgpp") {
	    $buildertype = "mgppbuilder";
	}
	else {
	    $buildertype = "mgbuilder";
	}
    }
	
    require "$builderdir/$buildertype.pm";

    # $buildertype holds the class name, so the constructor can be called
    # as an ordinary class method; any error it raises propagates as is
    $builder = $buildertype->new ($collection,
				  $realarchivedir, $realbuilddir, $verbosity,
				  $maxdocs, $debug, $keepold, $allclassifications, $out);

    $builder->init();

    if ($buildertype eq "mgppbuilder" && $no_strip_html) {
	$builder->set_strip_html(0);
    }
    if ($mode =~ /^all$/i) {
	$builder->compress_text($textindex);
	$builder->build_indexes($indexname);
	$builder->make_infodatabase();
	$builder->collect_specific();
    } elsif ($mode =~ /^compress_text$/i) {
	$builder->compress_text($textindex);
    } elsif ($mode =~ /^build_index$/i) {
	$builder->build_indexes($indexname);	
    } elsif ($mode =~ /^infodb$/i) {
	$builder->make_infodatabase();
    } else {
	die "unknown mode: $mode\n";
    }

    $builder->make_auxiliary_files() if !$debug;
    $builder->deinit();
    
    # when building through a cache directory the result still has to be
    # copied to the directory the caller asked for
    if (($realbuilddir ne $builddir) && !$debug) {
	print $out "Copying back the cached build\n" if ($verbosity >= 1);
	&util::rm_r ($builddir);
	&util::cp_r ($realbuilddir, $builddir);
    }

    close OUT if $close_out;
}

# Creates the default collection icons by running the gimp title_icon.pl
# script, then rewrites collect.cfg so that the iconcollection and
# iconcollectionsmall collectionmeta entries point at the new images.
#
#   $collection_name  text passed to the image script via -text
#   $out              optional name of the filehandle that warnings are
#                     printed to; defaults to STDERR when not supplied
#
# Reads the package globals $collection and $configfilename. Returns
# nothing; problems are reported as warnings on $out rather than by dying.
sub create_images {
    my ($collection_name, $out) = @_;

    # Without this default an unset handle makes every warning below die
    # with "Can't use an undefined value as filehandle reference".
    $out = "STDERR" unless defined $out;

    my $image_script = &util::filename_cat ($ENV{'GSDLHOME'}, "bin", "script", "gimp", "title_icon.pl");
    if (!-e $image_script) {
	print $out "WARNING: Image making script ($image_script) could not be found\n";
	print $out "         Default images will not be generated\n\n";
	return;
    }

    my $imagedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "images");

    &util::mk_all_dir ($imagedir);

    # create the images
    # NOTE(review): these commands go through the shell, so a collection
    # name containing double quotes or shell metacharacters will break
    # (or alter) the command line.
    system ("$image_script -size 1.5 -image_dir \"$imagedir\" -filename $collection.gif -text \"$collection_name\"");
    system ("$image_script -image_dir \"$imagedir\" -filename ${collection}sm.gif -text \"$collection_name\"");

    # update the collect.cfg configuration file (this will need
    # to be changed when the config file format changes)
    if (!open (CFGFILE, "<", $configfilename)) {
	print $out "WARNING: Couldn't open config file ($configfilename)\n";
	print $out "         for updating so collection images may not be linked correctly\n";
	return;
    }

    # copy the file into memory, replacing any existing icon entries
    my $line = ""; my $file = "";
    my $found = 0; my $foundsm = 0;
    while (defined ($line = <CFGFILE>)) {
	if ($line =~ /collectionmeta\s+iconcollection\s+/) {
	    $line = "collectionmeta iconcollection _httpprefix_/collect/$collection/images/$collection.gif\n";
	    $found = 1;
	} elsif ($line =~ /collectionmeta\s+iconcollectionsmall\s+/) {
	    $line = "collectionmeta iconcollectionsmall _httpprefix_/collect/$collection/images/${collection}sm.gif\n";
	    $foundsm = 1;
	}
	$file .= $line;
    }
    close CFGFILE;

    # if the last line had no newline, an appended entry would be glued
    # onto it and corrupt both
    if ((!$found || !$foundsm) && $file ne "" && $file !~ /\n\z/) {
	$file .= "\n";
    }

    $file .= "collectionmeta iconcollection _httpprefix_/collect/$collection/images/$collection.gif\n" if !$found;
    $file .= "collectionmeta iconcollectionsmall _httpprefix_/collect/$collection/images/${collection}sm.gif\n" if !$foundsm;

    if (!open (CFGFILE, ">", $configfilename)) {
	print $out "WARNING: Couldn't open config file ($configfilename)\n";
	print $out "         for updating so collection images may not be linked correctly\n";
	return;
    }

    # buffered write errors may only show up at close, so check both
    my $written = print CFGFILE $file;
    $written = 0 unless close (CFGFILE);
    if (!$written) {
	print $out "WARNING: Couldn't write config file ($configfilename): $!\n";
    }
}

