###########################################################################
#
# BasClas.pm -- base class for all classifiers
#
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the 
# University of Waikato, New Zealand.
#
# Copyright (C) 2000 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

package BasClas;

# How a classifier works.  
#
# For each classifier requested in the collect.cfg file, buildcol.pl creates
# a new classifier object (a subclass of BasClas).  Later, it passes each 
# document object to each classifier in turn for classification.
#
# Four primary functions are used:
#
# 1. "new" is called before the documents are processed to set up the
#    classifier.
#
# 2. "init" is called after buildcol.pl has created the indexes etc but
#    before the documents are classified in order that the classifier might
#    set any variables it requires, etc.
#
# 3. "classify" is called once for each document object.  The classifier
#    "classifies" each document and updates its local data accordingly.
#
# 4. "get_classify_info" is called after every document has been
#    classified.  It collates the information about the documents and
#    stores a reference to the classifier so that Greenstone can later
#    display it.

use parsargv;

# Write the help text for the options shared by every classifier to STDERR.
#
# Accepts one argument (a classifier/plugin name). The text printed is
# fixed and does not currently mention that name.
sub print_general_usage {
    my ($plugin_name) = @_;

    # One entry per output line; the empty entries are the blank lines
    # that frame the option description.
    my @usage_lines = (
        "",
        "   -verbosity N    Controls the quantity of output.  ",
        "                   Defaults to verbosity of buildcol.pl, which is usually 2.",
        "",
        "   (Most general classifier options are set internally by buildcol.)",
        "",
    );

    my $usage_text = join('', map { $_ . "\n" } @usage_lines);
    print STDERR $usage_text;
}

# Write the classifier-specific help text to STDERR.
#
# The base class has no options of its own, so it only prints a notice
# saying so.  Sub-classes should override this with their own usage text.
sub print_usage {
    my $no_options_notice = "This classifier has no classifier-specific options";

    # A blank line precedes the notice and another one follows it.
    print STDERR "\n" . $no_options_notice . "\n\n";
}

# Constructor shared by all classifiers.
#
# Arguments:
#   $class - package name the new object is blessed into
#   $name  - classifier name; used in the diagnostic printed when the
#            general options cannot be parsed
#   @_     - the remaining arguments are the option list, passed by
#            reference to parsargv::parse
#
# Returns a hash reference blessed into $class.  parsargv::parse is given
# references to the 'builddir', 'outhandle' and 'verbosity' slots of that
# hash.
#
# Dies (with a bare newline as the message) after printing an explanation
# and the general usage text to STDERR if parsargv::parse returns false.
sub new {
    my $class = shift (@_);
    my $name = shift (@_);

    my $self = {};

    # Handle name stored as a quoted string: this is the same value the
    # unquoted bareword yielded, but it also compiles under "use strict".
    $self->{'outhandle'} = 'STDERR';

    # general options available to all classifiers
    # NOTE(review): each spec looks like name/validation-regex/default, and
    # "allow_extra_options" presumably lets classifier-specific options pass
    # through untouched -- confirm against parsargv.
    if (!parsargv::parse(\@_,
                         q^builddir/.*/^, \$self->{'builddir'},
                         q^outhandle/.*/STDERR^, \$self->{'outhandle'},
                         q^verbosity/\d/2^, \$self->{'verbosity'},
                         "allow_extra_options")) {

        print STDERR "\nThe $name classifier uses an incorrect general option\n";
        print STDERR "(general options are those available to all classifiers).\n";
        print STDERR "Check your collect.cfg configuration file.\n";
        # Pass the classifier name we were actually given; the previous code
        # passed a variable that was never set in this sub, i.e. undef.
        print_general_usage($name);
        die "\n";
    }

    return bless $self, $class;
}

# Hook for per-classifier initialisation.
#
# The base implementation does no work; it simply hands back the object
# it was invoked on.  Sub-classes override it to set up whatever state
# they need.
sub init {
    my $classifier = shift;

    return $classifier;
}

# Classify a single document.
#
# Arguments:
#   $self    - the classifier object; its 'outhandle' entry is the handle
#              the message below is printed to
#   $doc_obj - the document to classify (ignored by the base class)
#
# The base class cannot classify anything: it only reports that the
# sub-class failed to provide its own implementation.
sub classify {
    my $self = shift;
    my ($doc_obj) = @_;

    print { $self->{'outhandle'} }
        "BasClas::classify function must be implemented in sub-class\n";
}

# Collate and return the classification information.
#
# Arguments:
#   $self - the classifier object; its 'outhandle' entry is the handle the
#           message below is printed to
#
# The base class has nothing to collate: it only reports that the
# sub-class failed to provide its own implementation.
sub get_classify_info {
    my $self = shift (@_);

    my $outhandle = $self->{'outhandle'};
    # Name this sub in the diagnostic; the message used to be a verbatim
    # copy of the one in classify and pointed at the wrong function.
    print $outhandle "BasClas::get_classify_info function must be implemented in sub-class\n";
}

# A Perl module file must finish by evaluating to a true value; otherwise
# loading it with "use" or "require" fails.
1;
