###########################################################################
#
# Hierarchy.pm --
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the 
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

# classifier plugin for generating hierarchical classifications

# options for this classifier are:
# -hfile file.txt    - classification file
# -metadata Metaname - metadata field to test against file.txt
# -sort Meta         - this option is optional (genious;-). by default this 
#                     classifier will sort documents within each section 
#                     alphabetically by Title. sort=nosort prevents sorting 
#                     (i.e. documents will end up in build order), sort=Meta 
#                     will sort each field alphabetically by Meta (Meta may 
#                     also be 'Filename' to sort by the original filename).
# -buttonname Title  - another optional field. this is what will end up in the
#                     Title field for this classification. if not present it
#                     defaults to Metaname

package Hierarchy;

use BasClas;
use util;
use cfgread;
use sorttools;

sub BEGIN {
    @ISA = ('BasClas');
}

sub print_usage {
    print STDERR "
  usage: classify Hierarchy [options]
  options:

   -buttonname X  Title field for this classification.
                  Defaults to metadata name. 

   -metadata X    Metadata field used for classification,
                  list will be sorted by this element.

   -hfile X       The classification structure file

   -sort X        Metadata field to sort by (defaults to none)
";
}


sub new {
    my $class = shift (@_);
    my $self = new BasClas($class, @_);
    
    my $sortname = "Title";
    my ($hfile, $metadata, $title);

    if (!parsargv::parse(\@_, 
			 q^buttonname/.*/^, \$title,
			 q^sort/.*/nosort^, \$sortname,
			 q^hfile/.*/^, \$hfile,
			 q^metadata/.*/^, \$metadata,
			 "allow_extra_options")) {
	
	print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n";
	&print_usage();
	die "\n";
    }

    if (!$metadata) {
	&print_usage;
	print STDERR "\nHierarchy error: no metadata supplied\n";
	die "\n";
    }

    $title = $metadata unless ($title);

    $sortname = undef if $sortname =~ /^nosort$/;

    if (!$hfile) {
	&print_usage;
	print STDERR "\nHierarchy error: No -hfile supplied\n";
	die "\n";
    }
    
    my $subjectfile = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $hfile);
    if (!-e $subjectfile) {
	my $collfile = $subjectfile;
	$subjectfile = &util::filename_cat($ENV{'GSDLHOME'},"etc", $hfile);
	if (!-e $subjectfile) {
	    my $outhandle = $self->{'outhandle'};
	    &print_usage;
	    print STDERR "\nHierarchy Error: Can't locate subject file $hfile\n";
	    print STDERR "This file should be in $collfile or $subjectfile\n";
	    die "\n";
	}
    }

    $self->{'descriptorlist'} = {}; # first field in subject file
    $self->{'locatorlist'} = {}; # second field in subject file
    $self->{'subjectfile'} = $subjectfile;
    $self->{'metaname'} = $metadata;
    $self->{'sortname'} = $sortname;
    $self->{'title'} = $title;
    
    return bless $self, $class;
}

sub init {
    my $self = shift (@_);

    # read in the subject file
    my $list = &cfgread::read_cfg_file ($self->{'subjectfile'}, undef, '^[^#]?\w');
    
    # $list is a hash that is indexed by the descriptor. The contents of this
    # hash is a list of two items. The first item is the OID and the second item
    # is the title
    foreach $descriptor (keys (%$list)) {
	$self->{'descriptorlist'}->{$descriptor} = $list->{$descriptor}->[0];
	unless (defined $self->{'locatorlist'}->{$list->{$descriptor}->[0]}) {
	    $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'title'} = $list->{$descriptor}->[1];
	    $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'contents'} = [];
	}
    }
}

sub classify {
    my $self = shift (@_);
    my ($doc_obj) = @_;

    my $doc_OID = $doc_obj->get_OID();

    my $metadata = $doc_obj->get_metadata ($doc_obj->get_top_section(),
					   $self->{'metaname'});

    my $sortmeta = "";
    if (defined $self->{'sortname'}) {
	if ($self->{'sortname'} =~ /^filename$/i) {
	    $sortmeta = $doc_obj->get_source_filename();
	} else {
	    $sortmeta = $doc_obj->get_metadata_element($doc_obj->get_top_section(),
						       $self->{'sortname'});
	    if (defined $sortmeta) {
		if ($self->{'sortname'} eq "Creator") {
		    &sorttools::format_string_name_english (\$sortmeta);
		} else {
		    &sorttools::format_string_english (\$sortmeta);
		}
	    }
	}
	$sortmeta = "" unless defined $sortmeta;
    }

    foreach $metaelement (@$metadata) {
	if ((defined $self->{'descriptorlist'}->{$metaelement}) &&
	    (defined $self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}})) {
	    push (@{$self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}}->{'contents'}}, 
		  [$doc_OID, $sortmeta]);
	}
    }
}

sub get_classify_info {
    my $self = shift (@_);
    
    my $list = $self->{'locatorlist'};

    my $classifyinfo = $self->get_entry ($self->{'title'}, "VList", "Invisible");
    foreach $OID (keys (%$list)) {

	my $tempinfo = $self->get_OID_entry ($OID, $classifyinfo, $list->{$OID}->{'title'}, "VList");

	if (defined $self->{'sortname'}) {
	    foreach $subOID (sort {$a->[1] cmp $b->[1];} @{$list->{$OID}->{'contents'}}) {
		push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]});
	    }
	} else {
	    foreach $subOID (@{$list->{$OID}->{'contents'}}) {
		push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]});
	    }
	}
    }
    
    return $classifyinfo;
}

sub get_OID_entry {
    my $self = shift (@_);
    my ($OID, $classifyinfo, $title, $classifytype) = @_;

    $OID = "" unless defined $OID;
    $OID =~ s/^\.+//;

    my ($headOID, $tailOID) = $OID =~ /^(\d+)(.*)$/;
    $tailOID = "" unless defined $tailOID;

    if (!defined $headOID) {
	$classifyinfo->{'Title'} = $title;
	$classifyinfo->{'classifytype'} = $classifytype;
	return $classifyinfo;
    }

    $classifyinfo->{'contains'} = [] unless defined $classifyinfo->{'contains'};
    
    # add entries to the contains list until we have one for headOID
    #### the +10 is a hack that works but I'm not completely sure why 
    #### and don't have time to delve deeper. one day someone should 
    #### fix this ;-) -- Stefan
    while (scalar(@{$classifyinfo->{'contains'}}) <= ($headOID+10)) {
	push (@{$classifyinfo->{'contains'}}, $self->get_entry("", $classifytype));
    }

    return $self->get_OID_entry ($tailOID, $classifyinfo->{'contains'}->[($headOID-1)], $title, $classifytype);
}

sub get_entry {
    my $self = shift (@_);
    my ($title, $childtype, $thistype) = @_;

    # organise into classification structure
    my %classifyinfo = ('childtype'=>$childtype,
			'Title'=>$title,
			'contains'=>[]);
    $classifyinfo{'thistype'} = $thistype 
	if defined $thistype && $thistype =~ /\w/;

    return \%classifyinfo;
}


1;
