###########################################################################
#
# RecPlug.pm --
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the 
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

# plugin which recurses through directories processing
# each file it finds

package RecPlug;

use BasPlug;
use plugin;
use util;


# Establish inheritance at compile time: this package derives from BasPlug.
BEGIN {
    @ISA = qw(BasPlug);
}

# Constructor.
#
# Builds the underlying BasPlug object and then re-blesses it into $class.
# BasPlug's constructor receives the plugin name "RecPlug" followed by the
# complete original argument list (which still starts with $class, because
# the class name is copied out of @_ rather than shifted off it).
#
# Returns the new object, blessed into $class.
sub new {
    my ($class) = @_;

    # Direct method-call syntax is used instead of the indirect
    # "new BasPlug (...)" form, whose parsing depends on compile-time
    # heuristics; the arguments passed are exactly the same.
    my $self = BasPlug->new("RecPlug", @_);

    # Patterns matched against the file argument in read(); a match makes
    # read() skip that file or directory.  Empty by default.
    $self->{'exclude_tail_dirs'} = [];

    return bless $self, $class;
}

# Report whether this class might recurse using $pluginfo.
# This plugin always may, so the answer is unconditionally 1.
sub is_recursive {
    my ($self) = @_;
    return 1;
}


# Recursively process a directory.
#
# Arguments (after $self):
#   $pluginfo  - passed through unchanged to plugin::read for every entry
#   $base_dir  - base directory; may be ""
#   $file      - path relative to $base_dir; may itself include directories
#   $metadata  - not used here (metadata is not carried on to the next level)
#   $processor - passed through unchanged to plugin::read
#   $maxdocs   - stop once this many files have been counted; -1 = no limit
#
# Returns:
#   undef  - the path is not a directory (someone else will have to process
#            it), or the directory could not be opened
#   0      - $file matched one of the 'exclude_tail_dirs' patterns
#   1      - the path was deliberately skipped (Greenstone archives/index
#            directory, suspected cyclic path, or CVS directory)
#   count  - otherwise, the sum of the values returned by plugin::read for
#            the directory's entries
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
    my $outhandle = $self->{'outhandle'};

    # each exclusion entry is used as a regular expression against $file
    foreach my $etd ( @{$self->{'exclude_tail_dirs'}} ) {
        return 0 if ($file =~ m/$etd/);
    }

    my (@dir, $subfile);
    my $count = 0;

    # see if this is a directory
    my $dirname = &util::filename_cat ($base_dir, $file);

    # check to make sure we're not reading our own archives
    # or index directory
    my $gsdlhome = quotemeta (defined $ENV{'GSDLHOME'} ? $ENV{'GSDLHOME'} : '');
    if ($dirname =~ m%^${gsdlhome}/.*?/import.*?/(archives|index)$%) {
        print $outhandle "RecPlug: $dirname appears to be a reference to a Greenstone collection, skipping.\n";
        return 1;
    }

    # check to see we haven't got a cyclic path: more than 40 "/"-introduced
    # components is treated as suspicious.  The quantifier must be {41,}
    # ("41 or more"); {,41} is "0 to 41" on Perl 5.34+ (matching every path,
    # so nothing would ever be processed) and literal text on older Perls
    # (so the guard would never fire).
    if ($dirname =~ m%(/.*){41,}%) {
        print $outhandle "RecPlug: $dirname is 40 directories deep, is this a recursive path? if not increase constant in RecPlug.pm.\n";
        return 1;
    }

    # second cycle check: the component(s) following an "import/" segment
    # show up again after a later "/import/"
    if ($dirname =~ m%.*?import/(.+?)/import/\1.*%) {
        print $outhandle "RecPlug: $dirname appears to a recursive loop ...\n";
        return 1;
    }

    if (-d $dirname) {

        if ($dirname =~ m|/CVS$|) {
            print $outhandle "RecPlug: $dirname is a CVS directory, skipping.\n";
            return 1;
        }

        # read all the files in the directory
        if (!opendir (DIR, $dirname)) {
            print $outhandle "RecPlug: WARNING - couldn't read directory $dirname\n";
            return;
        }

        # the whole listing is read and the handle closed before recursing,
        # so nested calls can safely reuse the DIR handle
        @dir = readdir (DIR);
        closedir (DIR);

        print $outhandle "RecPlug: getting directory $dirname\n";

        # process each file
        foreach $subfile (@dir) {
            last if ($maxdocs != -1 && $count >= $maxdocs);

            # skip the "." and ".." entries
            if ($subfile !~ /^\.\.?$/) {
                # note: metadata is not carried on to the next level
                $count += &plugin::read ($pluginfo, $base_dir, &util::filename_cat($file, $subfile),
                                         {}, $processor, $maxdocs);
            }
        }
        return $count;
    }

    # wasn't a directory, someone else will have to process it
    return undef;
}


1;
