###########################################################################
#
# ZIPPlug.pm --
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the 
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

# plugin which handles compressed and/or archived input formats
#
# currently handled formats and file extensions are:
# 
# gzip (.gz, .z, .tgz, .taz)
# bzip (.bz)
# zip (.zip .jar)
# tar (.tar)
#
# this plugin relies on the following utilities being present 
# (if trying to process the corresponding formats)
#
# gunzip (for gzip)
# bunzip (for bzip)
# unzip (for zip)
# tar (for tar) 

# ZIPPlug is currently disabled on windows as we can't expect any of the
# above utilities to be present on that OS. We should probably provide
# binaries with Greenstone some day.

package ZIPPlug;

use BasPlug;
use plugin;
use util;
use Cwd;


BEGIN {
    @ISA = ('BasPlug');
}

sub new {
    my ($class) = @_;
    my $self = new BasPlug ("ZIPPlug", @_);

    return bless $self, $class;
}

# this is a recursive plugin
sub is_recursive {
    my $self = shift (@_);

    return 1;
}

# return number of files processed, undef if can't process
# Note that $base_dir might be "" and that $file might 
# include directories
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;
    my $outhandle = $self->{'outhandle'};

    # disabled on windows
    return undef if ($ENV{'GSDLOS'} =~ /^windows$/i);

    if ($file =~ /\.(gz|tgz|z|taz|bz|zip|jar|tar)$/i) {

	my $filename = &util::filename_cat ($base_dir, $file);
	if (!-e $filename) {
	    print $outhandle "ZIPPLug: WARNING: $filename does not exist\n";
	    return undef;
	}
	
	my ($file_only) = $file =~ /([^\\\/]*)$/;
	my $tmpdir = &util::get_tmp_filename ();
	&util::mk_all_dir ($tmpdir);

	print $outhandle "ZIPPlug: extracting $file_only to $tmpdir\n";

	# save current working directory
	my $cwd = cwd();
	chdir ($tmpdir) || die "Unable to change to $tmpdir";
	&util::cp ($filename, $tmpdir);
	
	if ($file =~ /\.bz$/i) {
	    $self->bunzip ($file_only);
	} elsif ($file =~ /\.(zip|jar)$/i) {
	    $self->unzip ($file_only);
	} elsif ($file =~ /\.tar$/i) {
	    $self->untar ($file_only);
	} else {
	    $self->gunzip ($file_only);
	}

	chdir ($cwd) || die "Unable to change back to $cwd";

	my $numdocs = &plugin::read ($pluginfo, "", $tmpdir, $metadata, $processor, $maxdocs);
	&util::rm_r ($tmpdir);
	return $numdocs;

    } else {
	return undef;
    }
}

sub bunzip {
    my $self = shift (@_);
    my ($file) = @_;
    if (system ("bunzip $file")!=0)
    {
	&util::rm ($file);
    }
}

sub unzip {
    my $self = shift (@_);
    my ($file) = @_;
    system ("unzip $file");
    &util::rm ($file) if -e $file;
}

sub untar {
    my $self = shift (@_);
    my ($file) = @_;
    system ("tar xf $file");
    &util::rm ($file) if -e $file;
}

sub gunzip {
    my $self = shift (@_);
    my ($file) = @_;
    if (system ("gunzip $file")!=0)
    {
	&util::rm ($file);
    };
}

1;
