###########################################################################
#
# PSPlug.pm -- this might look VERY similar to the PDF plugin...
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the 
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################

package PSPlug;

use ConvertToPlug;
use sorttools;

# Establish the inheritance chain at compile time: PSPlug is a ConvertToPlug.
BEGIN {
    @ISA = qw(ConvertToPlug);
}

use strict;


# Constructor.
#
# Creates the underlying ConvertToPlug object and then reads the
# PSPlug-specific switches (-extract_date, -extract_pages, -extract_title)
# from the caller's option list into the object.
#
# Arguments: the class name, followed by the plugin options.
# Returns:   the plugin object, blessed into the given class.
# Dies:      when parsargv::parse rejects the options; the usage text is
#            printed to STDERR first.
sub new {
    my $class = shift (@_);

    # The parent constructor receives the class name, "-convert_to text"
    # ahead of the caller's own options, and then "--", "-title_sub" and a
    # page-heading pattern appended after them.
    my $self = ConvertToPlug->new($class, ("-convert_to", "text", @_),
                                  "--", "-title_sub", 'Page\s+\d+');

    # Options this plugin does not know about are tolerated
    # ("allow_extra_options").
    # NOTE(review): presumably each referenced slot ends up true when the
    # matching switch was supplied -- confirm against parsargv::parse.
    if (!parsargv::parse(\@_,
                         q^extract_date^,  \$self->{'extract_date'},
                         q^extract_pages^, \$self->{'extract_pages'},
                         q^extract_title^, \$self->{'extract_title'},
                         "allow_extra_options")) {
        print STDERR "\nIncorrect options passed to PSPlug, check your collect.cfg configuration file\n";
        print_usage();
        die "\n";
    }

    return bless $self, $class;
}

# Write the PSPlug usage summary (the three -extract_* switches plus a note
# pointing at the PRESCRIPT package) to STDERR.
# Returns the result of the final print.
sub print_usage {
    my @usage_text = (
        "\n  usage: plugin PSPlug [options]\n\n",
        "  options:\n",
        "   -extract_date               Extract date from PS header\n",
        "   -extract_pages              Extract pages from PS header\n",
        "   -extract_title              Extract title from PS header\n",
        "\n\nNote! This is a \"poor man's\" ps to text converter. If you are serious, consider\n",
        "using the PRESCRIPT package, which is available for download at\n   http://www.nzdl.org/html/software.html\n\n",
    );

    # One print per chunk, in order.
    my $status;
    foreach my $chunk (@usage_text) {
        $status = print STDERR $chunk;
    }
    return $status;
}


# Default "block" expression for this plugin: a case-insensitive pattern
# matching file names that end in ".eps".
sub get_default_block_exp {
    my ($self) = @_;

    my $eps_pattern = q^(?i)\.(eps)$^;
    return $eps_pattern;
}

# Default "process" expression for this plugin: a case-insensitive pattern
# matching file names that end in ".ps".
sub get_default_process_exp {
    my ($self) = @_;

    my $ps_pattern = q^(?i)\.ps$^;
    return $ps_pattern;
}

# Scan a PostScript file line by line for "Title:", "Creation..." and
# "Pages:" text and record what is found as Title, Date and Pages metadata
# on the document's top section.
#
# Arguments:
#   $self     - plugin object; its extract_title, extract_date and
#               extract_pages entries select what is looked for, and a
#               verbosity above 1 enables a progress message on STDERR.
#   $filename - path of the PostScript file to read.
#   $doc      - document object; get_top_section() and
#               add_utf8_metadata($section, $name, $value) are called on it.
#
# Returns nothing useful.  If the file cannot be opened, a message is
# printed to STDERR and no metadata is added.
sub extract_metadata_from_postscript {
    my $self = shift (@_);
    my $filename = shift (@_);
    my $doc = shift (@_);
    my $section = $doc->get_top_section();

    print STDERR "PSPlug: extracting PostScript metadata from \"$filename\"\n"
        if $self->{'verbosity'} > 1;

    my $want_title = $self->{'extract_title'};
    my $want_date  = $self->{'extract_date'};
    my $want_pages = $self->{'extract_pages'};

    # Nothing was asked for, so there is no reason to read the file at all.
    return unless ($want_title || $want_date || $want_pages);

    # Three-argument open with a lexical handle: the file name is never
    # interpreted as a mode, and a failure is reported instead of ignored.
    my $input;
    if (!open($input, '<', $filename)) {
        print STDERR "PSPlug: unable to open \"$filename\" for metadata extraction: $!\n";
        return;
    }

    my $title_found = 0;
    my $pages_found = 0;
    my $date_found = 0;
    my $date;

    while (my $line = <$input>) {
        if ($want_title && !$title_found) {
            foreach my $word ($line =~ m|Title: ([-A-Za-z0-9@/\/\(\):,. ]*)|g) {
                # Strip placeholder titles, the "Microsoft Word" prefix and
                # the enclosing parentheses before deciding whether
                # anything usable is left.
                my $new_word = $word;
                $new_word =~ s/\(Untitled\)//i;
                $new_word =~ s/\(Microsoft Word\)//i;
                $new_word =~ s/Microsoft Word//i;
                $new_word =~ s/^\(//i;
                $new_word =~ s/\)$//i;
                $new_word =~ s/^ - //i;
                if ($new_word ne "") {
                    $doc->add_utf8_metadata($section, "Title", $new_word);
                    $title_found = 1;
                }
            }
        }

        if ($want_date && !$date_found) {
            foreach my $word ($line =~ m/(Creation[-A-Za-z0-9@\/\(\):,. ]*)/g) {
                # Layout: "<Mon> <dd> <time> <yyyy>"
                if ($word =~ m/ ([A-Za-z][A-Za-z][A-Za-z]) ([0-9 ][0-9])  ?[0-9: ]+ ([0-9]{4})/) {
                    $date = sorttools::format_date($2, $1, $3);
                    if (defined $date) {
                        $doc->add_utf8_metadata($section, "Date", $date);
                    }
                }
                # Layout: "D:yyyymmddhhmmss)"
                if ($word =~ m/D:([0-9]{4})([0-9]{2})([0-9]{2})[0-9]{6}\)/) {
                    $date = sorttools::format_date($3, $2, $1);
                    if (defined $date) {
                        $doc->add_utf8_metadata($section, "Date", $date);
                    }
                }
                # Layout: "CreationDate: <yyyy> <Mon> <dd> <time>"
                if ($word =~ m/CreationDate: ([0-9]{4}) ([A-Za-z][A-Za-z][A-Za-z]) ([0-9 ][0-9]) [0-9:]*/) {
                    $date = sorttools::format_date($3, $2, $1);
                    if (defined $date) {
                        $doc->add_utf8_metadata($section, "Date", $date);
                    }
                }
                # The search for a date stops after the first line holding
                # a "Creation..." entry, whether or not a date was parsed.
                $date_found = 1;
            }
        }

        if ($want_pages && !$pages_found) {
            foreach my $word ($line =~ m/(Pages: [0-9]*)/g) {
                my $digits = $word;
                $digits =~ s/[^0-9]//g;
                # A missing or zero page count is not recorded.
                if ($digits ne "" && $digits ne "0") {
                    $doc->add_utf8_metadata($section, "Pages", $digits);
                    $pages_found = 1;
                }
            }
        }

        # Everything that was requested has been dealt with; the remainder
        # of the file cannot contribute anything further.
        last if (!$want_title || $title_found)
             && (!$want_date  || $date_found)
             && (!$want_pages || $pages_found);
    }

    close($input);
    return;
}


# Plugin-specific processing for a PostScript document.
#
# After $self, six positional arguments are expected.  The third and fourth
# are joined with "/" to form the path of the PostScript file, and the sixth
# is the document object that receives the extracted metadata.  The complete
# argument list is then handed on to ConvertToPlug::process_type with the
# type "ps", and that call's result is returned.
sub process {
    my $self = shift;

    # Pick out only the values used here; @_ itself stays intact so that it
    # can be forwarded unchanged below.
    my (undef, undef, $dir, $name, undef, $doc_obj) = @_;

    if ($self->{'verbosity'} > 1) {
        print STDERR "PSPlug: passing $_[3] onto $self->{'convert_to'} Plug\n";
    }

    extract_metadata_from_postscript($self, "$dir/$name", $doc_obj);

    return ConvertToPlug::process_type($self, "ps", @_);
}


1;

