#!/usr/local/bin/perl
# Requires Perl 5
#
# indexmaker -- a perl script to make index.html from PDF files,
#               HTML files, VRML files and other files.
#
# by ANFACE Software <anface@geocities.com> 29 November 1998
# Fly to ANFACE! http://fly.to/anface
#
# Copy, use, and redistribute freely, but don't take the company name off it and
# clearly mark an altered version.  Fixes and enhancements cheerfully 
# accepted.
#
# This is version 6.8.
#
use strict;
use Getopt::Long;
use File::Find;
# PDF library v. 1.07 http://fly.to/anface
use PDF;
# libnet http://www.connect.net/gbarr/libnet/
use Net::FTP;
# libwww-perl-5 http://www.sn.no/libwww-perl/
use LWP::UserAgent;

# ---------------------------------------------------------------------------
# Start-up: built-in defaults, command-line parsing, configuration file.
#
# Precedence, lowest to highest: the %option defaults below, the
# "key: value" lines of the configuration file, and finally the -output /
# -input command-line switches (applied later in the script).
# ---------------------------------------------------------------------------
my $version="6.8";
my $configure="indexmaker.cfg";
# Empty unless -output is given.  A non-empty default here would always
# override the OUTPUT_file entry of the configuration file; the effective
# default "index.html" comes from %option instead.
my $output="";
my $input="";
my $verbose="";
my $help="";
my $recursive=""; my $match="";
my @elem; my %option; my $elem="";

# Plain call: the old "do SUBROUTINE(LIST)" form is no longer supported by
# Perl 5.20 and later.
GetOptions("configure=s" => \$configure,
           "output=s"    => \$output,
           "input=s"     => \$input,
           "recursive=s" => \$recursive,
           "match=s"     => \$match,
           "help"        => \$help,
           "verbose"     => \$verbose) or printusage();

# Keys that may appear in the configuration file.
@elem=("OUTPUT_file","INPUT_file","proxy","YOUR_email","META_AuthorName",
       "META_AuthorMail","META_Keywords","META_Description","TITLE_title",
       "GIF_pdficon","GIF_getacrobat","PDF_extensions","HTML_extensions",
       "VRML_extensions","Graphic_extensions","Compress_extensions",
       "Executable_extensions");
# Built-in defaults for every key.
%option=(OUTPUT_file           => 'index.html',
         INPUT_file            => '',
         proxy                 => '',
         YOUR_email            => 'yourname@yourhost.com',
         META_AuthorName       => 'Fabrizio Pivari',
         META_AuthorMail       => 'pivari@geocities.com',
         META_Keywords         => 'index, maker, HTML, PDF, VRML',
         META_Description      => "index.html made by IndexMaker $version",
         TITLE_title           => "TOC file made by IndexMaker $version",
         GIF_pdficon           => 'pdficon.gif',
         GIF_getacrobat        => 'getacro.gif',
         PDF_extensions        => 'pdf',
         HTML_extensions       => 'html,htm',
         VRML_extensions       => 'vrml,vrl',
         Graphic_extensions    => 'gif,jpeg,jpg,png',
         Compress_extensions   => 'gz,zip,Z',
         Executable_extensions => 'exe');

$help and printusage();

# Three-argument open: the file name is never interpreted as a mode or pipe.
open (CNF, '<', $configure)
    or die "indexmaker: couldn't open configuration file $configure: $!\n";
($verbose && $configure) and print "$configure indexmaker configuration file\n";
while (my $line = <CNF>) {
  $line =~ s/\t/ /g;             # replace tabs by space
  next if $line =~ /^ *\#/;      # ignore comment lines
  next if $line =~ /^ *$/;       # ignore empty lines
  $line =~ s/\s+$//;             # drop newline, CR and trailing blanks
  foreach my $key (@elem) {
    # Anchored at the start of the line so that a key name appearing inside
    # another entry's value cannot overwrite that key.
    if ($line =~ /^ *$key *: *(.*)/i) {
      $option{$key}=$1;
      last;
      }
    }
  }
close(CNF);

# Report the values that will actually be used.
if ($verbose) {
  my $shown_output = $output ? $output : $option{'OUTPUT_file'};
  my $shown_input  = $input  ? $input  : $option{'INPUT_file'};
  $shown_output    and print "$shown_output indexmaker output file\n";
  $shown_input     and print "$shown_input indexmaker input file\n";
  $option{'proxy'} and print "$option{'proxy'} indexmaker proxy\n";
  }
# File::Find callback used by the -recursive scan.
# Appends the path of every regular file whose full name matches the
# file-level $match pattern to @ARGV.  Directories (and anything else that
# is not a plain file) are skipped so they are not indexed as documents.
sub wanted {
  # File::Find has changed into the containing directory at this point, so
  # the file test must use $_ (the bare name), not $File::Find::name.
  return unless -f $_;
  push @ARGV, $File::Find::name if $File::Find::name =~ /$match/;
  return;
  }

# ---------------------------------------------------------------------------
# Main flow: collect the list of files/URLs, classify each one by extension,
# and write the resulting table of contents to the output file.
# ---------------------------------------------------------------------------

# -match is only a filter for the -recursive directory scan.
if ($match && !$recursive) {
   print "You can use -match option only with -recursive option\n";
   exit;
   }

# Turn the shell-style -match pattern into a regex anchored at the end of
# the path, then let File::Find queue matching files onto @ARGV via wanted().
if ($recursive) { 
  $match=~s/\./\\./g;
  $match=~s/\*/.*/g;
  $match=~s/\?/./g;
  $match=~s/$/\$/;
  find (\&wanted,"$recursive");
  }

# Command-line values take precedence over the configuration file.
$output and $option{'OUTPUT_file'}=$output;
$input  and $option{'INPUT_file'}=$input;

# One HTML fragment (a run of <LI> items) per category.
my $HTMLFILES       = ""; my $PDFFILES        = ""; my $VRMLFILES       = "";
my $GRAPHICFILES    = ""; my $COMPRESSEDFILES = ""; my $EXECUTABLEFILES = "";
my $OTHERS          = "";
open(OUT, '>', $option{'OUTPUT_file'})
    or die("indexmaker: couldn't open output file $option{'OUTPUT_file'}\n");
print OUT <<EOF;
<!doctype HTML public "-//W3C//DTD HTML 3.2//EN">
<!-- Made by IndexMaker $version written by Fabrizio Pivari (pivari\@geocities.com) -->
<HTML>
<HEAD>
<LINK REL="Author" HREF=mailto:$option{'META_AuthorMail'}>
<META NAME="Author" CONTENT="$option{'META_AuthorName'}">
<META NAME="Keywords" CONTENT="$option{'META_Keywords'}">
<META NAME="Description" CONTENT="$option{'META_Description'}">
<TITLE>$option{'TITLE_title'}</TITLE>
</HEAD>
<BODY BGCOLOR="\#ffffff">
<H1>$option{'TITLE_title'}</H1>
EOF

# Entries listed in the input file are appended to the command-line list.
if ($option{'INPUT_file'}) {
  open(IN, '<', $option{'INPUT_file'})
      or die("indexmaker: couldn't open input file $option{'INPUT_file'}\n");
  while (my $entry = <IN>) {
    # Strip the line terminator and surrounding blanks; otherwise the
    # newline ends up in the generated HREFs and in downloaded file names.
    $entry =~ s/^\s+//;
    $entry =~ s/\s+$//;
    next if $entry eq "";
    push @ARGV,$entry;
    }
  close(IN);
  }

my (@PDFext,@HTMLext,@VRMLext,@Graphicext,@Compressext,@Executableext);
my $x=""; my $file=""; my $tmp=""; my $i; my @URL;
my $url=""; my $urlfile;
@PDFext=split(/,/,$option{'PDF_extensions'});
@HTMLext=split(/,/,$option{'HTML_extensions'});
@VRMLext=split(/,/,$option{'VRML_extensions'});
@Graphicext=split(/,/,$option{'Graphic_extensions'});
@Compressext=split(/,/,$option{'Compress_extensions'});
@Executableext=split(/,/,$option{'Executable_extensions'});

# Categories that need no content analysis: extension list, the fragment
# the entry is appended to, and the label used for the -verbose message.
my @plain_kinds = (
  [\@Graphicext,    \$GRAPHICFILES,    "Analysing graphic    file: "],
  [\@Compressext,   \$COMPRESSEDFILES, "Analysing compressed file: "],
  [\@Executableext, \$EXECUTABLEFILES, "Analysing executable file: "],
  );

if (@ARGV) {
NEW: foreach $x (@ARGV) {
       # $file, $url and $urlfile are shared with pdffile/htmlfile/vrmlfile:
       # $file is the local file to read, $url the original URL (if any),
       # $urlfile is true when $file is a temporary download.
       $urlfile=0;
       $file = $x;
       if($file =~ m{ftp://(.*)}) {
         @URL = split (/\//,$1);
         if($option{'YOUR_email'} eq "yourname\@yourhost.com") {
           print <<EOF;
WARNING: you are using yourname\@yourhost.com like password for anonymous ftp.
Change it in the configuration file with your e-mail address!
EOF
           }
# It works!  But how can the ftp login password be changed?  And the proxy port?
         if($option{'proxy'}) {
           _fetch_with_lwp('ftp', $file, $URL[$#URL]);
           }
         else {
           _fetch_with_ftp(@URL);
           }
         $url=$file;
         $file=$URL[$#URL];
         $urlfile=1;
         }
       if($file =~ m{http://(.*)}) {
         @URL = split (/\//,$1);
         _fetch_with_lwp('http', $file, $URL[$#URL]);
         $url=$file;
         $file=$URL[$#URL];
         $urlfile=1;
         }
       # The analysers read the document from STDIN.  Three-argument open
       # keeps odd file names from being treated as modes or pipes.
       open(STDIN, '<', $file) or die ("indexmaker: couldn't open $file for input\n");
       foreach $tmp (@PDFext) {if ($file =~ /\.$tmp$/i) {pdffile();next NEW;}}
       foreach $tmp (@HTMLext) {if ($file =~ /\.$tmp$/i) {htmlfile();next NEW;}}
       foreach $tmp (@VRMLext) {if ($file =~ /\.$tmp$/i) {vrmlfile();next NEW;}}
       foreach my $kind (@plain_kinds) {
         my ($extensions, $fragment, $label) = @$kind;
         foreach $tmp (@$extensions) {
           next unless $file =~ /\.$tmp$/i;
           # Link to the original URL for downloads, else to the local file.
           my $shown = $urlfile ? $url : $file;
           $$fragment .= "<LI><A HREF=\"$shown\">$shown<\/A></LI>\n";
           $verbose and print "$label$shown\n";
           $urlfile and unlink $file;   # remove the temporary download
           next NEW;
           }
         }
       # No known extension matched.
       if($urlfile) {
         $OTHERS .= qq!<LI><A HREF="$url">$url<\/A></LI>\n!;
         $verbose and print "Analysing others         : $url\n";
         unlink $file;
       }
       else {
         $OTHERS .= qq!<LI><A HREF="$file">$file<\/A></LI>\n!;
         $verbose and print "Analysing others         : $file\n";
         }
       }
     }
else {printusage();}

# Emit one section per non-empty category and close the document.
$HTMLFILES and print OUT "<H2>HTML files</H2>\n<UL>\n$HTMLFILES\n<\/UL>\n";
if ($PDFFILES) {
  print OUT <<EOF;
<H2>PDF files <A HREF="http://www.adobe.com/prodindex/acrobat/readstep.html">
<IMG BORDER=0 SRC="$option{'GIF_getacrobat'}" WIDTH=88 HEIGHT=31></A></H2>
<UL>
$PDFFILES</UL>
EOF
  }
$VRMLFILES and print OUT "<H2>VRML files</H2>\n<UL>\n$VRMLFILES\n<\/UL>\n";
$GRAPHICFILES and print OUT "<H2>Graphic files</H2>\n<UL>\n$GRAPHICFILES\n<\/UL>\n";
$COMPRESSEDFILES and print OUT "<H2>Compressed files</H2>\n<UL>\n$COMPRESSEDFILES\n<\/UL>\n";
$EXECUTABLEFILES and print OUT "<H2>Executable files</H2>\n<UL>\n$EXECUTABLEFILES\n<\/UL>\n";
$OTHERS and print OUT "<H2>Others</H2>\n<UL>\n$OTHERS\n<\/UL>\n";
print OUT "<\/BODY>\n<\/HTML>\n";
close(OUT)
    or die("indexmaker: couldn't write output file $option{'OUTPUT_file'}: $!\n");

# Download $remote (an ftp:// or http:// URL) into the local file $local
# with LWP, going through the configured proxy for $scheme when one is set.
# Dies when the transfer fails, so that a stale local file with the same
# name is never indexed (and later deleted) by mistake.
sub _fetch_with_lwp {
  my ($scheme, $remote, $local) = @_;
  my $agent = LWP::UserAgent->new;
  $option{'proxy'} and $agent->proxy($scheme => "$option{'proxy'}");
  my $response = $agent->request(HTTP::Request->new('GET', $remote), $local);
  $response->is_success
      or die("indexmaker: couldn't fetch $remote: " . $response->status_line . "\n");
  return;
  }

# Fetch a file by anonymous ftp into the current directory.
# Arguments: the URL split on "/" -- host first, file name last, directory
# components in between.  Dies with the server's message on any failure.
sub _fetch_with_ftp {
  my @parts = @_;
  my $host = $parts[0];
  my $name = $parts[$#parts];
  my $directory = "/" . join ("/",@parts[1..($#parts-1)]);
  my $ftp = Net::FTP->new($host)
      or die("indexmaker: couldn't connect to ftp host $host: $@\n");
  $ftp->login("anonymous",$option{'YOUR_email'})
      or die("indexmaker: ftp login to $host failed: " . $ftp->message);
  $ftp->binary;   # documents such as PDF must not be line-end translated
  $ftp->cwd($directory)
      or die("indexmaker: ftp cwd $directory failed: " . $ftp->message);
  $ftp->get($name)
      or die("indexmaker: ftp get $name failed: " . $ftp->message);
  $ftp->quit;
  return;
  }

# Add the current PDF document to the PDF section of the index.
# Reads the file-level variables $file, $url and $urlfile set by the main
# loop and appends one <LI> entry to $PDFFILES.  The Author, Title and
# Subject entries of the document are fetched with GetInfo, un-escaped with
# pdfspecialchar and HTML-escaped with specialchar; the file name is the
# fallback title.  A downloaded temporary copy is deleted afterwards.
sub pdffile {
  my $document = PDF->new($file);
  my %meta;
  foreach my $field ("Author", "Title", "Subject") {
    my $raw = $document->GetInfo($field);
    $meta{$field} = specialchar(pdfspecialchar($raw));
    }
  my $heading = $meta{'Title'};
  $heading = $file unless $heading;
  # Downloads are linked by their original URL, local files by their path.
  my $target = $urlfile ? $url : $file;
  $PDFFILES .= "<LI><A HREF=\"$target\">$heading<\/A> ";
  print "Analysing PDF        file: $target\n" if $verbose;
  unlink $file if $urlfile;
  $PDFFILES .= "written by $meta{'Author'} " if $meta{'Author'};
  $PDFFILES .= "<BR>\nDescription: $meta{'Subject'}\n" if $meta{'Subject'};
  $PDFFILES .= "<IMG SRC=\"$option{'GIF_pdficon'}\" WIDTH=34 HEIGHT=20>\n<\/LI>\n";
  }

# Add the current HTML document to the HTML section of the index.
# Reads the document from STDIN (opened on $file by the main loop) and
# extracts the <TITLE> text plus the Author and Description <META> values.
# Appends one <LI> entry to the file-level $HTMLFILES; the file name is
# used when no title is found.  A downloaded temporary copy ($urlfile true)
# is linked by its original $url and deleted afterwards.
#
# NOTE: the ".." range operators below keep their state between calls, so
# a document that ends inside an unterminated <META or <TITLE> leaves the
# range open for the next document.
sub htmlfile {
  my $author=""; my $description=""; my $title="";
  my $META=""; my $TITLE="";
  while (<STDIN>) {
    # Collect <META ...> tags, which may span several lines.
    if(/<META/i .. />/) {
      $META.=$_;
      $META=~s/\n/ /;
      if ($META=~/NAME=\"Author\" *CONTENT=\"([^"]*)\"/i) {$author=$1;}
      if ($META=~/NAME=\"Description\" *CONTENT=\"([^"]*)\"/i) {$description=$1;}
      }
    # Collect the title, which may span several lines as well.
    if(/<TITLE>/i .. /<\/TITLE>/i) {
      $TITLE.=$_;
      $TITLE=~s/\n/ /;
      # Only use $1 when the match succeeded; after a failed match it does
      # not hold anything that belongs to this pattern.
      if ($TITLE=~/<TITLE>(.*)<\/TITLE>/i) {$title=$1;}
      }
    }
  !$title and $title=$file;
  if($urlfile){
    $HTMLFILES .=  "<LI><A HREF=\"$url\">$title<\/A> ";
    $verbose and print "Analysing HTML       file: $url\n";
    unlink $file;
    }
  else {
    $HTMLFILES .=  "<LI><A HREF=\"$file\">$title<\/A> ";
    $verbose and print "Analysing HTML       file: $file\n";
    }
  $author and $HTMLFILES .= "written by $author";
  $description and $HTMLFILES .= "<BR>\nDescription: $description\n";
  $HTMLFILES .= "</LI>\n";
  }

# Add the current VRML document to the VRML section of the index.
# Reads the document from STDIN (opened on $file by the main loop).
#   VRML 2.0: title and description come from the "title" and "info"
#             fields of the WorldInfo node.
#   VRML 1.0: the description is the first quoted string following an
#             Info node outside any WorldInfo node.
# A WorldInfo description wins over a VRML 1.0 one.  Appends one <LI> entry
# to the file-level $VRMLFILES; the file name is the fallback title.  A
# downloaded temporary copy ($urlfile true) is linked by its original $url
# and deleted afterwards.
sub vrmlfile {
  my $title=""; my $description="";
  my $worldinfo="";     # text of the WorldInfo node(s), newlines -> spaces
  my $elsewhere="";     # text outside WorldInfo, kept until an Info is found
  my $info_v1="";       # description taken from a VRML 1.0 Info node
  while (<STDIN>) {
# VRML 2.0
    if(/WorldInfo/i .. /}/) {
      $worldinfo.=$_;
      $worldinfo=~s/\n/ /;
      # Tie each value to its own field name.  A greedy ".*" over the
      # growing buffer would pick the last bracket / quoted string seen so
      # far, e.g. the info text as the title.
      if ($worldinfo=~/\binfo\s*\[([^\]]*)\]/i) {$description = specialchar($1);}
      if ($worldinfo=~/\btitle\s*\"([^"]*)\"/i) {$title = specialchar($1);}
      }
# VRML 1.0
    elsif ($info_v1 eq "") {
      # Separate buffer, first match wins: later quoted strings (texture
      # file names, URLs, ...) must not replace the description.
      $elsewhere.=$_;
      $elsewhere=~s/\n/ /;
      if ($elsewhere=~/Info.*?\"([^"]*)\"/i) {$info_v1 = specialchar($1);}
      }
    }
  $description eq "" and $description = $info_v1;
  !$title and $title = $file;
  if($urlfile){
    $VRMLFILES .= "<LI><A HREF=\"$url\">$title<\/A> $description\n</LI>\n";
    $verbose and print "Analysing VRML       file: $url\n";
    unlink $file;
    }
  else {
    $VRMLFILES .= "<LI><A HREF=\"$file\">$title<\/A> $description\n</LI>\n";
    $verbose and print "Analysing VRML       file: $file\n";
    }
  }

# Escape the HTML markup characters &, < and > in a string.
# Takes the text as its only argument and returns the escaped copy.
sub specialchar {
  my ($text) = @_;
  my %entity_for = ('&' => '&amp;', '<' => '&lt;', '>' => '&gt;');
  $text =~ s/([&<>])/$entity_for{$1}/g;
  return $text;
  }

# Undo the backslash escapes used inside PDF string values.
# Takes the raw string as its only argument and returns the decoded copy:
#   \\  ->  \        \(  ->  (        \)  ->  )
#   backslash + end of line  ->  a single space
# All escapes are decoded in one left-to-right pass, so an escaped
# backslash followed by a parenthesis keeps its backslash, and every line
# continuation is replaced, not only the first one.
sub pdfspecialchar {
  my $char = shift(@_);
  my %literal = ('\\' => '\\', '(' => '(', ')' => ')');
  $char =~ s{\\(\r?\n|[\\()])}{ exists $literal{$1} ? $literal{$1} : ' ' }ge;
  return($char);
  }

# Print the command-line help text on standard output and terminate the
# program with exit status 1.  Never returns.
sub printusage {
    my $usage_text = <<'USAGEDESC';

usage:
        indexmaker [-options ...] list

where options include:
    -help                        print out this message
    -verbose                     verbose
    -recursive directory         scan recursively the directory
    -match     files             match different files ex. *.pdf, a?.*
                                 (require -recursive option)
    -configure file              default indexmaker.cfg
    -output    file              default index.html
    -input     file              a collection of input files and urls

list:
    with list you can use metacharacters and relative and absolute path name
    and ftp URL like ftp://ftp.host.com/directory/file
    and http URL like http://www.host.com/directory/file

example:
    indexmaker *.pdf
    indexmaker -c tests/test.cfg ftp://ftp.host.com/directory/file *.pdf
    indexmaker -v */*.html
    indexmaker -o home.htm *.gif *.tiff *.jpeg
    indexmaker -m *.pdf -r my_directory *.gz

If you want to know more about this tool, you might want
to read the docs. They came together with indexmaker!

Home: http://fly.to/anface

USAGEDESC
    print $usage_text;
    exit(1);
}
