#!/usr/bin/perl
#
# VENDPAGE - Small tool to manage WWW pages for Vend
#
# Version 0.2 (alpha)
# Copyright 1995 by Mike Heins <mikeh@iac.net>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

############### Configurable Variables ######################
#
# Where your HTML source tree is (overridden by -h)
$HtmlDir = '/usr/local/etc/httpd/htdocs';

# Where the vend root directory is, and the page directory beneath it
$VendRoot = '/usr/local/lib/minivend';
$PageDir = 'pages';
#
# Where the images are relative to HTTP server DocumentRoot (overridden by -i)
$ImageDir = '/images';
###############  End Configurable variables  ######################

# Where the Vend PageDir is (set in minivend.cfg); overridden by -v
$VENDPAGE   =	"$VendRoot/$PageDir";
#
# File for reporting on operation; overridden by -r
$RPTFILE	=	"$VENDPAGE/vendpage.rpt";

# 5.001 is the minimum version, not the only acceptable one: an exact
# equality test would refuse to run under every later Perl release.
$] >= 5.001 or die sprintf
    "Perl version 5.001 or later is required (this is %.3f)\n", $];

=head1 NAME

vendpage - manage HTML pages for Vend

=head1 SYNOPSIS


B<vendpage> [B<-3>] [B<-m>] [B<-n>] [B<-s>] [B<-i imagepath>] [B<-h htmldir>] [B<-v venddir>]

=head1 DESCRIPTION

This program manages a tree of web pages that need submission to
Andrew Wilcox's Vend "shopping cart" software.  It recursively scans a
tree of pages and writes a corresponding tree of processed pages.  If
the directory tree does not exist, it will be created.  It takes normal
HTML hyperlinks and changes them based on a few rules.  The rules are:

=over 4

=item 1. 

If the hyperlink is absolute, either beginning with a '/' or a protocol
specification (i.e. http: or mailto:) it is left unchanged.

=item 2.

If the hyperlink is relative, not taking any BASE tag into account,
it is changed to be relative to the Vend I<PageDir> directory.  The
I<A HREF="file"> specification is also changed to the Vend I<[page file]>
specification, preserving the text between the anchors.

=item 3.

If the hyperlink is a simple transfer to a name in the same document, it
is not changed -- i.e. I<E<lt>A HREF="#name"E<gt>anchor textE<lt>/AE<gt>>
will not be touched.

=item 4.

Image specifications that are relative are changed so that the image
must be in the image directory, which unless specified (in the form of a
path relative to the HTTP server I<DocumentRoot>) is I<DocumentRoot/images>.
Spaces between quote marks and URLs are stripped.

=item 5.

Comments that begin with [vend] are stripped and passed on to the Vend
page as Vend tags.  For example:

 <!-- [vend] [order DB-201-A]DB-201-A Antenna[/order] [/vend] -->

becomes:

 [order DB-201-A]DB-201-A Antenna[/order]

Anything between the I<[vend] - [/vend]> pair will be passed on, including
comment terminators and starters.  The possibility exists to nest
HTML and Vend designations so that one source tree can be used for
both Vend and standard HTML.

=back

=head2 Options

=over 5

=item B<-3>

Use Vend 0.3 style page codes with quotes (I<[page "somepage"]>).

=item B<-h>I<htmldir>

The directory which HTML will be taken from, defaults
to F</usr/local/etc/httpd/htdocs>.  This can be easily changed in the
source.

=item B<-i>I<imagepath>

The path relative to the HTTP server document root
that IMG sources should be changed to.

=item B<-m>

Rename F<*.htm> files in source tree to F<*.html>.

=item B<-n>

Do not process IMG tags.

=item B<-s>

Silent mode, do not echo file names as they are processed.

=item B<-v>I<directory>

The directory which processed Vend source will be written to, defaults
to F<VendRoot/pages>.

=back

=head2 Dependencies

Requires the L<perlmod/Getopt::Std> and I<find.pl> library modules.

Requires 'mkdir C<->p' if the directories are to be automatically created.
The File::Path module could easily be patched in to solve this.

=head2 Author

 <A HREF="mailto:mikeh@iac.net">
Mike Heins, Internet Robotics
 </A>

=cut

# Usage summary shown when option parsing fails.  It lists every switch
# accepted by the getopts() specification below.
$USAGE = <<EOF ;

vendpage [-3mns] [-h htmldir] [-i imagepath] [-r reportfile] [-t toolbarfile] [-v venddir]
EOF

# Extra library directory searched by the run-time require calls that follow
# (the use statement below is resolved at compile time, before this runs).
unshift(@INC,'/home/billc/perl');

use Getopt::Std;
# getopts() reports the offending switch on STDERR itself and does not set
# $@, so only the usage summary is added here.
getopts('3h:i:nmr:st:v:') 
	or die "Bad option\n$USAGE\n";

# Command-line values override the configured defaults.  Each $opt_x is
# mentioned twice, presumably to keep perl -w from warning that the
# variable is used only once.
$HtmlDir    = $opt_h || $opt_h || $HtmlDir;
# $ImageDir is used as a prefix by fixpath, hence the trailing slash
$ImageDir	= $opt_i ? "$opt_i/" : "$ImageDir/";
$RENAME 	= $opt_m || $opt_m || 0;
$NOIMAGE	= $opt_n || $opt_n || 0;
$RPTFILE	= $opt_r || $opt_r || $RPTFILE;
$SILENT		= $opt_s || $opt_s || 0;
# Left undefined when -t is absent; the main program tests for that
$TOOLBAR	= $opt_t || $opt_t;
$VENDPAGE   = $opt_v || $opt_v || $VENDPAGE;
$VEND3		= $opt_3 || $opt_3 || 0 ;

# Directory traversal.  find.pl is the Perl 4 library this script was written
# against; it is used when it can be loaded.  Where it is not installed, an
# unconditional require would abort the script, so fall back on File::Find
# and supply the same interface: a find() in this package that calls &wanted
# with $_ set to the file name and with $name and $dir holding the full path
# and the containing directory.
unless (eval { require "find.pl"; 1 }) {
	require File::Find;
	*find = sub {
		File::Find::find(
			sub {
				local($name, $dir) = ($File::Find::name, $File::Find::dir);
				&wanted;
			},
			@_
		);
	};
}
sub fixpath;	# Forward declaration
sub pageit;		# Forward declaration
sub wanted;		# Forward declaration

# Log info and errors
#
# Every argument is written to the REPORT file handle as one line.  An
# argument that is terminated with a newline is additionally sent to
# STDERR.  The trailing newline is removed before the line is written, so
# such messages do not leave a blank line in the report.
sub logit {
	my($arg, $line);
	foreach $arg (@_) {
		# Work on a copy: the elements of @_ alias the caller's values,
		# which may be read-only string constants.
		$line = $arg;
		warn "$line\n" if $line =~ s/\n$//;
		print REPORT "$line\n";
	}
}

# Main program: open the report (append mode), load the optional toolbar
# text, then walk the HTML tree; find calls &wanted for every entry.
open(REPORT,">>$RPTFILE")
	or die "Couldn't write report file $RPTFILE: $!\n";

$now = localtime;
logit("Starting page manager at $now");
if(defined $TOOLBAR) {
	# Read the toolbar file directly instead of running `cat` through the
	# shell: the name comes from the command line, and a failure to read
	# it is reported rather than silently yielding empty text.
	if(open(TOOLFILE, "< $TOOLBAR")) {
		local($/) = undef;
		$Tooltext = <TOOLFILE>;
		close TOOLFILE;
	}
	else {
		logit("Couldn't read toolbar file $TOOLBAR: $!\n");
		$Tooltext = '';
	}
}
else { $TOOLBAR = 0; }

# Unbuffered STDOUT so the per-file progress messages appear immediately
$| = 1;
&find($HtmlDir);

# Callback run by find for every entry below $HtmlDir.
#
# On entry $_ holds the bare file name, while $name and $dir (set by the
# find library before each call) hold the full path and the containing
# directory.  Files whose name ends in .htm or .html are read, transformed
# and written to the corresponding lower-cased path below $VENDPAGE; any
# other entry is ignored.  With $RENAME set, a source *.htm file is also
# renamed to *.html.  Problems are logged and the file is skipped.
sub wanted {
	my $pagename;
	my $pagedir;
	my $reldir;
	my $newname;
	my $html;

	$pagename = $_;

	return unless $name =~ /\.html?$/;

	open(HTMLFILE,$name)
		or do {
			logit("Couldn't open $name: $!\n");
			return;
		};

	# Slurp whole file; $/ is localized so the change ends with this call
	local($/) = undef;
	$html = <HTMLFILE>;
	close HTMLFILE;

	# Figure out where it goes.  $HtmlDir is quoted so that regex
	# metacharacters in the path are matched literally.
	unless (($reldir = $dir) =~ s:^\Q$HtmlDir\E/?::) {
		logit("Bad path found: $dir is not below $HtmlDir\n");
		return;
	}

	# Only the part below $VENDPAGE is lower-cased, and the directory that
	# is created is the very one the page is written into.
	$reldir = "\L$reldir";
	$pagedir = length($reldir) ? "$VENDPAGE/$reldir" : $VENDPAGE;
	$pagename =~ s/\.htm$/.html/ ;
	$pagename = "$pagedir/\L$pagename";

	print "Doing $pagename..." unless $SILENT;

	# Make the directory if it isn't there.  The list form of system
	# bypasses the shell; failure is described by $?, not $!.
	unless (-d $pagedir) {
		system('mkdir', '-p', $pagedir)
			and do {
				logit("Couldn't make directory $pagedir (mkdir status $?)\n");
				return;
			};
	}

	open(PAGEFILE,">$pagename")
		or do {
			logit("Couldn't create $pagename: $!\n");
			return;
		};
	
	# Here we finally do the transform
	# 1. <A HREF=...>text</A> links are handed to pageit
	$html =~ s:
            <A \s+ HREF \s*
                =
            \s* ['"]? ( [^'">]+ ) ["']? \s* >
            ([\000-\377]*?)
            <\s*/a\s*>
            :pageit($_, $dir, $1, $2):xigem;
	#$html =~ s#( \[\s*page \s+ ) .*\.\./#$1#xig;
	# 2. <!-- [vend] ... [/vend] --> comments are reduced to their contents
	$html =~ s#<! \s* -- \s* \[ \s* vend \s* \]
				([\000-\377]*?)
				\[ \s* /vend \s* \] \s* --?>#$1#xigm;
	# 3. the toolbar text is inserted before the first <HR
	($html =~ s# ( < \s* HR)
			#$Tooltext . $1#xie) if $TOOLBAR;
	# 4. IMG sources are passed through fixpath
	($html =~ s# ( < \s* IMG \s+ .*? SRC \s* = \s* ['"]? ) 
			([^"'>\n]+)
			#$1 . fixpath($2)#xigem) unless $NOIMAGE;
	
	print PAGEFILE $html;
	# Buffered write errors only show up when the file is closed
	close(PAGEFILE)
		or logit("Couldn't finish writing $pagename: $!\n");

	# Move a .htm file to a .html file
	if($RENAME && ($newname = $name) =~ s/\.htm$/.html/) {
		if(rename($name, $newname)) {
			logit("Moved $name to $newname\n");
		}
		else {
			logit("Couldn't move $name to $newname: $!\n");
		}
	}
	print "done.\n" unless $SILENT;

}

# Turn one <A HREF="...">anchor</A> link into Vend [page] markup.
#
# Arguments:
#   $file   - name of the file being processed (used in log messages only)
#   $reldir - directory of that file; it must begin with $HtmlDir
#   $page   - the HREF value taken from the tag
#   $anchor - the text between the opening and closing anchor tags
#
# Returns the text that replaces the link:
#   - the link itself, rebuilt with double quotes, when the HREF starts
#     with '/', with '#', or with a protocol specification;
#   - '[page dir/name]anchor[/page]' for a relative link, with the page
#     name in double quotes when $VEND3 is set;
#   - '[page notfound]LINK ERROR[/page]' when the link cannot be
#     converted (the problem is logged).
sub pageit {

	my $file = shift;
	my $reldir = shift;
	my $page = shift;
	my $anchor = shift;
	my $href = $page;		# kept as found, for log messages
	my $unchanged = qq|<A HREF="$page">$anchor</A>|;
	my $error = '[page notfound]LINK ERROR[/page]';
	my $done;

	# Absolute links and same-document anchors are left alone.  Any
	# protocol is recognized, in either case, not just a fixed list.
	if($page =~
	m~^\s*(/|#|[a-z][a-z0-9+.\-]*:)~i) {
		return $unchanged;
	}

	# Force all lower case pagenames, strip leading/trailing whitespace
	$page = lc $page;
	$page =~ s:^\s*::;
	$page =~ s:\s*$::;

	# Strip .html extension
	$page =~ s/\.html?\s*//;

	unless ($file and $reldir and $page) {
		logit("Bad HREF found in $reldir/$file: '$href'");
		return $error;
	}

	# Strip the leading part of basepath; $HtmlDir is quoted so that regex
	# metacharacters in the path are matched literally
	unless($reldir =~ s:^\Q$HtmlDir\E/?::) {
		logit("Bad path found in $reldir/$file: '$href'");
		return($error);
	}

	# Pages are written under lower-cased paths, so the directory part of
	# the link is lower-cased along with the page name
	$reldir = lc $reldir;
	$reldir = "$reldir/" if length($reldir);

	$done	= $VEND3
			? ('[page "' .  $reldir .  $page . '"]' . $anchor. '[/page]')
			: ('[page '  .  $reldir .  $page . ']' . $anchor . '[/page]');

	return $done;

}

# Rewrite the source of an IMG tag so that a relative image is looked for
# in the image directory.
#
# Argument: the SRC value as found in the tag.
# Returns:  the value with surrounding whitespace removed.  A value that
#           starts with '/' or with a protocol specification (http:, ftp:
#           and so on) is returned otherwise unchanged; for anything else
#           the directory part is dropped and $ImageDir is prepended.
sub fixpath {

	my $string = shift;

	$string =~ s:^\s*::;
	$string =~ s:\s*$::;

	# don't touch if already absolute, whether a server path or a full
	# URL: rewriting a URL would point it at a local file instead
	return($string) if $string =~ m~^(/|[a-z][a-z0-9+.\-]*:)~i;

	# keep only the file name
	$string =~ s:.*/::;

	return $ImageDir . $string;

}
