#!/usr/bin/perl
#
# $Id: archieplex.pl,v 2.2 1993/07/16 18:50:16 sanders Exp $
#
# ArchiePlex: Archie search to HTML in Perl
# Version 1.01
# Date: Mon Jul  5 08:45:06 BST 1993
#
# Martijn Koster (m.koster@nexor.co.uk)
# Improvements by Tony Sanders (sanders@bsdi.com)
#
# This program is placed in the public domain;
# you are free to redistribute it without restriction,
# but it would be nice if you would leave the credits
# and mention any changes you've made from the original.
#
# Known bugs: sometimes I get a "bad free ignored"...
#
# Requires archie command line client, tested with
# "Client version 1.4.1 based upon Prospero version Beta.4.2E with debugging."

# To install in Plexus:
#
# Change constants at the top of the subroutine
#
# Change plexus.conf to include:
# in the "Scripts to pre-load" section:
#         "$http_sdir/archieplex.pl",
# in the "Mapping functions": section
#        $map{'archieplex'} = 'require "$http_sdir/archieplex.pl";
#              &do_archieplex($_, $top, $rest, $query)';

# uncomment these to debug standalone.
#
#&do_archieplex("archieplex/archie.sura.net/substring/by-host",
#    "archieplex", "archie.sura.net/substring/by-host", "cshrc");

package archieplex;

# Configuration

$archie		= "/xtel/users/mak/bin/archie";
$defaultserver	= "archie.sura.net";
$maintainer	= "-- <A HREF=\"http://web.nexor.co.uk/mak/mak.html\">Martijn koster</A>";
$info		= "<A HREF=\"http://web.nexor.co.uk/archie-plex/info.html\">ArchiePlex info</A>.";

# associations between types and archie commandline arguments

$args{"by-host"}     = "";
$args{"by-date"}     = "-t";

$args{"exact"}	     = "-e";
$args{"substring"}   = "-s";
$args{"subcase"}     = "-c";
$args{"regexp"}      = "-r";

# idem for titles

$titles{"by-host"}   = "";
$titles{"by-date"}   = "by Date";

$titles{"exact"}     = "ArchiePlex Exact Search";
$titles{"substring"} = "ArchiePlex Substring Search";
$titles{"subcase"}   = "ArchiePlex Case Insensitive Substring Search";
$titles{"regexp"}    = "ArchiePlex Regular Expression Search";

# the archieplex subroutine (exported to package main)

sub main'do_archieplex
{
    # $top is the first component of the url, indicating the gateway selector, e.g. "archieplex"
    # $rest is the rest of the URL, e.g. "archie.sura.net/substring/by-host"
    # $simple is $top/$rest
    # $query is a list of search words separated by +, e.g. "gnu+pcnfsd.c"
    local($simple, $top, $rest, $query) = @_;

    # in ArchiePlex a URL $rest looks like <server>/<type>/<order>
    # where server is a domain name or IP address
    # and type is one of "exact", "substring", "subcase", or "regexp"
    #
    # e.g.:  http://web.nexor.co.uk/archieplex/
    #        http://web.nexor.co.uk/archieplex/archie.sura.net/subcase/by-host
    local($server, $type, $order) = split('/', $rest);

    $server = $defaultserver if ($server eq "default" || !$server);
    $order = "by-host" if (!$order);
    $type = "substring" if (!$type);

    # A title consists of the type title and the order title
    local($title) = join(' ', $titles{$type}, $titles{$order});

    $query ? &archie_data : &archie_index;

    print "<P>________________________________________\n";
    print "<ADDRESS>$maintainer</ADDRESS>\n";
    print "</BODY>\n";
}

sub archie_data {
    local(@terms) = split('\+', $query);	# searches are separated by +

    # construct warchie command line. The -l produces a per-line parseable listing.
    local($cmd) = join(' ', $archie, " -l -h ", $server, $args{$type}, $args{$order}, @terms);

    open(ARCHIE_IN, "$cmd |") || &main'error('internal_error', "Can't run archie: $!");

    # process results

    &main'MIME_header('ok', 'text/html');
    print "<HEAD>\n<TITLE>Result for $title</TITLE>\n</HEAD>\n<BODY>\n";
    print "<H1>Result for $title</H1>\n";

    local($listterms) = join(' ', @terms);

    print "These are the results found for the\n";
    print "<A HREF=\"/$top/$server/$type/$order\">$titles{$type}$titles{$order}</A>\n";
    print "for <CODE>$listterms</CODE>\n";

    while(<ARCHIE_IN>) {
	chop;
	local($datetime, $size, $host, $path) = split;
	local($date_fields) = 'A4 A2  A2 A2 A2 A2';	# fixed width date fields
	local($year, $month, $day, $hour, $min, $sec) = unpack($date_fields, $datetime);
	$year = $year - 1900 if ($year >= 1900 && $year < 2000);
	local($datedescription) = join('', $day, '-', $month, '-', $year);

	# maintain sections per host

	if ($host ne $lasthost) {
	    print "</UL>\n" if $lasthost;		# terminate last list
	    print "<H2><A HREF=\"file://$host/\">$host</A></H2>";
	    print "<UL>\n";

	    $lasthost = $host;
	}

	# print per-line info

	if ($path=~/\/$/) {				# it's a directory
	    chop $path;
	    print "<LI>Directory <A HREF=\"file://$host$path\">$path</A> ($datedescription)\n";
	} else {
	    # split path from file

	    local(@comps) = split('/', $path);
	    local($file) = pop(@comps);
	    local($location) = join('/', @comps);

	    local($sizedescription) = join('', $size, " bytes");
	    $sizedescription = join('', int($size / 1000), "K") if ($size > 1000);
	    $sizedescription = join('', int($size / 1000000), "M") if ($size > 1000000);

	    print "<LI>\n";
	    print "<A HREF=\"file://$host$path\">$file</A> $sizedescription ($datedescription) in\n";
	    print "<A HREF=\"file://$host/$location/\">$location/</A>\n";
	}
    }
    print "</UL>\n" if $lasthost;
}

sub archie_index {
    &main'MIME_header('ok', 'text/html');
    print "<HEAD>\n<ISINDEX>\n";
    print "<TITLE>$title</TITLE>\n</HEAD>\n<BODY>";
    print "<H1>$title</H1>\n";

    print "ArchiePlex locates files available for anonymous FTP.\n";
    print "This service uses the Archie server at <CODE>$server</CODE>\n";
    print "to execute the searches.\n";
    print "For more information see $info\n";

    print "<P>Please specify a search term.\n";
    print "<P>See also:\n";

    print "<UL>\n";
    ($order eq "by-date")  && print "<LI><A HREF=\"/$top/$server/substring/by-host\">This search sorted by Host</A>\n";
    ($order eq "by-host")  && print "<LI><A HREF=\"/$top/$server/substring/by-date\">This search sorted by Date</A>\n";
    ($type ne "substring") && print "<LI><A HREF=\"/$top/$server/substring/by-host\">$titles{substring}$titles{by-date}</A>\n";
    ($type ne "subcase")   && print "<LI><A HREF=\"/$top/$server/subcase/by-host\">$titles{subcase}$titles{by-date}</A>\n";
    ($type ne "exact")     && print "<LI><A HREF=\"/$top/$server/exact/by-host\">$titles{exact}$titles{by-date}</A>\n";
    print "</UL>\n";
}

1;
