#!/u3/thesis/clipper/bin/perl
# Copyright (C) Khun Yee Fung clipper@csd.uwo.ca 1991
#
# Modified by Peter Orbaek (poe@daimi.aau.dk) to look more perl'ish.
#
# You can do anything to this program except selling it for profit or
# pretending you wrote it. The copyright notice must be preserved in all 
# copies. Absolutely no warranty.
#
# $Id: archie,v 3.8 1991/08/12 17:05:18 clipper Exp clipper $
#
# This version of the program is based on Beta 4.2 of prospero protocol.
# The Version number of this release is $Revision: 3.8 $.

eval "exec perl -S $0 $*"
	if $running_under_some_shell;

require 'resolver.pl';
require 'sys/socket.ph';
require 'newgetopt.pl';
require 'archie.depend';
$servername =~ tr/A-Z/a-z/;

select(STDOUT); $| = 1;

# To get the options on the command line. Explanations are in the code
# handling them.
&NGetOpt('match=i', 'reg', 'exact', 'nocase', 'case', 'server=s',
	 'ffile=s', 'format=s', 'along', 'norc', 'syntax', 'version',
	 'sort=s', 'reverse', 'rc=s', 'domain=s', 'aftp');

# Get the name of this program. The last element is the one.
@prog = split('/', $0);
$prog = $prog[$#prog];

# Usage string.
# The options -syntax and -aftp are invisible because -syntax is used only
# to check the syntax of the program and -aftp is useful only for the archie
# interface of the nftp program.
$usage = 
"Usage: $prog [options] word1 word2 ...
  Where options are one or more of the following:
  -along            Print the entries when they are available.
  -case             Case sensitive
  -nocase           Case insensitive
  -exact            Exact match
  -reg              Regular expression match
  -match \#          Max hits
  -server hostname  An alternative archie server
  -ffile filename   Use a format file
  -format string    Specify a format string
  -norc             Do not read .archierc file in home directory.
  -version          Print the version number of the program.
  -rc filename      Read another file as the startup file.
  -sort [date|host] Sort by date ot host.
  -reverse          Reverse sorting order.
  -domain string    Use the order in the string to sort the hosts.
";

($Revision) = ('$Revision: 3.8 $' =~ /Revision: ([\d\.]+)/);
$version = "Prospero Beta.4.2 (Perl Archie Client Version $Revision)\n";

# Should have at least one query.
if ($#ARGV < 0) {
    if (defined($opt_version)) {
        print $version;
        exit(0);
    }
    print "Please specify at least one query.\n";
    print $usage;
    exit(255);
}
@string = @ARGV;

%domainorder = ('ca', 1, 'edu', 2, 'com', 3, 'gov', 4, 'net', 5,
		'de', 6, 'dk', 7, 'nl', 8, 'fi', 9, 'se', 10,
		'au', 1000, 'nz', 1001);

# For the conversion of date in the subroutine date.
%month = ('Jan', 1, 'Feb', 2, 'Mar', 3, 'Apr', 4, 'May', 5, 'Jun', 6, 
	  'Jul', 7, 'Aug', 8, 'Sep', 9, 'Oct', 10, 'Nov', 11, 'Dec', 12);
@month = ('', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 
	  'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec');

# The archie port number is 1525.
$port = 1525;

# The socketaddr structure. See /usr/include/sys/socket.h for the C 
# version.
$sockaddr = 'S n a4 x8';

# Defaults: maximum hit is 40. It does not mean there will be exactly
# 40 entries returned, though. Expect a few entries more or less.
# The default search option is case insensitive.
$match = 40; $case = 'S'; $pnum = 1;

# The default format string. Can be overiden by the -format or -ffile 
# options. Can also specify a default format string in ~/.archierc
$format = "%02seq Host %host

    Location: %dir
      %10type %mode %08size %date %name

";

# To get the user name and user home path.
@pw = getpwuid($<);
$user = $pw[0];
$userpath = $pw[7];

# Read the system startup file if there is one. Set the filename in
# archie.depend.

&parserc($startup);

$startfile = defined($opt_rc) ? $opt_rc : "$userpath/.archierc";
$along = defined($opt_along);
&parserc($startfile) unless (defined($opt_norc));  # Read ~/.archierc?
$match = $opt_match if (defined($opt_match)); # how many hits wanted?
print $version      if (defined($opt_version)); # Print version number?
&pdomain($opt_domain) if (defined($opt_domain)); # Get a domain order?

# The sort option. Default is by the domains of the hosts.
$sortpack = 'host';
if ($opt_sort) {
    if ($opt_sort eq 'date') {
	$sortpack = 'date';
    }
    elsif ($opt_sort eq 'host') {
	$sortpack = 'host';
    }
    else {
	print "Not valid sort field: $opt_sort. Assume host.\n";
	$sortpack = 'host';
    }
}
$reversesort = defined($opt_reverse);

# Read a format string from a file.
if (defined($opt_ffile)) {
    open(FFILE, "$opt_ffile") || die "Can't open format file $opt_ffile\n";
    # slurp in the whole file
    undef $/; $format = <FFILE>; $/ = "\n";
    close FFILE;
}

# Read a format string on the command line.
$format = $opt_format if (defined($opt_format));

# Set the search option.
$case = $ecase = '=' if (defined($opt_exact));  # Exact match
$case = 'C' if (defined($opt_case));   # Set search option to case sensitive.
$case = 'S' if (defined($opt_nocase)); # Set search option to case insensitive.
$case = 'R' if (defined($opt_reg));    # search using a regular expression.
$case =~ tr/A-Z/a-z/ if ($ecase eq '=');

# set a new archie server.
if (defined($opt_server)) {
    $serverip = $servername = $opt_server;
    $servername =~ tr/A-Z/a-z/;
}

# Support for a aftp pipe. [Useful only for the program nftp.]
$format = "%type:%host:%dir\n" if ($opt_aftp);

# parse the format string,
$format = &parseformat($format);

# This is for checking the format etc. Not for external use :-)
if ($opt_syntax) {
    print "Execution until here.\n";
    exit(0);
}

# Get the IP address of the archie server.
if ($serverip =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/) {
  $thataddr = pack("CCCC", $1, $2, $3, $4);
  $serverip = $servername;
}
elsif (!(($name, $aliases, $type, $len, $thataddr) = 
	 gethostbyname($servername))) {
    $thataddr = &resolver($servername, $nsserver) || 
	die "Can't find the IP address of the archie server $servername\n";
    $serverip = join('.', unpack("CCCC", $thataddr));
}
else {
    $serverip = join('.', unpack("CCCC", $thataddr));
}

$them = pack($sockaddr, &AF_INET, $port, $thataddr);

# now construct our own address
# dnb@meshugge.media.mit.edu gave the patch to satisfy taintperl. 
$PATH = $ENV{'PATH'};
$ENV{'PATH'} = '/bin:/usr/bin';
chop($thishost = `hostname`);
$ENV{'PATH'} = $PATH;
($name, $aliases, $type, $len, $thisaddr) = gethostbyname($thishost);
$us = pack($sockaddr, &AF_INET, 0, $thisaddr);

# get and bind a socket.
socket(DATA, &AF_INET, &SOCK_DGRAM, 0) || die "socket:$!\n";
bind(DATA, $us) || die "bind: $!\n";

# Get the list of matches.
@lists = &list($them, $user, $match, @string);

# Print them.
&result(@lists) unless ($along);

close(DATA);

sub getpacket {
    local($restime) = @_;
    local($seq, $rin, $timeleft, $rout, $ans, $id, $hbyte, $rdp, $hdr_len);
    local($header, $backoff, $kk, $dum, $flags, $wantack, $pktsnum, $nfound);
    $seq = 0;

    # wait for a packet to come back.
    $rin = '';
    vec($rin, fileno(DATA), 1) = 1;
    ($nfound, $timeleft) = select($rout = $rin, '', '', $restime);
    if ($timeleft == 0 || ord($rout) == 0){
	return(0);
    }

    # Read a packet from the server.
    $ans = '';
    recv(DATA, $ans, 10000, 0) || die "recv: Can't recv. Die.\n";

    $hbyte = ord(substr($ans, 0, 1));
    $header = '';
    if ($hbyte < 20) {
	$rdp = ($hbyte & 0xc0) >> 6;
	$hdr_len = $hbyte & 0x3F;
	$header = substr($ans, 0, $hdr_len);
	substr($ans, 0, $hdr_len) = '';
	$backoff = $seq = $kk = $flags = 0;
	($dum, $id, $seq, $kk, $dum, $backoff, $flags) =
	    unpack("Cnnnnnn", $header);
	# Should I acknowledge?
	$wantack = (($flags & 0x8000) != 0);
	$pktsnum = ($kk) ? $kk : 0;
	$timeout = $backoff if ($backoff);
    }
    else {
	$seq = 1;
	$pktsnum = 1;
	$wantack = 0;
	$timeout = 0;
    }
    return (1, $seq, $wantack, $pktsnum, $timeout, $ans);
}

# The subroutine list is the `meat' of the query.
# It sends the query to the archie server host and parses the entries
# returned by the server.
sub list {
    local($them, $user, $match, @words) = @_;
    local($ans, $timeout, $retries, $lines, @lines);
    local($pktsnum, $pktseq, $seq, $timeleft, $acktime);
    local($dum, $backoff, $word, $index, @received, $recthrough);
    local($sq, $waxk, $pkts, $tout);
    
    $timeout = 4;
    $retries = 3;
    $acktime = 0.3;
    @received = ('YES');

    # Construct the query packet.
    @lines = ("VERSION 1\n", "AUTHENTICATOR UNAUTHENTICATED $user\n");
    foreach $word (@words) {
	push(@lines, "DIRECTORY ASCII ARCHIE/MATCH($match,0,$case)/$word\n");
	push(@lines, "LIST ATTRIBUTES COMPONENTS \n");
    }
    $lines = join('', @lines);

    $recthrough = 0;

  RETRY: 
    {
	$head = pack("Cnnnn", 9, $$, 1, 1, $recthrough);
	send(DATA, $head . $lines, 0, $them)
	    || die "send: Failed to send packet: $!";

	$pktsnum = 0;
	while ($pktsnum == 0 || $pktseq < $pktsnum) {
	    $restime = $timeout;
	    ($res, $sq, $wack, $pkts, $tout, $ans) = &getpacket($restime);
	    if (!$res){
		if ($retries-- > 0) {
		    $timeout *= 2;
		    redo RETRY;
		}
		else {
		    die "No responses from the archie server.\n";
		}
	    }
	    do {
		$seq = $sq;
		$timeout = $tout if ($tout);
		$pktsnum = $pkts if ($pkts);
		$wantack++ if ($wack);
		if ($seq) {
		    if ($received[$seq] ne 'YES') {
			# not duplicate packet.
			$retries = 3;
			foreach $i (($#received + 1) .. ($seq - 1)) {
			    $received[$i] = "NO $i";
			}
			$received[$seq] = 'YES';
			$ans =~ s/\000//g;
			$answer[$seq] = $ans unless ($recthrough >= $seq);
			@notyet = grep(/^NO/, @received);
			if ($#notyet < 0) {
			    $recthrough = $#received;
			    $pktseq = $#received;
			}
			else {
			    $notyet[0] =~ /NO (\d+)$/;
			    $recthrough = $1 - 1;
			    $pktseq = $1 - 1;
			}
			if ($along) {
			    &alongtheway($recthrough, 0);
			}
		    }
		    if ($pktsnum == 0 || $pktseq < $pktsnum) {
			($res, $sq, $wack, $pkts, $tout, $ans) = 
			    &getpacket($acktime);
		    }
		    else {
	                $head = pack("Cnnnn", 9, $$, 1, 1, $recthrough);
			last;
		    }
		}
	    } until (!$res || $seq == 0);
	    $head = pack("Cnnnn", 9, $$, 1, 1, $recthrough);
	    if ($wantack) {
		send(DATA, $head . $lines, 0, $them)
		    || die "send: Failed to send an acknowledgement: $!";
		$wantack = 0;
	    }
	}
    }
    if ($wantack) {
	send(DATA, $head . $lines, 0, $them)
	    || die "send: Failed to send an acknowledgement: $!";
    }
    if ($along) {
	&alongtheway($recthrough, 1);
    }
    @answer;
}

# Print the entries in a packet.
sub parselist {
    local(@lists) = @_;
    local(@lines, $dum, $lastmod, $modes, $size, $dir, $entry);
    local($name, @attr, @ainfo, $type);

    $entry = 0;
    # split the lines in the packet first.
    @lines = split(/\n/, join('', @lists));
    foreach $line (@lines) {
	# If a LINK L line, then get the initial fields for the
	# entry. Output the last entry if there is one.
	if ($line =~ /^LINK L/) {
	    &store($host, $type, $dir, $size, $modes, $lastmod, $name) 
		if ($entry);
	    $type = $name = $host = $dir = '';
	    $size = $modes = $lastmod = '';
	    $#attr = $#ainfo = -1;
	    ($dum, $dum, $type, $name, $dum, $host, $dum, $dir, $dum, $dum) =
		split(/ /, $line);
	    $host =~ tr/A-Z/a-z/;
	    $entry = 1;
	}
	elsif ($line =~ /^LINK /) {
	    # What should I do if the response is LINK but not L?
	}
	elsif ($line =~ /^LINK-INFO/) {
	    # A LINK-INFO line. Get one attribute per line.
	    ($dum, $dum, $attr, $dum, @info) = split(/ /, $line);
	    if ($attr eq 'SIZE') {
		$size = join(' ', @info);
	    }
	    elsif ($attr eq 'UNIX-MODES') {
		$modes = join(' ', @info);
	    }
	    elsif ($attr eq 'LAST-MODIFIED') {
		$lastmod = join(' ', @info);
	    }
	    else {
		push(@attr, $attr);
		push(@ainfo, join(' ', @info));
	    }
	}
	elsif ($line =~ /^VERSION-NOT-SUPPORTED TRY (\d+)-(\d+),(\d+)/) {
	    die "Version of archie server ($1-$2, $3) not supported.\n";
	}
	elsif ($line =~ /^NOT-A-DIRECTORY/) {
	    print "Archie error: Not a directory.\n";
	}
	elsif ($line =~ /^UNRESOLVED/) {
	    print "Archie error: Unresolved entries.\n";
	}
	elsif ($line =~ /^FILTER/) {
	}
	elsif ($line =~ /^OBJECT-INFO/) {
	}
	elsif ($line =~ /^NONE-FOUND/) {
	}
	elsif ($line =~ /^SUCCESS/) {
	}
	elsif ($line =~ /^FORWARDED/) {
	    print "Archie error: No forwarding allowed.\n";
	}
	elsif ($line =~ /^FAILURE/) {
	    print "Archie server returns error. \n";
	    if ($line =~ /^FAILURE NOT-AUTHORIZED/) {
		print "Probably Max. hit too high. Use smaller -match value\n";
	    }
	    else {
		print "The error message is:\n";
		print $line;
	    }
	}
	elsif ($line =~ /^NOT-AUTHORIZED/) {
	    print "Archie error: Not authorized.\n";
	}
	else {
	}
    }
    &store($host, $type, $dir, $size, $modes, $lastmod, $name) if ($entry);
}

# Write the fields out on terminal using the format string.
sub write {
    local($host, $type, $dir, $size, $mode, $lastmod, $name) = @_;
    local($seq, @path, $date, $path);

    # Convert the date string from 19910713123250Z to
    # 1991 Jul 13 12:32:50 GMT
    $date = ($lastmod eq '') ? 'No Date' : &date($lastmod);
    $seq = $pnum++;

    # print the entry. Die if something is wrong. Should I
    # Log the output in a file so the effect is not wasted?
    eval "printf $format"
	|| die "A syntax error occured when printing the format string: $@\n";
}

# Convert a string.
sub date {
    local($date) = @_;
    local($year, $month, $day, $hour, $min, $sec) =
	(0, 1, 0, 0, 0, 0);
    local($zone) = 'Z';

    ($year, $month, $day, $hour, $min, $sec, $zone) = 
	($date =~ /^(\d\d\d\d)(\d\d)(\d\d)(\d\d)(\d\d)(\d\d)(.*)/);

    # A time zone Z is the same as GMT.
    if ($zone eq 'Z') {
	$zone = 'GMT';
    }
    "$year $month[$month] $day $hour:$min:$sec $zone";
}

# Parse the format string to convert it to a valid perl format
# string.
sub parseformat {
    local($string) = @_;
    local($nstring, $index, @plist);

    $string =~ s/([\$\{\}\@\*])/\\$1/g;
    $nstring = '';
    $#plist = -1;
    while (($index = index($string, '%')) >= 0) {
	$nstring .= substr($string, 0, $index);
	substr($string, 0, $index) = '';
	if (substr($string, 1, 1) eq '%') {
	    substr($string, 0, 2) = '';
	    $nstring .= '%%';
	}
	elsif ($string =~ /^\%(\d*)(host|dir|mode|date|seq|size|name|type)/) {
	    push(@plist, "\$$2");
	    if ($2 eq 'size' || $2 eq 'seq') {
		$nstring .= "\%$1d";
	    }
	    else {
		$nstring .= "\%$1s";
	    }
	    substr($string, 0, length($1 . $2) + 1) = '';
	}
	else {
	    die sprintf("$prog: Format error. Unknown field: %s\n", $string);
	}
    }
    $nstring .= $string;
    $nstring = '"' . $nstring . '"';
    join(', ', $nstring, @plist);
}

# Parse the startup file ~/.archierc
# The format of the file is very simple:
# command option
# The format command must be the last one.
sub parserc {
    local($startfile) = @_;
    local($domain, @domain);
    if (-e $startfile && -r $startfile) {
	open (RC, $startfile) || return;
	while (<RC>) {
	    chop;
	    if (/^\s*match\s+(\d+)\s*$/) {
		$match = $1;
	    }
	    elsif (/^\s*sort\s+/) {
		if (/^\s*sort\s+date\s*$/) {
		    $sortpack = 'date';
		}
		elsif (/^\s*sort\s+host\s*$/) {
		    $sortpack = 'host';
		}
		else {
		    print "Unknown sort field in startup file: $startfile\n";
		}
	    }
	    elsif (/^\s*domain\s+(.*)$/) {
		&pdomain($1);
	    }
	    elsif (/^\s*search\s+([a-z]+)\s*$/) {
		if ($1 eq 'case') {
		    $case = 'C';
		}
		elsif ($1 eq 'nocase') {
		    $case = 'S';
		}
		elsif ($1 eq 'reg') {
		    $case = 'R';
		}
		elsif ($1 eq 'exact') {
		    $case = '=';
		}
		else {
		    print "$prog: $user/.archierc: unknown search option $1\n";
		}
	    }
	    elsif (/^\s*host\s+(.+)\s*$/) {
		$archieserver = $1;
	    }
	    elsif (/^\s*format\s*$/) {
		undef $/; $format = <RC>; $/ = "\n";
		last;
	    }
	    elsif ($_ =~ /^\s*$/ || $_ =~ /^\s*\#/) {
		# Empty or comment line in the startup file.
	    }
	    else {
		print "$prog: Unknown option in $user/.archierc: $_\n";
	    }
	}
	close(RC);
    }
}

sub store {
    local($host, $type, $dir, $size, $mode, $lastmod, $name) = @_;

    $type = ($type eq 'DIRECTORY') ? 'Directory' : 'File';
    if ($type eq 'Directory' && $dir =~ m.ARCHIE/HOST.) {
	($archie, $dum, $host, $dir) = 
	    ($dir =~ m|([^/]+)/([^/]+)/([^/]+)/(.*)$|);
	$dir = '/' . $dir;
    }
    push(@s_lastmod, $lastmod);
    push(@s_name, $name);
    push(@s_host, $host);
    push(@s_type, $type);
    push(@s_dir, $dir);
    push(@s_size, $size);
    push(@s_mode, $mode);
}

sub result {
    local(@lists) = @_;
    local(%entries, $host, $index, @order, @host, $order, $field);
    $#s_lastmod = -1;
    $#s_name = -1;
    $#s_host = -1;
    $#s_type = -1;
    $#s_dir = -1;
    $#s_mode = -1;
    $#s_size = -1;
    &parselist(@lists);
    $index = 0;
    %entries = ();
    @field =  ($sortpack eq 'date') ? @s_lastmod : @s_host;
    foreach $field (@field) {
	$entries{$field} .= "$index ";
	$index++;
    }
    @order = ($sortpack eq 'date') ? sort sortdate @s_lastmod : 
	sort sorthost @s_host;
    foreach $order (@order) {
	if ($entries{$order} ne '') {
	    @indexes = split(' ', $entries{$order});
	    foreach $i (@indexes) {
		&write($s_host[$i], $s_type[$i], $s_dir[$i],
		       $s_size[$i], $s_mode[$i], $s_lastmod[$i], $s_name[$i]);
	    }
	    $entries{$order} = '';
	}
    }
}

sub sorthost {
    local($t);
    local($c, $d);
    @c = split(/\./, $a);
    @d = split(/\./, $b);
    $domainorder{$c[$#c]} = 1100 if ($domainorder{$c[$#c]} eq '');
    $domainorder{$d[$#d]} = 1100 if ($domainorder{$d[$#d]} eq '');
    $t = ($domainorder{$c[$#c]} > $domainorder{$d[$#d]}) ? 1 :
	($domainorder{$c[$#c]} < $domainorder{$d[$#d]}) ? -1 : 0;
    ($reversesort) ? -$t : $t;
}

sub sortdate {
    local($t);
    local(@c, @d, $c, $d, $e, $f);
    $c = $a; $d = $b;
    @c = split(/ /, $c);
    @d = split(/ /, $d);
    $e = join(' ', $c[0], "$month{$c[1]}", @c[2 .. 6]);
    $f = join(' ', $d[0], "$month{$d[1]}", @d[2 .. 6]);
    $t = $e gt $f ? 1 : $e lt $f ? -1 : 0;
    ($reversesort) ? -$t : $t;
}

sub pdomain {
    local($list) = @_;
    local($domain, @domain, $index);
    @domain = split(/ /, $list);
    $index = 0;
    foreach $domain (@domain) {
	$domainorder{$domain} = $index;
	$index++;
    }
}    

sub alongtheway {
    local($through, $all) = @_;
    return if ($queuehead > $through);
    local(@link, @part, @part1);
    @part = split(/\n/, join('', @answer[$queuehead .. $through]));
    if (!$all) {
	while(($line = pop(@part)) !~ /^LINK L/) {
	    unshift(@part1, $line);
	}
	unshift(@part1, $line) unless ($line eq '');
	$answer[$through] = join("\n", @part1);
	$answer[$through] .= "\n";
	$queuehead = $through;
    }
    &result(join("\n", @part));
}
