: # *-*-perl-*-*
    eval 'exec perl -S $0 "$@"'
    if $running_under_some_shell;  
#  (The statement above is the customary "run me under perl" preamble:
#  a shell treats ":" as a no-op and then execs perl on this file with
#  the original arguments.  $running_under_some_shell is not set
#  anywhere in this script, so perl itself skips the exec.)
#
#  ftpenum.pl - Enumerate FTP directories
#
#  Usage: ftpenum.pl [--keep-userinfo] hostname directory login password
#
#  Description: FTP hostname, cd to directory, ls -lR, and return
#  a complete list of URL <tab> timestamp, where timestamp is the 
#  UNIX time(3) in decimal.  Ignores all symbolic links.
#
#  If the directory contains an ls-lR.gz, or ls-lR.Z, or ls-lR file,
#  then ftpenum.pl uses that file rather than the remote LIST command
#  to retrieve the recursive directory listing.
#
#  Jim Guyton & Darren Hardy, hardy@cs.colorado.edu, April 1994
#
#  $Id: ftpenum.pl,v 1.21 1995/09/11 22:29:34 duane Exp $
#
#######################################################################
#
#  Copyright (c) 1994, 1995.  All rights reserved.
#  
#    The Harvest software was developed by the Internet Research Task
#    Force Research Group on Resource Discovery (IRTF-RD):
#  
#          Mic Bowman of Transarc Corporation.
#          Peter Danzig of the University of Southern California.
#          Darren R. Hardy of the University of Colorado at Boulder.
#          Udi Manber of the University of Arizona.
#          Michael F. Schwartz of the University of Colorado at Boulder.
#          Duane Wessels of the University of Colorado at Boulder.
#  
#    This copyright notice applies to software in the Harvest
#    ``src/'' directory only.  Users should consult the individual
#    copyright notices in the ``components/'' subdirectories for
#    copyright information about other software bundled with the
#    Harvest source code distribution.
#  
#  TERMS OF USE
#    
#    The Harvest software may be used and re-distributed without
#    charge, provided that the software origin and research team are
#    cited in any use of the system.  Most commonly this is
#    accomplished by including a link to the Harvest Home Page
#    (http://harvest.cs.colorado.edu/) from the query page of any
#    Broker you deploy, as well as in the query result pages.  These
#    links are generated automatically by the standard Broker
#    software distribution.
#    
#    The Harvest software is provided ``as is'', without express or
#    implied warranty, and with no support nor obligation to assist
#    in its use, correction, modification or enhancement.  We assume
#    no liability with respect to the infringement of copyrights,
#    trade secrets, or any patents, and are not responsible for
#    consequential damages.  Proper use of the Harvest software is
#    entirely the responsibility of the user.
#  
#  DERIVATIVE WORKS
#  
#    Users may make derivative works from the Harvest software, subject 
#    to the following constraints:
#  
#      - You must include the above copyright notice and these 
#        accompanying paragraphs in all forms of derivative works, 
#        and any documentation and other materials related to such 
#        distribution and use acknowledge that the software was 
#        developed at the above institutions.
#  
#      - You must notify IRTF-RD regarding your distribution of 
#        the derivative work.
#  
#      - You must clearly notify users that your are distributing 
#        a modified version and not the original Harvest software.
#  
#      - Any derivative product is also subject to these copyright 
#        and use restrictions.
#  
#    Note that the Harvest software is NOT in the public domain.  We
#    retain copyright, as specified above.
#  
#  HISTORY OF FREE SOFTWARE STATUS
#  
#    Originally we required sites to license the software in cases
#    where they were going to build commercial products/services
#    around Harvest.  In June 1995 we changed this policy.  We now
#    allow people to use the core Harvest software (the code found in
#    the Harvest ``src/'' directory) for free.  We made this change
#    in the interest of encouraging the widest possible deployment of
#    the technology.  The Harvest software is really a reference
#    implementation of a set of protocols and formats, some of which
#    we intend to standardize.  We encourage commercial
#    re-implementations of code complying to this set of standards.  
#  
#
#
#  Start-up: unbuffered output, library search path, scratch file name,
#  and the handle that receives ftp.pl's messages.
#
$| = 1;					# flush stdout after every write

#  Fall back to the default install prefix when HARVEST_HOME is unset,
#  then put its lib/ directory at the front of the search path.
unless (defined($ENV{'HARVEST_HOME'})) {
	$ENV{'HARVEST_HOME'} = "/usr/local/harvest";
}
unshift(@INC, "$ENV{'HARVEST_HOME'}/lib");
require 'ftp.pl';
require 'lsparse.pl';

$debug = 0;				# non-zero enables diagnostics

#  Scratch file for a downloaded ls-lR listing; TMPDIR overrides /tmp.
$tmpfile = (defined($ENV{'TMPDIR'}) ? $ENV{'TMPDIR'} : "/tmp") . "/ftpenum.$$";

#  Ignore all ftp.pl error messages unless we are debugging.
if (!$debug) {
	open(DEVNULL, "> /dev/null") || 
		die "ftpenum.pl: Cannot write to /dev/null: $!\n";
	$ftp'showfd = DEVNULL;
} else {
	$ftp'showfd = STDERR;
}

# Option Flags
#
$keep_userinfo	= 0;		# set by --keep-userinfo


# Option Processing
#
# Consume every leading "--option" argument.  Each one is shifted off
# @ARGV whether or not it is recognized, so an unknown option ends in
# the usage message instead of being tested again and again forever.
#
while (defined($ARGV[0]) && $ARGV[0] =~ /^--(.*)/) {
	$option = $1;
	shift(@ARGV);
	if ($option eq 'keep-userinfo') {
		$keep_userinfo = 1;
	} else {
		print STDERR "ftpenum.pl: Unknown option --$option\n";
		&usage();
	}
}

# Exactly four positional arguments must remain.
&usage() if ($#ARGV != 3);
$host = shift(@ARGV);
$dir  = shift(@ARGV);
$login = shift(@ARGV);
$password = shift(@ARGV);

# Special case: don't keep userinfo if username is 'anonymous'
#
$keep_userinfo = 0
	if ($login =~ /^anonymous$/i);

#
#  Enumeration parameters
#
$tree_root = "ftp://$host$dir";

#  A positive HARVEST_DEPTH_MAX limits the depth of reported paths.  It
#  is turned into an absolute slash count by adding the depth of $dir.
#  0 means "no limit".
$max_depth = 0;
if (defined($ENV{'HARVEST_DEPTH_MAX'})) {
	if ($ENV{'HARVEST_DEPTH_MAX'} > 0) {
		$max_depth = $ENV{'HARVEST_DEPTH_MAX'} + &get_depth($dir) - 1;
	}
}

#  HARVEST_URL_MAX caps the number of URLs written; anything below 1
#  (including an unset variable) selects the default of 250.
$nurls = 0;
$url_max = defined($ENV{'HARVEST_URL_MAX'}) ? $ENV{'HARVEST_URL_MAX'} : 0;
if ($url_max < 1) {
	$url_max = 250;
}

#  Optional allow/deny rule file; left undefined when the variable is unset.
$url_ffile = $ENV{'HARVEST_URL_FILTER'};

$ftp_port   = 21;
$retry_call = 1;
$attempts   = 5;

&ftp'open($host, $ftp_port, $retry_call, $attempts) ||
	die "ftpenum.pl: Cannot connect to $host\n";

&ftp'login($login, $password) ||
	die "ftpenum.pl: Cannot login to $host\n";

#  A failed cwd exits with status 1 and no message (the die below is
#  commented out).
#die "ftpenum.pl: cwd to $host:$dir failed.\n" 
&ftp'cwd($dir) || exit(1);

$cwd = &ftp'pwd();

#
#  First line is RootNode URL of the enumeration space.  It is built
#  from the directory the server reports after the cwd ($cwd), not from
#  the directory given on the command line.
#
$url = &path_to_url($host, $cwd);
print STDOUT "$url\n";

#  Settings for the lsparse.pl listing parser.
#  NOTE(review): presumably "unix" selects the UNIX `ls -l' line format
#  and the name is used in lsparse's own messages -- confirm in lsparse.pl.
$lsparse'fstype = "unix";
$lsparse'name   = "ftpenum.pl";

#
#  Now, get a recursive directory listing.  First try to retrieve a
#  ls-lR file to save the server from computing the ls-lR on-the-fly.
#  We can support GNU zipped, ucb compressed, and uncompressed ls-lR files.
#  If no file is available, then perform the LIST -lR command.
#
#  The three download branches (re)open the ftp'NS filehandle on the
#  local copy, through a decompressor where needed, and set
#  $did_shortcut to 1.  sigdie() uses $did_shortcut to pick its cleanup.
#  NOTE(review): the dir_open branch is presumed to leave the listing
#  readable on ftp'NS as well -- confirm in ftp.pl.
#  NOTE(review): the meaning of the third argument (0) of ftp'get is
#  not visible here -- confirm in ftp.pl.
#
$did_shortcut = 0;
&ftp'type("I");		# presumably FTP "TYPE I" (binary) -- confirm in ftp.pl
if (&ftp'get("ls-lR.gz", $tmpfile, 0)) {
	print STDERR "Got gziped ls-lR\n" if ($debug);
	# read the listing through a gzip pipe
	open(ftp'NS, "gzip -dc $tmpfile |") || 
		die "ftpenum.pl: gzip -dc $tmpfile: $!\n";
	$did_shortcut = 1;
} elsif (&ftp'get("ls-lR.Z", $tmpfile, 0)) {
	print STDERR "Got ucb compressed ls-lR\n" if ($debug);
	# read the listing through an uncompress pipe
	open(ftp'NS, "uncompress -c < $tmpfile |") || 
		die "ftpenum.pl: uncompress -c < $tmpfile: $!\n";
	$did_shortcut = 1;
} elsif (&ftp'get("ls-lR", $tmpfile, 0)) {
	print STDERR "Got standard ls-lR\n" if ($debug);
	# plain text: read the downloaded file directly
	open(ftp'NS, "$tmpfile") || 
		die "ftpenum.pl: Cannot read $tmpfile: $!\n";
	$did_shortcut = 1;
} elsif (&ftp'dir_open("-lR")) {
	# no listing file on the server: use the live listing
	$did_shortcut = 0;
} else {
	die "ftpenum.pl: Cannot get remote directory listing: $ftp'response\n";
}
$rls = "ftp'NS";                # the port from ftp package (handle used by name)

#  Prime the parser with the directory the listing is relative to.
if(! &lsparse'reset($cwd)) {          # don't use $dir here
	die "ftpenum.pl: lsparse reset failed";
}

#
#  Main loop: parse the listing one entry at a time and print
#  "URL <tab> timestamp" on STDOUT for every entry of type "f" that is
#  not rejected by filter_match().  The loop ends at end of listing or
#  at the first empty path returned by the parser; reaching $url_max
#  URLs ends the whole program through sigdie().
#
while (!eof($rls)) {
	($path, $size, $time, $type, $mode) = &lsparse'line($rls);
	$path =~ s/\/\.\//\//g;			# remove /./ components
	# Diagnostics belong on STDERR: STDOUT carries the URL list itself.
	print STDERR "PATH=$path SIZE=$size TIME=$time TYPE=$type MODE=$mode\n"
		if ($debug);
	last if ($path eq '');
	next if (&filter_match($path));
	if ($type eq "f") {
		$url = &path_to_url($host, $path);
		print STDOUT "$url\t$time\n";	# OK, pass along
		if (++$nurls >= $url_max) {
			print STDERR "ftpenum.pl: Truncating RootNode $tree_root at $url_max LeafNode URLs\n";
			&sigdie();		# does not return
		}
	}
}
&sigdie();	# END OF PROGRAM

sub sigdie {
	# Release the listing source, call ftp'quit and exit with status 0.
	# Called both at the normal end of the program and when the URL
	# limit is reached.  Never returns.
	unless ($did_shortcut) {
		# no listing file was downloaded: close the remote listing
		&ftp'dir_close();
	} else {
		# the listing was downloaded: close it and delete the local copy
		close($rls);
		unlink($tmpfile);
	}
	&ftp'quit();
	exit(0);
}


#
# very simple pathname to ftp-style URL
#
sub path_to_url {
	# Build "ftp://[login:password@]host/path" from a host name and a
	# remote path.
	#   $host - server name as it should appear in the URL
	#   $path - remote path; it is escaped by cleanup_path(), has its
	#           "/./" components collapsed, and is given a leading "/"
	#           if it lacks one
	# Reads the globals $keep_userinfo, $login and $password.
	# Returns the URL string.
	local($host, $path) = @_;

	# The userinfo part must be separated from the host by "@".  The
	# pieces are concatenated rather than interpolated so the "@" can
	# never be mistaken for the start of an array name.
	$host = $login . ':' . $password . '@' . $host
		if ($keep_userinfo);
	$path = &cleanup_path($path);
	$path =~ s/\/\.\//\//g;			# remove /./ components
	$path = '/' . $path			# add leading slash
		unless ($path =~ /^\//);

	return "ftp://$host$path";
}


#
# if path contains any weird characters, convert 'em to hex
# as per the draft URL document
#
sub cleanup_path {
	# Percent-encode the characters of $path that RFC 1738 lists as
	# unsafe in a URL:  < > " # % { } | \ ^ ~ [ ] ` ' and space.
	# Each one becomes "%" followed by its two-digit lower-case hex
	# code; every other character is copied unchanged.
	#   $path - the path to encode
	# Returns the encoded copy.  No global variables are modified.
	local($path) = @_;

	# One pass over the string; the replacement is evaluated per match.
	$path =~ s/([<>"#%{}|\\^~\[\]`' ])/sprintf("%%%02x", ord($1))/ge;
	return $path;
}

sub usage {
	# Print the command-line synopsis on STDERR and exit with status 1.
	# The optional flag comes after the program name and before the
	# four positional arguments.
	print STDERR "Usage: ftpenum.pl [--keep-userinfo] hostname directory login password\n";
	exit(1);
}

sub filter_match {
	# Decide whether $path is to be left out of the enumeration.
	#   $path - path as returned by the listing parser
	# Returns 1 when a depth limit is active ($max_depth > 0) and the
	# path lies deeper than it; otherwise the verdict of
	# compute_filter() when a filter file is configured; otherwise 0.
	local($path) = @_;

	if ($max_depth > 0) {
		return 1 if (&get_depth($path) > $max_depth);
	}
	if (defined($url_ffile)) {
		return &compute_filter($path);
	}
	return 0;
}

sub read_filter {
	# Append the rules found in the file named by the global $url_ffile
	# to the global @URLFilter, one element per line.  Lines that are
	# empty or start with "#" are skipped.
	# Dies if the file cannot be opened.  Always returns 0.
	local($_);		# do not clobber the caller's $_

	open(FILTER, "< $url_ffile") || 
		die "ftpenum.pl: Cannot read $url_ffile: $!\n";
	while (<FILTER>) {
		next if (/^\n/ || /^#/);
		# Strip the line terminator only; chop would eat a real
		# character from a last line that has no trailing newline.
		s/\n$//;
		push(@URLFilter, $_);
	}
	close(FILTER);
	return 0;
}

sub compute_filter {
	# Apply the allow/deny rules of the URL filter to $data.
	#   $data - the string (a path) to test
	# Rules have the form "<keyword> <regex>" and are tried in file
	# order; the first rule whose regex matches $data decides.
	# Returns 1 (filter it out) for a matching "deny" rule and 0 (keep
	# it) for a matching "allow" rule or when no rule matches.  Lines
	# that do not have exactly two fields, or whose keyword contains
	# neither "allow" nor "deny", are ignored.
	# Dies (inside read_filter) if the rule file cannot be opened.
	local($data) = @_;
	local($line, $allow_deny, $re);

	# Load the rules on first use only.  The flag keeps an empty rule
	# file from being re-read for every path, and no file handle is
	# opened here: read_filter() opens and closes its own.
	unless ($URLFilter_loaded || @URLFilter) {
		&read_filter();
		$URLFilter_loaded = 1;
	}

	foreach $line (@URLFilter) {
		next unless ($line =~ /^\s*(\S+)\s+(\S+)\s*$/);
		$allow_deny = $1;
		$re = $2;
		if ($allow_deny =~ /deny/i) {		# deny wins if both appear
			return 1 if ($data =~ /$re/);
		} elsif ($allow_deny =~ /allow/i) {
			return 0 if ($data =~ /$re/);
		}
		# any other keyword: not a rule, try the next line
	}
	return 0;
}

sub get_depth {
	# Return the depth of $path, defined as the number of "/"
	# characters it contains ("/a/b" is 2, "" is 0).
	#   $path - the path to measure; it is not modified
	# No global variables are touched.
	local($path) = @_;

	# tr with an empty replacement list counts without changing.
	return ($path =~ tr/\///);
}
