: # *-*-perl-*-*
    eval 'exec perl -S $0 "$@"'
    if $running_under_some_shell;  
#
#  ftpenum.pl - Enumerate FTP directories
#
#  Usage: ftpenum.pl hostname directory login password
#
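#  Description: Connects to hostname via FTP, changes to directory,
#  obtains a recursive listing (ls -lR), and prints the URL of the root
#  directory on the first line, followed by one URL <tab> timestamp line
#  per file, where timestamp is the UNIX time(3) in decimal.  Ignores
#  all symbolic links.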
#
#  If the directory contains an ls-lR.gz, or ls-lR.Z, or ls-lR file,
#  then ftpenum.pl uses that file rather than the remote LIST command
#  to retrieve the recursive directory listing.
#
#  Jim Guyton & Darren Hardy, hardy@cs.colorado.edu, April 1994
#
#  $Id: ftpenum.pl,v 1.11 1995/03/22 06:49:23 hardy Exp $
#
#######################################################################
#
#  Copyright (c) 1994, 1995.  All rights reserved.
#  
#          Mic Bowman of Transarc Corporation.
#          Peter Danzig of the University of Southern California.
#          Darren R. Hardy of the University of Colorado at Boulder.
#          Udi Manber of the University of Arizona.
#          Michael F. Schwartz of the University of Colorado at Boulder. 
#          Duane Wessels of the University of Colorado at Boulder. 
#  
#  This copyright notice applies to all code in Harvest other than
#  subsystems developed elsewhere, which contain other copyright notices
#  in their source text.
#  
#  The Harvest software was developed by the Internet Research Task
#  Force Research Group on Resource Discovery (IRTF-RD).  The Harvest
#  software may be used for academic, research, government, and internal
#  business purposes without charge.  If you wish to sell or distribute
#  the Harvest software to commercial clients or partners, you must
#  license the software.  See
#  http://harvest.cs.colorado.edu/harvest/copyright,licensing.html#licensing.
#  
#  The Harvest software is provided ``as is'', without express or
#  implied warranty, and with no support nor obligation to assist in its
#  use, correction, modification or enhancement.  We assume no liability
#  with respect to the infringement of copyrights, trade secrets, or any
#  patents, and are not responsible for consequential damages.  Proper
#  use of the Harvest software is entirely the responsibility of the user.
#  
#  For those who are using Harvest for non-commercial purposes, you may
#  make derivative works, subject to the following constraints:
#  
#  - You must include the above copyright notice and these accompanying 
#    paragraphs in all forms of derivative works, and any documentation 
#    and other materials related to such distribution and use acknowledge 
#    that the software was developed at the above institutions.
#  
#  - You must notify IRTF-RD regarding your distribution of the 
#    derivative work.
#  
#  - You must clearly notify users that your are distributing a modified 
#    version and not the original Harvest software.
#  
#  - Any derivative product is also subject to the restrictions of the 
#    copyright, including distribution and use limitations.
#
#
#  Process-wide setup: unbuffered output, library search path, scratch
#  file name, destination for ftp.pl messages, and command-line arguments.
#
$| = 1;		# flush STDOUT after every write

#  Fall back to the default installation prefix when HARVEST_HOME is not
#  set, then search its lib directory first for the libraries below.
unless (defined($ENV{'HARVEST_HOME'})) {
	$ENV{'HARVEST_HOME'} = "/usr/local/harvest";
}
unshift(@INC, "$ENV{'HARVEST_HOME'}/lib");
require 'ftp.pl';
require 'lsparse.pl';

$debug = 0;

#  Per-process scratch file (used for a downloaded ls-lR file); placed
#  in TMPDIR when that is set, otherwise in /tmp.
if (defined($ENV{'TMPDIR'})) {
	$tmpfile = $ENV{'TMPDIR'} . "/ftpenum.$$";
} else {
	$tmpfile = "/tmp/ftpenum.$$";
}

#  Unless debugging, point ftp.pl's message handle at /dev/null so that
#  its messages are discarded; when debugging, send them to STDERR.
if (!$debug) {
	open(DEVNULL, "> /dev/null") || 
		die "ftpenum.pl: Cannot write to /dev/null: $!\n";
	$ftp'showfd = DEVNULL;
} else {
	$ftp'showfd = STDERR;
}

#  Exactly four arguments are required; consume them from @ARGV.
&usage() unless (@ARGV == 4);
($host, $dir, $login, $password) = splice(@ARGV, 0, 4);

#
#  Enumeration parameters, taken from the environment:
#    HARVEST_DEPTH_MAX   maximum depth below the starting directory
#                        (unset or <= 0 means no depth limit)
#    HARVEST_URL_MAX     maximum number of file URLs to print (default 250)
#    HARVEST_URL_FILTER  name of an allow/deny filter file (optional)
#
$tree_root = "ftp://$host$dir";
$nurls = 0;			# number of file URLs printed so far
$url_ffile = $ENV{'HARVEST_URL_FILTER'};

#  The limit is stored as an absolute slash count: the requested depth
#  plus the depth of the starting directory itself, minus one.
$max_depth = 0;
if (defined($ENV{'HARVEST_DEPTH_MAX'}) && $ENV{'HARVEST_DEPTH_MAX'} > 0) {
	$max_depth = $ENV{'HARVEST_DEPTH_MAX'} + &get_depth($dir) - 1;
}

#  Anything below 1 (including an unset variable) selects the default.
$url_max = defined($ENV{'HARVEST_URL_MAX'}) ? $ENV{'HARVEST_URL_MAX'} : 0;
if ($url_max < 1) {
	$url_max = 250;
}

#
#  Connect, log in and change to the requested directory.  Each ftp.pl
#  call is treated as successful only when it returns 1; any failure
#  is fatal.
#
$ftp_port   = 21;
$retry_call = 1;
$attempts   = 5;

(&ftp'open($host, $ftp_port, $retry_call, $attempts) == 1) ||
	die "ftpenum.pl: Cannot connect to $host\n";
(&ftp'login($login, $password) == 1) ||
	die "ftpenum.pl: Cannot login to $host\n";
(&ftp'cwd($dir) == 1) ||
	die "ftpenum.pl: cwd to $host:$dir failed.\n";

#  Use the directory name reported by the server from here on, rather
#  than the name given on the command line.
$cwd = &ftp'pwd();

#
#  The first output line is the RootNode URL of the enumeration space.
#
$url = &path_to_url($host, $cwd);
print STDOUT "$url\n";

#  Configure the listing parser for UNIX-style listings.
$lsparse'fstype = "unix";
$lsparse'name   = "ftpenum.pl";

#
#  Now, get a recursive directory listing.  First try to retrieve a 
#  ls-lR file to save the server from computing the ls-lR on-the-fly.
#  We can support GNU zipped, ucb compressed, and uncompressed ls-lR
#  files.  If no file is available, then perform the LIST -lR command.
#
#  In every case the listing ends up readable from the ftp'NS handle.
#  $did_shortcut records whether it comes from a downloaded file (1),
#  which sigdie must close and delete, or from the live LIST data
#  connection (0), which sigdie must close with ftp'dir_close.
#
$did_shortcut = 0;

#  Single-quote the scratch file name for the shell pipelines below, so
#  that a TMPDIR containing spaces or shell metacharacters can neither
#  split nor alter the command.  Embedded quotes become '\''.
($tmpfile_sh = $tmpfile) =~ s/'/'\\''/g;
$tmpfile_sh = "'" . $tmpfile_sh . "'";

&ftp'type("I");		# presumably selects binary (image) transfers
if (&ftp'get("ls-lR.gz", $tmpfile, 0)) {
	print STDERR "Got gziped ls-lR\n" if ($debug);
	open(ftp'NS, "gzip -dc $tmpfile_sh |") || 
		die "ftpenum.pl: gzip -dc $tmpfile: $!\n";
	$did_shortcut = 1;
} elsif (&ftp'get("ls-lR.Z", $tmpfile, 0)) {
	print STDERR "Got ucb compressed ls-lR\n" if ($debug);
	#  Fed through stdin because the scratch file has no .Z suffix.
	open(ftp'NS, "uncompress -c < $tmpfile_sh |") || 
		die "ftpenum.pl: uncompress -c < $tmpfile: $!\n";
	$did_shortcut = 1;
} elsif (&ftp'get("ls-lR", $tmpfile, 0)) {
	print STDERR "Got standard ls-lR\n" if ($debug);
	#  Explicit read mode: the name can never be taken as a pipe or
	#  an output redirection.
	open(ftp'NS, "< $tmpfile") || 
		die "ftpenum.pl: Cannot read $tmpfile: $!\n";
	$did_shortcut = 1;
} elsif (&ftp'dir_open("-lR")) {
	$did_shortcut = 0;
} else {
	die "ftpenum.pl: Cannot get remote directory listing: $ftp'response\n";
}
$rls = "ftp'NS";                # the port from ftp package

if(! &lsparse'reset($cwd)) {          # don't use $dir here
	die "ftpenum.pl: lsparse reset failed";
}

#
#  Walk the listing one entry at a time and print "URL <tab> timestamp"
#  on STDOUT for every entry of type "f" that passes the depth and
#  filter checks.  Stops at end of listing, when the parser returns an
#  empty path, or once $url_max URLs have been printed.
#
while (!eof($rls)) {
	( $path, $size, $time, $type, $mode ) = &lsparse'line($rls);
	#  Diagnostics go to STDERR so that STDOUT carries nothing but
	#  the URL lines consumed by the caller.
	print STDERR "PATH=$path SIZE=$size TIME=$time TYPE=$type MODE=$mode\n" if ($debug);
	last if ($path eq '');		# parser produced no entry: stop
	next if (&filter_match($path));	# too deep, or rejected by the filter
	if ($type eq "f") {
		$url = &path_to_url($host, $path);
		print STDOUT "$url\t$time\n";	# OK, pass along
		if (++$nurls >= $url_max) {
			#  URL budget exhausted: report it and finish early.
			print STDERR "ftpenum.pl: Truncating RootNode $tree_root at $url_max LeafNode URLs\n";
			&sigdie();
		}
	}
}
&sigdie();	# END OF PROGRAM

#
#  Common exit path: release whichever listing source was in use, end
#  the FTP session with ftp'quit, and exit with status 0.  Never returns.
#
sub sigdie {
	if (!$did_shortcut) {
		#  The listing was opened with ftp'dir_open.
		&ftp'dir_close();
	} else {
		#  The listing was read from a downloaded ls-lR file:
		#  close the reader and remove the scratch file.
		close($rls);
		unlink($tmpfile);
	}
	&ftp'quit();
	exit(0);
}


#
#  Convert a host name and a remote pathname into an ftp-style URL.
#
#    $host  host name, used verbatim
#    $path  remote pathname, absolute or relative
#
#  The path is escaped with cleanup_path and every "/./" component is
#  removed.  Returns "ftp://host/path" with exactly one slash between
#  the host and the first path component.
#
sub path_to_url {
	local($host, $path) = @_;

	$path = &cleanup_path($path);

	#  Repeat until nothing changes: one s///g pass cannot collapse
	#  adjacent components such as "/a/././b", because the slash that
	#  ends one match is also the slash that would start the next.
	1 while ($path =~ s/\/\.\//\//);

	return "ftp://$host$path" if (substr($path, 0, 1) eq '/');
	return "ftp://$host/$path";
}


#
#  Percent-encode the characters of a pathname that must not appear
#  literally in a URL (RFC 1738, section 2.2).
#
#    $path  raw pathname
#
#  Returns the pathname with each of the following replaced by %xx,
#  where xx is the character code as two lowercase hex digits:
#    - the characters  < > " # % { } | \ ^ ~ [ ] ` '  and space
#    - control characters (0x00-0x1f) and DEL (0x7f)
#    - 8-bit characters (0x80-0xff)
#  Escaping control characters also keeps a tab or newline embedded in
#  a file name from breaking the "URL <tab> timestamp" output format.
#
sub cleanup_path {
	local($path) = @_;

	$path =~ s/([\x00-\x20\x7f-\xff<>"#%{}|\\^~\[\]`'])/sprintf("%%%02x", ord($1))/ge;
	return $path;
}

#
#  Print the command synopsis on STDERR and exit with status 1.
#
sub usage {
	local($synopsis) = "Usage: ftpenum.pl hostname directory login password\n";
	print STDERR $synopsis;
	exit(1);
}

#
#  Decide whether a listing entry is to be skipped.
#
#    $path  pathname of the entry
#
#  Returns 1 when a depth limit is active ($max_depth > 0) and the path
#  lies deeper than it.  Otherwise returns the verdict of compute_filter
#  when a filter file is configured, and 0 (keep the entry) when not.
#
sub filter_match {
	local($path) = @_;

	if ($max_depth > 0) {
		return 1 if (&get_depth($path) > $max_depth);
	}
	if (defined($url_ffile)) {
		return &compute_filter($path);
	}
	return 0;
}

#
#  Match a pathname against the allow/deny rules in the filter file
#  named by $url_ffile.
#
#    $data  pathname to test
#
#  Each rule line has the form "action regex", where action contains
#  "allow" or "deny" (case-insensitive; "deny" wins if both appear).
#  Rules are tried in file order and the first regex that matches
#  decides: returns 1 for deny (skip the entry), 0 for allow.  Returns 0
#  when no rule matches.  Dies if the filter file cannot be opened.
#
#  Blank lines, comment lines (first non-blank character '#'), lines
#  with an unrecognized action, and lines without a regex are ignored.
#
sub compute_filter {
	local($data) = @_;
	local($line, $allow_deny, $re, $rvalue);

	open(FILTER, "< $url_ffile") || 
		die "ftpenum.pl: Cannot read $url_ffile: $!\n";
	while (defined($line = <FILTER>)) {
		next if ($line =~ /^\s*$/ || $line =~ /^\s*#/);

		#  Splitting on whitespace already discards the line
		#  terminator, so nothing is chopped off the pattern when
		#  the last line has no trailing newline.
		($allow_deny, $re) = split(' ', $line);

		#  An empty pattern would make Perl reuse the last
		#  successfully matched regex; skip such lines instead.
		next unless (defined($re) && $re ne '');

		if ($allow_deny =~ /deny/i) {
			$rvalue = 1;
		} elsif ($allow_deny =~ /allow/i) {
			$rvalue = 0;
		} else {
			next;		# unknown action: ignore the rule
		}

		if ($data =~ /$re/) {
			close(FILTER);
			return $rvalue;
		}
	}
	close(FILTER);
	return 0;
}

#
#  Return the depth of a pathname, defined as the number of '/'
#  characters it contains ("/pub/x" has depth 2, "file" has depth 0).
#
#    $path  pathname to measure
#
sub get_depth {
	local($path) = @_;

	#  tr with an empty replacement list changes nothing and returns
	#  the number of characters it matched.
	return ($path =~ tr/\///);
}
