/* RCS revision string for this file; not referenced by any code visible here. */
static char rcsid[] = "staturl.c,v 1.33 1996/01/29 23:26:42 duane Exp";
/*
 *  staturl.c - Prints timestamp or MD5 values for each input URL.
 *  Timestamps for file: URLs; otherwise, retrieves the URL and prints MD5.
 *
 *  Usage: staturl [-D<debug-flag> ...]
 * 
 *  Input (one URL per line on stdin; lines starting with '#' are skipped):
 *         URL1
 *         ...
 *         URLn
 *
 *  Output:
 *         URL <tab> Last-Modification-Time:123456789
 *         ...
 *         URL <tab> MD5:xyz
 *
 *  DEBUG: section  41, level 1         Gatherer enumeration URL verification
 *
 *  Darren Hardy, hardy@cs.colorado.edu, May 1994
 *
 *  ----------------------------------------------------------------------
 *  Copyright (c) 1994, 1995.  All rights reserved.
 *  
 *    The Harvest software was developed by the Internet Research Task
 *    Force Research Group on Resource Discovery (IRTF-RD):
 *  
 *          Mic Bowman of Transarc Corporation.
 *          Peter Danzig of the University of Southern California.
 *          Darren R. Hardy of the University of Colorado at Boulder.
 *          Udi Manber of the University of Arizona.
 *          Michael F. Schwartz of the University of Colorado at Boulder.
 *          Duane Wessels of the University of Colorado at Boulder.
 *  
 *    This copyright notice applies to software in the Harvest
 *    ``src/'' directory only.  Users should consult the individual
 *    copyright notices in the ``components/'' subdirectories for
 *    copyright information about other software bundled with the
 *    Harvest source code distribution.
 *  
 *  TERMS OF USE
 *    
 *    The Harvest software may be used and re-distributed without
 *    charge, provided that the software origin and research team are
 *    cited in any use of the system.  Most commonly this is
 *    accomplished by including a link to the Harvest Home Page
 *    (http://harvest.cs.colorado.edu/) from the query page of any
 *    Broker you deploy, as well as in the query result pages.  These
 *    links are generated automatically by the standard Broker
 *    software distribution.
 *    
 *    The Harvest software is provided ``as is'', without express or
 *    implied warranty, and with no support nor obligation to assist
 *    in its use, correction, modification or enhancement.  We assume
 *    no liability with respect to the infringement of copyrights,
 *    trade secrets, or any patents, and are not responsible for
 *    consequential damages.  Proper use of the Harvest software is
 *    entirely the responsibility of the user.
 *  
 *  DERIVATIVE WORKS
 *  
 *    Users may make derivative works from the Harvest software, subject 
 *    to the following constraints:
 *  
 *      - You must include the above copyright notice and these 
 *        accompanying paragraphs in all forms of derivative works, 
 *        and any documentation and other materials related to such 
 *        distribution and use acknowledge that the software was 
 *        developed at the above institutions.
 *  
 *      - You must notify IRTF-RD regarding your distribution of 
 *        the derivative work.
 *  
 *      - You must clearly notify users that your are distributing 
 *        a modified version and not the original Harvest software.
 *  
 *      - Any derivative product is also subject to these copyright 
 *        and use restrictions.
 *  
 *    Note that the Harvest software is NOT in the public domain.  We
 *    retain copyright, as specified above.
 *  
 *  HISTORY OF FREE SOFTWARE STATUS
 *  
 *    Originally we required sites to license the software in cases
 *    where they were going to build commercial products/services
 *    around Harvest.  In June 1995 we changed this policy.  We now
 *    allow people to use the core Harvest software (the code found in
 *    the Harvest ``src/'' directory) for free.  We made this change
 *    in the interest of encouraging the widest possible deployment of
 *    the technology.  The Harvest software is really a reference
 *    implementation of a set of protocols and formats, some of which
 *    we intend to standardize.  We encourage commercial
 *    re-implementations of code complying to this set of standards.  
 *  
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include "util.h"
#include "url.h"
#include "template.h"		/* Needed for #define's only */

/*
 *  main - Reads URLs from stdin, one per line, and prints one line per
 *  URL to stdout: the lstat() modification time for file: URLs, or the
 *  MD5 value of the retrieved object for every other URL type.
 *
 *  Environment:
 *    HARVEST_GATHERER_LOGFILE  file opened in "a+" mode and handed to
 *                              init_log3(); stderr is used instead when
 *                              the variable is unset or the open fails.
 *    HARVEST_URL_DELAY         number of seconds (parsed with atoi()) to
 *                              sleep after each URL for which
 *                              url_retrieve() returned zero.
 *
 *  Arguments: leading arguments that begin with '-' are consumed; those
 *  beginning with "-D" are passed to debug_flag(), the rest are ignored.
 *
 *  URLs for which url_open(), lstat() or url_retrieve() fails produce no
 *  output line.  The program always exits with status 0.
 */
int main(int argc, char *argv[])
{
	static char buf[BUFSIZ];
	char *s = NULL;
	URL *up = NULL;
	extern int liburl_conform_rfc1738;
	FILE *logfp = NULL;
	int delay = 0;

	/* Look the variable up once and reuse the result for fopen(). */
	if ((s = getenv("HARVEST_GATHERER_LOGFILE")) != NULL)
		logfp = fopen(s, "a+");
	if (logfp == NULL)
		logfp = stderr;	/* unset variable or failed open */
	init_log3("staturl", logfp, stderr);
#ifdef USE_HOST_CACHE
	host_cache_init();
#endif
	debug_init();

	if ((s = getenv("HARVEST_URL_DELAY")) != NULL)
		delay = atoi(s);

#ifdef HAVE_SETLINEBUF
	setlinebuf(stdout);	/* don't keep pipe waiting */
	setlinebuf(stderr);
#else
	setbuf(stdout, NULL);
	setbuf(stderr, NULL);
#endif
	init_url();
	liburl_conform_rfc1738 = 0;	/* trust the input, don't force */

	/* Consume leading '-' options; only -D... is acted upon. */
	for (argc--, argv++; argc > 0 && **argv == '-'; argc--, argv++) {
		if (strncmp(*argv, "-D", 2) == 0) {
			debug_flag(*argv);
		}
	}

	Debug(41, 1, ("staturl: Starting...\n"));
	while (fgets(buf, BUFSIZ, stdin)) {
		if (buf[0] == '#')	/* skip comments */
			continue;
		if ((s = strrchr(buf, '\n')) != NULL)
			*s = '\0';	/* strip last newline */

		Debug(41, 1, ("staturl: trying to process: %s\n", buf));

		if ((up = url_open(buf)) == NULL) {
			continue;
		}
		if (up->type == URL_FILE) {
			struct stat sb;

			if (lstat(up->filename, &sb) < 0) {
				/* Release the URL on this path too; it used to leak. */
				url_close(up);
				continue;
			}
			/* Print via long so the timestamp is not truncated to int. */
			printf("%s\t%s:%ld\n", up->url, T_TIMESTAMP,
			    (long) sb.st_mtime);
			url_close(up);
			continue;
		}
		if (url_retrieve(up)) {
			/* Non-zero return: no output line for this URL. */
			url_close(up);
			continue;
		}
		if (up->md5)
			printf("%s\t%s:%s\n", up->url, T_MD5, up->md5);
		url_close(up);
		if (delay > 0)
			sleep(delay);
	}

	finish_url();
	Debug(41, 1, ("staturl: Finished.\n"));
	exit(0);
}
