
/* RCS revision identification string for this source file. */
static char rcsid[] = "prepurls.c,v 1.23 1996/01/04 04:15:12 duane Exp";
/*
 *  prepurls.c - Prepares URLs using an enumerator (for Root nodes)
 *  and a URL-stat (for Leaf nodes).
 *
 *  Usage: prepurls [--root cmd] [--leaf cmd]
 *
 *  The cmds for root and leaf take URLs as stdin and output URLs + stamp.
 *
 *  Input:
 *      ROOT\tURL Opt1 ... OptN
 *      ...
 *      LEAF\tURL
 *
 *  Output:
 *      URL     MD5:xxxx
 *      ...
 *      URL     Last-Modification-Time:xxxx
 *
 *  Darren Hardy, hardy@cs.colorado.edu, April 1994
 *
 *  ----------------------------------------------------------------------
 *  Copyright (c) 1994, 1995.  All rights reserved.
 *  
 *    The Harvest software was developed by the Internet Research Task
 *    Force Research Group on Resource Discovery (IRTF-RD):
 *  
 *          Mic Bowman of Transarc Corporation.
 *          Peter Danzig of the University of Southern California.
 *          Darren R. Hardy of the University of Colorado at Boulder.
 *          Udi Manber of the University of Arizona.
 *          Michael F. Schwartz of the University of Colorado at Boulder.
 *          Duane Wessels of the University of Colorado at Boulder.
 *  
 *    This copyright notice applies to software in the Harvest
 *    ``src/'' directory only.  Users should consult the individual
 *    copyright notices in the ``components/'' subdirectories for
 *    copyright information about other software bundled with the
 *    Harvest source code distribution.
 *  
 *  TERMS OF USE
 *    
 *    The Harvest software may be used and re-distributed without
 *    charge, provided that the software origin and research team are
 *    cited in any use of the system.  Most commonly this is
 *    accomplished by including a link to the Harvest Home Page
 *    (http://harvest.cs.colorado.edu/) from the query page of any
 *    Broker you deploy, as well as in the query result pages.  These
 *    links are generated automatically by the standard Broker
 *    software distribution.
 *    
 *    The Harvest software is provided ``as is'', without express or
 *    implied warranty, and with no support nor obligation to assist
 *    in its use, correction, modification or enhancement.  We assume
 *    no liability with respect to the infringement of copyrights,
 *    trade secrets, or any patents, and are not responsible for
 *    consequential damages.  Proper use of the Harvest software is
 *    entirely the responsibility of the user.
 *  
 *  DERIVATIVE WORKS
 *  
 *    Users may make derivative works from the Harvest software, subject 
 *    to the following constraints:
 *  
 *      - You must include the above copyright notice and these 
 *        accompanying paragraphs in all forms of derivative works, 
 *        and any documentation and other materials related to such 
 *        distribution and use acknowledge that the software was 
 *        developed at the above institutions.
 *  
 *      - You must notify IRTF-RD regarding your distribution of 
 *        the derivative work.
 *  
 *      - You must clearly notify users that your are distributing 
 *        a modified version and not the original Harvest software.
 *  
 *      - Any derivative product is also subject to these copyright 
 *        and use restrictions.
 *  
 *    Note that the Harvest software is NOT in the public domain.  We
 *    retain copyright, as specified above.
 *  
 *  HISTORY OF FREE SOFTWARE STATUS
 *  
 *    Originally we required sites to license the software in cases
 *    where they were going to build commercial products/services
 *    around Harvest.  In June 1995 we changed this policy.  We now
 *    allow people to use the core Harvest software (the code found in
 *    the Harvest ``src/'' directory) for free.  We made this change
 *    in the interest of encouraging the widest possible deployment of
 *    the technology.  The Harvest software is really a reference
 *    implementation of a set of protocols and formats, some of which
 *    we intend to standardize.  We encourage commercial
 *    re-implementations of code complying to this set of standards.  
 *  
 */
#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include "util.h"

/* Command run to process ROOT lines; replaced by the --root argument. */
static char *rootcmd = "enum";
/* Command run to process LEAF lines; replaced by the --leaf argument. */
static char *leafcmd = "staturl";
/* Write stream on the pipe feeding the root command's stdin. */
static FILE *rootnodes = NULL;
/* Write stream on the pipe feeding the leaf command's stdin. */
static FILE *leafnodes = NULL;

/*
 *  usage - Write the command-line synopsis to stderr and terminate
 *  the program with exit status 1.  Does not return.
 */
static void usage()
{
	fputs("Usage: prepurls [--root cmd] [--leaf cmd]\n", stderr);
	exit(1);
}

int main(argc, argv)
     int argc;
     char *argv[];
{
	char buf[BUFSIZ], *s;
	int rpid, lpid, cpid, rootpipe[2], leafpipe[2], ncaught = 0;
	FILE *logfp = NULL;

	if (getenv("HARVEST_GATHERER_LOGFILE") != (char *) NULL)
		logfp = fopen(getenv("HARVEST_GATHERER_LOGFILE"), "a+");
	if (logfp == (FILE *) NULL)
		logfp = stderr;

	init_log3("prepurls", logfp, stderr);
	debug_init();
	/* Parse the command line */
	while (--argc > 0) {
		++argv;
		if (!strcmp(*argv, "--root")) {
			if (--argc < 1)
				usage();
			rootcmd = strdup(*++argv);
		} else if (!strcmp(*argv, "--leaf")) {
			if (--argc < 1)
				usage();
			leafcmd = strdup(*++argv);
		}
	}
	if (!rootcmd || !leafcmd)
		usage();

	/* Start up the root node processor */
	if (pipe(rootpipe) < 0) {
		log_errno("pipe");
		exit(1);
	}
	if ((rpid = fork()) < 0) {
		log_errno("fork");
		exit(1);
	}
	if (rpid != 0) {	/* parent */
		close(rootpipe[0]);
	} else {		/* child */
		char *argv[64];
		dup2(rootpipe[0], 0);
		close(rootpipe[1]);
		memset(argv, '\0', sizeof(argv));
		parse_argv(argv, rootcmd);
		execvp(argv[0], argv);
		log_errno("execvp");
		_exit(1);
	}
	if ((rootnodes = fdopen(rootpipe[1], "w")) == NULL) {
		log_errno("fdopen");
		exit(1);
	}
	/* Start up the leaf node processor */
	if (pipe(leafpipe) < 0) {
		log_errno("pipe");
		exit(1);
	}
	if ((lpid = fork()) < 0) {
		log_errno("fork");
		exit(1);
	}
	if (lpid != 0) {	/* parent */
		close(leafpipe[0]);
	} else {		/* child */
		char *argv[64];
		dup2(leafpipe[0], 0);
		close(leafpipe[1]);
		memset(argv, '\0', sizeof(argv));
		parse_argv(argv, leafcmd);
		execvp(argv[0], argv);
		log_errno("execvp");
		_exit(1);
	}
	if ((leafnodes = fdopen(leafpipe[1], "w")) == NULL) {
		log_errno("fdopen");
		exit(1);
	}
	/* 
	 * Now, divide up the input by ROOT's and LEAF's and send to
	 * the correct node processor.
	 */
	while (fgets(buf, BUFSIZ, stdin)) {
		if (!strncmp(buf, "ROOT", strlen("ROOT"))) {
			s = buf + strlen("ROOT");
			while (isspace(*s))
				s++;
			fprintf(rootnodes, "%s", s);
			fflush(rootnodes);
#ifdef DEBUG
			Log("Passing Root: %s\n", s);
#endif
		} else if (!strncmp(buf, "LEAF", strlen("LEAF"))) {
			s = buf + strlen("LEAF");
			while (isspace(*s))
				s++;
			fprintf(leafnodes, "%s", s);
			fflush(leafnodes);
#ifdef DEBUG
			Log("Passing Leaf: %s\n", s);
#endif
		} else {
			Log("Illegal Input: %s\n", buf);
		}
	}
	fclose(rootnodes);
	close(rootpipe[1]);
	fclose(leafnodes);
	close(leafpipe[1]);
	while (ncaught < 2) {
		cpid = wait(NULL);
		if ((cpid == rpid) || (cpid == lpid))
			ncaught++;
	}
	exit(0);
}
