/* RCS revision identification string for this file. */
static char rcsid[] = "gather.c,v 1.51 1996/01/30 01:33:10 duane Exp";
/*
 *  gather.c - Simple, portable client to retrieve data from a Gatherer,
 *  and print it to stdout.  Uses the GNU zip compression to transmit
 *  the data over the network.  Must have 'gzip' in your path.
 *
 *  Usage: gather [-info | -nocompress] hostname port [timestamp]
 *
 *  Darren Hardy, hardy@cs.colorado.edu, July 1994
 *
 *  ----------------------------------------------------------------------
 *  Copyright (c) 1994, 1995.  All rights reserved.
 *  
 *    The Harvest software was developed by the Internet Research Task
 *    Force Research Group on Resource Discovery (IRTF-RD):
 *  
 *          Mic Bowman of Transarc Corporation.
 *          Peter Danzig of the University of Southern California.
 *          Darren R. Hardy of the University of Colorado at Boulder.
 *          Udi Manber of the University of Arizona.
 *          Michael F. Schwartz of the University of Colorado at Boulder.
 *          Duane Wessels of the University of Colorado at Boulder.
 *  
 *    This copyright notice applies to software in the Harvest
 *    ``src/'' directory only.  Users should consult the individual
 *    copyright notices in the ``components/'' subdirectories for
 *    copyright information about other software bundled with the
 *    Harvest source code distribution.
 *  
 *  TERMS OF USE
 *    
 *    The Harvest software may be used and re-distributed without
 *    charge, provided that the software origin and research team are
 *    cited in any use of the system.  Most commonly this is
 *    accomplished by including a link to the Harvest Home Page
 *    (http://harvest.cs.colorado.edu/) from the query page of any
 *    Broker you deploy, as well as in the query result pages.  These
 *    links are generated automatically by the standard Broker
 *    software distribution.
 *    
 *    The Harvest software is provided ``as is'', without express or
 *    implied warranty, and with no support nor obligation to assist
 *    in its use, correction, modification or enhancement.  We assume
 *    no liability with respect to the infringement of copyrights,
 *    trade secrets, or any patents, and are not responsible for
 *    consequential damages.  Proper use of the Harvest software is
 *    entirely the responsibility of the user.
 *  
 *  DERIVATIVE WORKS
 *  
 *    Users may make derivative works from the Harvest software, subject 
 *    to the following constraints:
 *  
 *      - You must include the above copyright notice and these 
 *        accompanying paragraphs in all forms of derivative works, 
 *        and any documentation and other materials related to such 
 *        distribution and use acknowledge that the software was 
 *        developed at the above institutions.
 *  
 *      - You must notify IRTF-RD regarding your distribution of 
 *        the derivative work.
 *  
 *      - You must clearly notify users that your are distributing 
 *        a modified version and not the original Harvest software.
 *  
 *      - Any derivative product is also subject to these copyright 
 *        and use restrictions.
 *  
 *    Note that the Harvest software is NOT in the public domain.  We
 *    retain copyright, as specified above.
 *  
 *  HISTORY OF FREE SOFTWARE STATUS
 *  
 *    Originally we required sites to license the software in cases
 *    where they were going to build commercial products/services
 *    around Harvest.  In June 1995 we changed this policy.  We now
 *    allow people to use the core Harvest software (the code found in
 *    the Harvest ``src/'' directory) for free.  We made this change
 *    in the interest of encouraging the widest possible deployment of
 *    the technology.  The Harvest software is really a reference
 *    implementation of a set of protocols and formats, some of which
 *    we intend to standardize.  We encourage commercial
 *    re-implementations of code complying to this set of standards.  
 *  
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/param.h>
#include <sys/wait.h>
#include <sys/socket.h>
#include <netdb.h>
#include <netinet/in.h>
#include <arpa/inet.h>

/*
 *  Timeout in seconds (5 minutes) passed to alarm() while waiting for
 *  a response from the remote Gatherer; sigdie() reports its expiry.
 */
#define WAIT_MAX_SECS	300

/*
 *  sigdie() - Handler installed for SIGALRM by main().  Reports on
 *  stderr that the remote Gatherer did not respond within
 *  WAIT_MAX_SECS seconds, then exits with status 1.
 */
static void sigdie()
{
	(void) fprintf(stderr,
	    "gather: Timed out after waiting %d seconds for a response from the remote Gatherer.\n",
	    WAIT_MAX_SECS);
	exit(1);
}

/*
 *  usage() - Prints the command-line synopsis on stderr and exits
 *  with status 1.  Never returns.
 */
static void usage()
{
	fputs("Usage: gather [-info | -nocompress] hostname port [timestamp]\n",
	    stderr);
	exit(1);
}


/* Retrieval routine used for every protocol version other than 0.2.x;
 * currently just an alias for the v0.2 routine. */
#define grab_data_default(x,y,z)	grab_data_v0_2(x,y,z)

/* Nonzero: ask the Gatherer for compressed data and pipe it through
 * gzip.  Cleared by -nocompress or when check_for_gzip() fails. */
static int do_compress = 1;
/* Nonzero (set by -info): send INFO and print the reply instead of
 * retrieving data. */
static int do_info = 0;

/* Local functions */
static void grab_data_v0_2();
static void die();
static char *getfullhostname();
static void send_msg();
static char *xstrdup();		/* same as strdup(3) for portability */
static void gzip_message();
static void do_plain_text_retrieve();
static void do_compressed_retrieve();
static struct hostent *xgethostbyname();
static int check_for_gzip();

/*
 *  main() - Parses the command line, connects to the Gatherer at
 *  hostname:port, performs the HELLO handshake, and then either prints
 *  the reply to an INFO request (-info) or issues SEND-UPDATE and
 *  copies the returned data to stdout.
 *
 *  Usage: gather [-info | -nocompress] hostname port [timestamp]
 *
 *  Exits 0 on success and 1 on any usage, network or protocol error.
 */
int main(argc, argv)
     int argc;
     char *argv[];
{
	FILE *ifp = NULL;
	FILE *ofp = NULL;
	char *p = NULL;
	char *server_host = NULL;
	char *this_host = getfullhostname();
	int s;
	int server_port;
	int timestamp = 0;
	int nfields;
	int version_major;
	int version_minor;
	int version_minor_minor = 0;
	static char that_version[BUFSIZ];
	static char buf[BUFSIZ];
	static char that_host[BUFSIZ];
	static char xbuf[4096];
	struct hostent *hp = NULL;
	struct sockaddr_in sa;

	signal(SIGALRM, sigdie);
	signal(SIGPIPE, SIG_DFL);	/* parent process may have ignored */
	alarm(0);
	do_compress = 1;
	if (argc > 1 && !strcmp(argv[1], "-nocompress")) {
		argc--;
		argv++;
		do_compress = 0;
	}
	if (argc > 1 && !strcmp(argv[1], "-info")) {
		argc--;
		argv++;
		do_info = 1;
	}
	if (argc < 3)
		usage();

	/* getfullhostname() returns NULL when the local name cannot be
	 * determined; never hand a NULL pointer to "%s" below. */
	if (this_host == NULL)
		this_host = "unknown";

	server_host = xstrdup(argv[1]);
	server_port = atoi(argv[2]);
	if (argc == 4)
		timestamp = atoi(argv[3]);

	/* The port must fit in the 16 bits that htons() will keep */
	if (timestamp < 0 || server_port < 0 || server_port > 65535)
		usage();
	for (p = argv[2]; *p; p++)
		if (!isdigit((unsigned char) *p))	/* plain char may be negative */
			usage();

	if (do_compress && check_for_gzip() != 0) {
		fprintf(stderr, "gather: WARNING: 'gzip' not found.  Compression disabled.\n");
		do_compress = 0;
	}
	errno = 0;
	/* Find out who they are */
	if ((hp = xgethostbyname(server_host)) == NULL) {
		if (errno == 0)
			fprintf(stderr, "gather: %s: Host unknown.\n",
			    server_host);
		else
			perror(server_host);
		exit(1);
	}
	/* Refuse an address that would not fit into sin_addr */
	if (hp->h_length < 0 || (size_t) hp->h_length > sizeof(sa.sin_addr)) {
		fprintf(stderr, "gather: %s: Unsupported address type.\n",
		    server_host);
		exit(1);
	}
	/* Set up the Destination Address */
	memset(&sa, '\0', sizeof(sa));
	memcpy(&sa.sin_addr, hp->h_addr_list[0], hp->h_length);
	sa.sin_family = AF_INET;
	sa.sin_port = (unsigned short) htons(server_port);

	/* Create a socket, and connect to the remote host */
	if ((s = socket(PF_INET, SOCK_STREAM, 0)) < 0) {
		perror("gather: socket");
		exit(1);
	}
	if (connect(s, (struct sockaddr *) &sa, sizeof(sa)) < 0) {
		perror("gather: connect");
		exit(1);
	}
	/* Use buffered I/O to make sure we get the right number of bytes */
	if ((ifp = fdopen(s, "r")) == NULL) {
		perror("gather: fdopen");
		exit(1);
	}
	if ((ofp = fdopen(s, "w")) == NULL) {
		perror("gather: fdopen");
		exit(1);
	}
	/* Grab welcome message */
	alarm(WAIT_MAX_SECS);
	if (fgets(buf, BUFSIZ, ifp) == NULL) {
		fprintf(stderr, "gather: Did not receive welcome message.\n");
		die(ofp);
	}
	alarm(0);
	if (strncmp(buf, "000", 3) != 0) {	/* Check OK */
		fprintf(stderr, "gather: Couldn't connect to %s:%d.\n",
		    server_host, server_port);
		fprintf(stderr, "gather: Did not receive handshake: %s", buf);
		die(ofp);
	}
	/* buf holds fewer than BUFSIZ characters, so neither %s field can
	 * overflow its BUFSIZ-sized destination. */
	if (sscanf(buf, "000 - HELLO %s %s", that_version, that_host) != 2) {
		fprintf(stderr, "gather: Cannot parse handshake: %s\n", buf);
		die(ofp);
	}
	/* Accept both "major.minor" and "major.minor.patch"; a missing
	 * patch level is left at 0. */
	version_major = version_minor = version_minor_minor = 0;
	nfields = sscanf(that_version, "%d.%d.%d", &version_major,
	    &version_minor, &version_minor_minor);
	if (nfields < 2) {
		fprintf(stderr, "gather: Cannot parse version number: %s\n",
		    that_version);
		die(ofp);
	}
	/* Say HELLO */
	sprintf(buf, "HELLO %s\n", this_host);
	send_msg(buf, ofp);
	alarm(WAIT_MAX_SECS);
	if (fgets(buf, BUFSIZ, ifp) == NULL) {
		fprintf(stderr, "gather: Did not receive HELLO ack.\n");
		die(ofp);
	}
	alarm(0);
	if (strncmp(buf, "100", 3)) {	/* Check OK */
		fprintf(stderr, "gather: Received: %s", buf);
		/* don't exit, not a fatal error */
	}
	if (do_info) {
		/* Servers failing this version test are not sent INFO;
		 * just say QUIT and leave quietly. */
		if (!(version_major > 0 ||
			version_minor > 2 ||
			version_minor_minor > 2)) {
			send_msg("QUIT\n", ofp);
			(void) close(fileno(ofp));
			exit(0);
		}
		send_msg("INFO\n", ofp);
		while (fgets(xbuf, sizeof(xbuf), ifp)) {
			if (!strncmp(xbuf, "600", 3)) {	/* end of INFO reply */
				send_msg("QUIT\n", ofp);
				(void) close(fileno(ofp));
				exit(0);
			}
			if (!strncmp(xbuf, "601", 3) ||
			    !strncmp(xbuf, "602", 3)) {
				die(ofp);
			}
			fputs(xbuf, stdout);
		}
		die(ofp);	/* connection ended without a 600 line */
	}
	if (do_compress) {
		/* Set mode to compressed data */
		sprintf(buf, "SET compression\n");
		send_msg(buf, ofp);
		if (fgets(buf, BUFSIZ, ifp) == NULL) {
			fprintf(stderr, "gather: Did not receive SET COMPRESSION ack.\n");
			die(ofp);
		}
		if (strncmp(buf, "500", 3)) {
			fprintf(stderr, "gather: Received: %s", buf);
			die(ofp);
		}
	}
	/* Issue SEND-UPDATE command */
	sprintf(buf, "SEND-UPDATE %d\n", timestamp);
	send_msg(buf, ofp);
	if (fgets(buf, BUFSIZ, ifp) == NULL) {
		fprintf(stderr, "gather: Did not receive SEND-UPDATE %d ack.\n", timestamp);
		die(ofp);
	}
	if (strncmp(buf, "400", 3) != 0) {	/* Check OK */
		fprintf(stderr, "gather: Received: %s", buf);
		die(ofp);
	}
#ifdef DEBUG
	fprintf(stderr, "Server is using protocol version %d.%d.x\n",
	    version_major, version_minor);
#endif

	if (version_major == 0 && version_minor == 2)
		grab_data_v0_2(ifp, ofp, this_host);
	else
		grab_data_default(ifp, ofp, this_host);

	/* Quit */
	fprintf(ofp, "QUIT\n");
	fflush(ofp);

	(void) close(s);	/* close the socket, and exit */
	exit(0);
}

/*
 *  grab_data_v0_2() - Data retrieval for version 0.2.x of the
 *  protocol.  Dispatches on the global do_compress flag: compressed
 *  transfers go through do_compressed_retrieve(), everything else
 *  through do_plain_text_retrieve().  this_host is accepted but unused.
 */
static void grab_data_v0_2(ifp, ofp, this_host)
     FILE *ifp, *ofp;
     char *this_host;
{
	if (!do_compress) {
		do_plain_text_retrieve(ifp, ofp);
		return;
	}
	do_compressed_retrieve(ifp, ofp);
}

/*
 *  read_raw_line() - Reads bytes from fp into buf until a newline has
 *  been stored, 'size' bytes have been stored, or EOF/error occurs.
 *  Returns the number of bytes stored (0 at EOF).  No terminating NUL
 *  is added, so embedded NUL bytes in the data are counted correctly.
 */
static int read_raw_line(fp, buf, size)
     FILE *fp;
     char *buf;
     int size;
{
	int len = 0;
	int c;

	while (len < size && (c = getc(fp)) != EOF) {
		buf[len++] = (char) c;
		if (c == '\n')
			break;
	}
	return len;
}

/*
 *  do_plain_text_retrieve() - Copies uncompressed data from 'ifp' to
 *  stdout until a line beginning with "499 - Sent" arrives or the
 *  stream ends.  'ofp' is accepted for symmetry with
 *  do_compressed_retrieve() but is not used.
 *
 *  plain-text retrieve is not great because you have to check each line
 *  for the 499 tag.  The data may contain arbitrary bytes (including
 *  NULs), so the exact byte count of every chunk is tracked rather than
 *  being guessed from the buffer contents afterwards.
 */
static void do_plain_text_retrieve(ifp, ofp)
     FILE *ifp;
     FILE *ofp;
{
	static const char end_tag[] = "499 - Sent";
	static char buf[BUFSIZ];
	int tag_len = (int) (sizeof(end_tag) - 1);
	int at_line_start = 1;	/* is this chunk the beginning of a line? */
	int len;

	while ((len = read_raw_line(ifp, buf, BUFSIZ)) > 0) {
		/* Only honor the tag at a real line start, never in the
		 * continuation chunk of an over-long line. */
		if (at_line_start && len >= tag_len &&
		    memcmp(buf, end_tag, (size_t) tag_len) == 0)
			return;
		fwrite(buf, 1, (size_t) len, stdout);
		at_line_start = (buf[len - 1] == '\n');
	}
}

/*
 *  do_compressed_retrieve - Retrieves GNU zip'ed data from 'ifp',
 *  then writes the uncompressed data to stdout.  ofp is the
 *  socket back to the Gatherer.
 *
 *  A child process runs "gzip -dc" with its stdin connected to a pipe;
 *  the parent copies everything read from 'ifp' into that pipe until
 *  end of stream, then waits for the child.  Each read from the
 *  Gatherer is guarded by a WAIT_MAX_SECS alarm.  On pipe, fork or
 *  write failure the error is reported and die(ofp) is called.
 */
static void do_compressed_retrieve(ifp, ofp)
     FILE *ifp;
     FILE *ofp;
{
	int pid;
	int pfd[2];
	int n;
	int off;
	int w;
	static char buf[BUFSIZ];

	if (pipe(pfd) < 0) {
		perror("gather: pipe");
		die(ofp);
	}
	if ((pid = fork()) < 0) {
		perror("gather: fork");
		die(ofp);
	}
	if (pid == 0) {		/* CHILD */
		close(pfd[1]);
		if (pfd[0] != 0) {
			/* comp-read-pipe -> stdin */
			if (dup2(pfd[0], 0) < 0) {
				perror("gather: dup2");
				_exit(1);
			}
			close(pfd[0]);	/* gzip only needs the copy on fd 0 */
		}
		execlp("gzip", "gzip", "-dc", (char *) NULL);
		perror("gather: execlp: gzip");
		gzip_message();
		_exit(1);
	}
	/* PARENT */
	/* Feed GNU gzip the data to uncompress */
	close(pfd[0]);
	alarm(WAIT_MAX_SECS);
	while ((n = fread(buf, 1, BUFSIZ - 1, ifp)) > 0) {
		alarm(0);
		/* write() may accept fewer bytes than requested or be
		 * interrupted; keep going until the chunk is delivered. */
		for (off = 0; off < n; off += w) {
			w = write(pfd[1], buf + off, n - off);
			if (w < 0) {
				if (errno == EINTR) {
					w = 0;
					continue;
				}
				perror("gather: write");
				die(ofp);
			}
		}
		alarm(WAIT_MAX_SECS);
	}
	/* The timeout only covers the remote Gatherer, not the time gzip
	 * needs to finish writing its output. */
	alarm(0);
	close(pfd[1]);
	(void) waitpid(pid, NULL, 0);
}

/*
 *  getfullhostname() - Returns a freshly allocated (xstrdup) copy of
 *  the name of the current host: the h_name reported by
 *  xgethostbyname() for the gethostname() result when that lookup
 *  succeeds, otherwise the gethostname() result itself.  Returns NULL
 *  if gethostname() fails.
 */
static char *getfullhostname()
{
	static char local_name[MAXHOSTNAMELEN + 1];
	extern int gethostname();	/* UNIX system call */
	struct hostent *entry;
	char *best;

	if (gethostname(local_name, MAXHOSTNAMELEN) < 0)
		return (NULL);

	entry = xgethostbyname(local_name);
	best = (entry != NULL) ? entry->h_name : local_name;
	return (xstrdup(best));
}

/*
 *  xgethostbyname() - Host lookup that also accepts dotted-quad IPv4
 *  addresses.
 *
 *  A name consisting of four dot-separated digit groups is converted
 *  with inet_addr() and looked up with gethostbyaddr().  If that
 *  reverse lookup fails, a minimal hostent describing just the address
 *  is returned instead, so that hosts without working DNS can still be
 *  reached.  Any other name is passed to gethostbyname().
 *
 *  Returns NULL if the lookup fails or the address is not valid.  The
 *  result lives in static storage (either the resolver's or this
 *  function's) and is overwritten by later calls.
 */
static struct hostent *xgethostbyname(name)
     char *name;
{
	static struct hostent fake;
	static struct in_addr fake_addr;
	static char *fake_addr_list[2];
	static char *fake_aliases[1];
	char part[64];
	char rest[64];
	struct in_addr addr;
	struct hostent *hp;

	/* Field widths keep over-long input from overrunning the scratch
	 * buffers; exactly 4 conversions means nothing trails the quad. */
	if (sscanf(name, "%63[0-9].%63[0-9].%63[0-9].%63[0-9]%63s",
		part, part, part, part, rest) != 4)
		return gethostbyname(name);

	/* Keep the address in a struct in_addr so that exactly the four
	 * address bytes are handed on, whatever the size of long is. */
	addr.s_addr = inet_addr(name);
	if (addr.s_addr == INADDR_NONE)
		return NULL;	/* e.g. a component larger than 255 */

	hp = gethostbyaddr((char *) &addr, sizeof(addr), AF_INET);
	if (hp != NULL)
		return hp;

	/* special hack for DNS's which don't work */
	fake_addr = addr;
	fake_addr_list[0] = (char *) &fake_addr;
	fake_addr_list[1] = NULL;	/* address list is NULL-terminated */
	fake_aliases[0] = NULL;		/* empty alias list */
	memset(&fake, '\0', sizeof(fake));
	fake.h_name = xstrdup(name);
	fake.h_aliases = fake_aliases;
	fake.h_addrtype = AF_INET;
	fake.h_length = sizeof(fake_addr);
	fake.h_addr_list = fake_addr_list;
	return &fake;
}



/*
 *  send_msg() - Writes the NUL-terminated string 'buf' to 'ofp' and
 *  flushes the stream.  If either step fails, reports the error with
 *  perror(), closes the descriptor behind 'ofp' and exits with
 *  status 1.
 */
static void send_msg(buf, ofp)
     char *buf;
     FILE *ofp;
{
	const char *failed = NULL;	/* perror() label of the failing call */
	int len = strlen(buf);

#ifdef DEBUG
	fprintf(stderr, "Sending: %s", buf);
#endif
	if (fwrite(buf, 1, len, ofp) != len)
		failed = "gather: fwrite";
	else if (fflush(ofp) != 0)
		failed = "gather: fflush";

	if (failed == NULL)
		return;

	perror(failed);
	(void) close(fileno(ofp));
	exit(1);
}


/*
 *   xstrdup() - same as strdup(3), except that it never returns NULL:
 *   if memory cannot be allocated the error is reported with perror()
 *   and the program exits with status 1.
 */
static char *xstrdup(s)
     char *s;
{
	size_t nbytes = strlen(s) + 1;	/* string plus its terminating NUL */
	char *copy = (char *) malloc(nbytes);

	if (copy == NULL) {
		perror("gather: malloc");
		exit(1);
	}
	return ((char *) memcpy(copy, s, nbytes));
}

/*
 *  gzip_message() - Tells the user on stderr that gzip could not be
 *  found and where it can be obtained, then flushes stderr.
 */
static void gzip_message()
{
	fputs("Could not locate the gzip program. gzip is available\n"
	    "in GNU's compression software distribution at \n"
	    "             ftp://ftp.gnu.ai.mit.edu/pub/gnu/gzip-1.2.4.shar\n",
	    stderr);
	fflush(stderr);
}

/*
 *  die() - Abandons the session: sends QUIT on 'fp' (the stream to
 *  the Gatherer), flushes it, closes the underlying descriptor and
 *  exits with status 1.  Never returns.
 */
static void die(fp)
     FILE *fp;
{
	fputs("QUIT\n", fp);
	fflush(fp);
	(void) close(fileno(fp));
	exit(1);
}


/*
 *  check_for_gzip() - Forks a 'gzip -V' command to see if gzip really
 *  exists.
 *
 *  Returns 0 if gzip ran and exited successfully.  Otherwise returns a
 *  nonzero value: the child's exit status, or 1 if the fork failed,
 *  the child could not be waited for, or it did not exit normally.
 */
static int check_for_gzip()
{
	int status = 0;
	int pid;
	int nullfd;

	if ((pid = fork()) < 0) {
		perror("gather: fork");
		return 1;
	}
	if (pid == 0) {		/* CHILD */
		/* Discard gzip's output.  Pointing fds 1 and 2 at /dev/null
		 * rather than closing them keeps gzip's own writes from
		 * failing, which could otherwise be taken for "no gzip". */
		nullfd = open("/dev/null", O_WRONLY);
		if (nullfd >= 0) {
			(void) dup2(nullfd, 1);
			(void) dup2(nullfd, 2);
			if (nullfd > 2)
				(void) close(nullfd);
		} else {
			close(1);
			close(2);
		}
		execlp("gzip", "gzip", "-V", (char *) NULL);
		_exit(1);
	}
	/* PARENT */
	while (waitpid(pid, &status, 0) < 0) {
		if (errno != EINTR)
			return 1;
	}
	/* A child killed by a signal must not count as "gzip found" */
	if (!WIFEXITED(status))
		return 1;
	return WEXITSTATUS(status);
}
