static char rcsid[] = "parser.c,v 1.63 1996/01/04 04:07:03 duane Exp";
/*
 *  parser.c -- Broker
 *
 *  William G. Camargo, Penn State Univ.
 *  Darren R. Hardy, University of Colorado.
 *
 *  parse input from Gatherer and perform operations using collector utils.
 *  parser for the Collector<->Gatherer protocol
 *
 *  DEBUG: section  72, level 1         Broker SOIF parsing routines
 *
 *  ----------------------------------------------------------------------
 *  Copyright (c) 1994, 1995.  All rights reserved.
 *  
 *    The Harvest software was developed by the Internet Research Task
 *    Force Research Group on Resource Discovery (IRTF-RD):
 *  
 *          Mic Bowman of Transarc Corporation.
 *          Peter Danzig of the University of Southern California.
 *          Darren R. Hardy of the University of Colorado at Boulder.
 *          Udi Manber of the University of Arizona.
 *          Michael F. Schwartz of the University of Colorado at Boulder.
 *          Duane Wessels of the University of Colorado at Boulder.
 *  
 *    This copyright notice applies to software in the Harvest
 *    ``src/'' directory only.  Users should consult the individual
 *    copyright notices in the ``components/'' subdirectories for
 *    copyright information about other software bundled with the
 *    Harvest source code distribution.
 *  
 *  TERMS OF USE
 *    
 *    The Harvest software may be used and re-distributed without
 *    charge, provided that the software origin and research team are
 *    cited in any use of the system.  Most commonly this is
 *    accomplished by including a link to the Harvest Home Page
 *    (http://harvest.cs.colorado.edu/) from the query page of any
 *    Broker you deploy, as well as in the query result pages.  These
 *    links are generated automatically by the standard Broker
 *    software distribution.
 *    
 *    The Harvest software is provided ``as is'', without express or
 *    implied warranty, and with no support nor obligation to assist
 *    in its use, correction, modification or enhancement.  We assume
 *    no liability with respect to the infringement of copyrights,
 *    trade secrets, or any patents, and are not responsible for
 *    consequential damages.  Proper use of the Harvest software is
 *    entirely the responsibility of the user.
 *  
 *  DERIVATIVE WORKS
 *  
 *    Users may make derivative works from the Harvest software, subject 
 *    to the following constraints:
 *  
 *      - You must include the above copyright notice and these 
 *        accompanying paragraphs in all forms of derivative works, 
 *        and any documentation and other materials related to such 
 *        distribution and use acknowledge that the software was 
 *        developed at the above institutions.
 *  
 *      - You must notify IRTF-RD regarding your distribution of 
 *        the derivative work.
 *  
 *      - You must clearly notify users that your are distributing 
 *        a modified version and not the original Harvest software.
 *  
 *      - Any derivative product is also subject to these copyright 
 *        and use restrictions.
 *  
 *    Note that the Harvest software is NOT in the public domain.  We
 *    retain copyright, as specified above.
 *  
 *  HISTORY OF FREE SOFTWARE STATUS
 *  
 *    Originally we required sites to license the software in cases
 *    where they were going to build commercial products/services
 *    around Harvest.  In June 1995 we changed this policy.  We now
 *    allow people to use the core Harvest software (the code found in
 *    the Harvest ``src/'' directory) for free.  We made this change
 *    in the interest of encouraging the widest possible deployment of
 *    the technology.  The Harvest software is really a reference
 *    implementation of a set of protocols and formats, some of which
 *    we intend to standardize.  We encourage commercial
 *    re-implementations of code complying to this set of standards.  
 *  
 */
#include "broker.h"
#include "log.h"

/* Size in bytes of the scratch buffer below (BUFSIZ is supplied by <stdio.h>) */
#define SP_SIZE BUFSIZ

/* Scratch buffer; P_parse_command() reads each command line into it */
char space[SP_SIZE];
/* Count of objects received; incremented once per template in P_parse_object() */
extern int recv_nobjs;
/* Gatherer ID record; allocated and released for each object in P_parse_object() */
extern GathererID *COL_gid;

/*
 *  P_parse_input -- parse one complete input stream of commands.
 *
 *  tfile   For type < BAFULL_U this is really an already-open FILE *
 *          (passed as a char *); it is read directly and closed with
 *          COL_Close_Read_Pipe().  Otherwise it is the name of a file
 *          that is opened for reading, removed once parsing is done,
 *          and released with xfree() on every exit.
 *  type    Collection type; only its relation to BAFULL_U is examined.
 *
 *  Commands are parsed with P_parse_command() until it stops returning
 *  SUCCESS, and that final status is returned.  ERROR is returned if
 *  the file cannot be opened or cannot be removed.
 */
int P_parse_input(tfile, type)
     char *tfile;
     int type;
{
	int err = SUCCESS;
	FILE *InFile = NULL;

	Debug(72, 1, ("P_parse_input: starting with type %d\n", type));

	/* The tfile is really a FILE * to 'gather' if a Gatherer */
	if (type < BAFULL_U) {
		InFile = (FILE *) tfile;
	} else if ((InFile = fopen(tfile, "r")) == NULL) {
		errorlog("Parser: Cannot read %s\n", tfile);
		/*
		 *  All other exits taken in file mode release tfile, and
		 *  the caller cannot tell this failure apart from them,
		 *  so release the name here as well instead of leaking it.
		 */
		xfree(tfile);
		return ERROR;
	}

	/* No command has been seen yet, so each call starts in NO_MODE */
	while (err == SUCCESS) {
		err = P_parse_command(InFile, NO_MODE);
	}

	if (type < BAFULL_U) {
		COL_Close_Read_Pipe(InFile);
	} else {
		(void) fclose(InFile);

		/* The input file is consumed: remove it from disk */
		if (unlink(tfile) < 0) {
			errorlog("Parser:  Cannot remove %s\n", tfile);
			log_errno(tfile);
			xfree(tfile);
			return ERROR;
		}
		xfree(tfile);
	}
	return (err);
}

/*
 *  P_parse_command -- read one "@COMMAND" line and process its objects.
 *
 *  The next non-whitespace character on InFile must be '@'.  The rest
 *  of that line selects DELETE, UPDATE or REFRESH.  A recognized
 *  command whose line also contains '}' is a no-op.  Otherwise objects
 *  are parsed with P_parse_object() until it stops returning SUCCESS.
 *
 *  The incoming Mode value is only reported in the debug message; it
 *  is replaced by the mode named on the command line.
 *
 *  Returns SUCCESS once a command has been handled, or ERROR if no
 *  command could be read or the command is not recognized.
 */
int P_parse_command(InFile, Mode)
     FILE *InFile;
     int Mode;
{
	char *cmdline = space;
	int pass;

	Debug(72, 1, ("P_parse_command: starting with Mode %d\n", Mode));

	/* EOF can never compare equal to '@', so one test covers both */
	if (P_get_next_char(InFile) != '@') {
		return ERROR;
	}

	cmdline[0] = '\0';
	if (fgets(cmdline, SP_SIZE, InFile) == NULL) {
		errorlog("P_parse_command: Cannot read command.\n");
		return ERROR;
	}

	/* Map the command keyword onto a parsing mode */
	if (strncmp(cmdline, "DELETE", 6) == 0) {
		Mode = DEL_MODE;
	} else if (strncmp(cmdline, "UPDATE", 6) == 0) {
		Mode = UPD_MODE;
	} else if (strncmp(cmdline, "REFRESH", 7) == 0) {
		Mode = REF_MODE;
	} else {
		errorlog("Parser: P_parse_command: Cannot determine next command: %s\n", cmdline);
		return ERROR;
	}

	/* A '}' on the command line itself means there is nothing to do */
	if (strchr(cmdline, '}') != NULL) {
		return SUCCESS;
	}

	init_parse_template_file(InFile);
	for (pass = 0; P_parse_object(Mode) == SUCCESS; pass++) {
		/* give status whenever the object count is a multiple of 250 */
		if (recv_nobjs > 0 && (recv_nobjs % 250) == 0) {
			Log("Received %d objects so far...\n", recv_nobjs);
		}
		/* check for pending connections on every 32nd pass */
		if ((pass & 0x1F) == 0) {
			(void) select_loop(0, 0, 0);
		}
	}
	finish_parse_template();

	return SUCCESS;
}

/*
 *  P_parse_object -- update/delete/refresh a single object.
 *
 *  Reads the next template from the parser, builds a reg_t record for
 *  it, and hands that record to the handler for Mode (UPD_MODE,
 *  DEL_MODE or REF_MODE).  In UPD_MODE the template is also written to
 *  the file returned by COL_UPD_Obj_begin().
 *
 *  Returns:
 *    - SUCCESS if no template was read but the input has not ended;
 *    - ERROR at end of input, if the update cannot be initialized, if
 *      COL_Fill_Entry() rejects the record, or if Mode is not one of
 *      the three modes above (the record is freed in these cases);
 *    - otherwise the status returned by the mode's handler.
 */
int P_parse_object(Mode)
     int Mode;
{
	reg_t *new_r;
	Template *template;
	AVList *walker;
	FILE *OutFile = NULL;
	extern time_t max_update_time;

	/* 
	 *  Read the next template from the input.  If the parser returns NULL,
	 *  then we check to see if we've reached the end of the input.  If
	 *  so, we stop the parsing by returning ERROR; otherwise we continue
	 *  trying to parse by returning SUCCESS.
	 */
	if ((template = parse_template()) == NULL)
		return (is_parse_end_of_input()? ERROR : SUCCESS);

	Debug(72, 1, ("P_parse_object: received object: %s\n", template->url));
	recv_nobjs++;

	/* Set up the new reg_t record; and add the URL to it */
	new_r = (reg_t *) xmalloc(sizeof(reg_t));
	memset(new_r, '\0', sizeof(reg_t));	/* null entire record */
	new_r->url = xstrdup(template->url);	/* Save URL in reg ent */
	new_r->urls = strlen(new_r->url);
	new_r->GID = -1;

	/* Find a file to which to write the template (updates only) */
	if (Mode == UPD_MODE) {
		if ((OutFile = COL_UPD_Obj_begin(new_r)) == NULL) {
			Log("WARNING: Cannot initialize update: %s (FD %d).\n",
			    new_r->url, new_r->FD);
			free_template(template);
			RG_Free_Entry(new_r);
			return ERROR;
		}
	}

	/* 
	 *  Walk the attribute-value list of the template, and save
	 *  away the needed reg_t values into new_r.  Also, normalize
	 *  all attribute names.  COL_gid is set up first, before any
	 *  attribute is saved, and is released again below once
	 *  COL_Fill_Entry() has run.
	 */
	COL_gid = (GathererID *) xmalloc(sizeof(GathererID));
	memset(COL_gid, '\0', sizeof(GathererID));
	COL_gid->GID = -1;

	for (walker = template->list; walker; walker = walker->next) {
		(void) COL_Normalize_Name(walker->data->attribute);
		(void) COL_Save_Att(walker->data, new_r);
	}

	/* pick off max update time for logging in LASTUPDATE */
	max_update_time = new_r->update_time > max_update_time ?
	    new_r->update_time : max_update_time;

	/* Now write the template to the file in the database, if needed */
	if (OutFile != NULL) {
		(void) init_print_template(OutFile);
		print_template(template);
		finish_print_template();
		(void) fclose(OutFile);
		OutFile = NULL;
	}
	free_template(template);	/* Don't need anymore */

	/* Verify/correct the reg_t record */
	if (COL_Fill_Entry(new_r) == ERROR) {

		/* backout of the changes */
		if (Mode == UPD_MODE)
			(void) SM_Destroy_Obj(new_r->FD);
		RG_Free_Entry(new_r);
		RG_gid_free(COL_gid);
		return ERROR;
	}
	RG_gid_free(COL_gid);

	/* Finish the job: new_r is passed on to the handler for this mode */
	switch (Mode) {
	case UPD_MODE:
		return (COL_UPD_Obj_end(new_r));
	case DEL_MODE:
		return (COL_DEL_Obj(new_r));
	case REF_MODE:
		return (COL_REF_Obj(new_r));
	default:
		break;
	}

	/*
	 *  No handler received the record, so it must be released here
	 *  like on the other error exits; otherwise it would be leaked.
	 */
	errorlog("P_parse_object: Internal error: Illegal Mode: %d\n", Mode);
	RG_Free_Entry(new_r);
	return (ERROR);
}

/*
 *  P_get_next_char -- get next non-whitespace character on input stream.
 *
 *  Characters are read from InFile with getc() and skipped for as long
 *  as isspace() classifies them as whitespace.  Returns the first other
 *  character, or EOF if getc() reports EOF before one is found.
 */
int P_get_next_char(InFile)
     FILE *InFile;
{
	int c;

	/*
	 *  getc() yields either EOF or a value already in the unsigned
	 *  char range, so the result can be given to isspace() as is.
	 *  EOF is tested explicitly so that it is never folded into a
	 *  character value (a cast to unsigned char would typically turn
	 *  it into 0xFF), which could keep the loop spinning at end of
	 *  input in a locale that classifies that value as whitespace.
	 */
	do {
		c = getc(InFile);
	} while (c != EOF && isspace(c));

	return (c);
}
