/* RCS revision identification string for this file. */
static char rcsid[] = "data_object.c,v 1.30 1996/01/05 20:28:52 duane Exp";
/*
 *  data_object.c - Operations for DataObject manipulations.
 *
 *  DEBUG: section  60, level 1         Gatherer essence data object processing
 *
 *  Darren Hardy, hardy@cs.colorado.edu, February 1994
 *
 *  ----------------------------------------------------------------------
 *  Copyright (c) 1994, 1995.  All rights reserved.
 *  
 *    The Harvest software was developed by the Internet Research Task
 *    Force Research Group on Resource Discovery (IRTF-RD):
 *  
 *          Mic Bowman of Transarc Corporation.
 *          Peter Danzig of the University of Southern California.
 *          Darren R. Hardy of the University of Colorado at Boulder.
 *          Udi Manber of the University of Arizona.
 *          Michael F. Schwartz of the University of Colorado at Boulder.
 *          Duane Wessels of the University of Colorado at Boulder.
 *  
 *    This copyright notice applies to software in the Harvest
 *    ``src/'' directory only.  Users should consult the individual
 *    copyright notices in the ``components/'' subdirectories for
 *    copyright information about other software bundled with the
 *    Harvest source code distribution.
 *  
 *  TERMS OF USE
 *    
 *    The Harvest software may be used and re-distributed without
 *    charge, provided that the software origin and research team are
 *    cited in any use of the system.  Most commonly this is
 *    accomplished by including a link to the Harvest Home Page
 *    (http://harvest.cs.colorado.edu/) from the query page of any
 *    Broker you deploy, as well as in the query result pages.  These
 *    links are generated automatically by the standard Broker
 *    software distribution.
 *    
 *    The Harvest software is provided ``as is'', without express or
 *    implied warranty, and with no support nor obligation to assist
 *    in its use, correction, modification or enhancement.  We assume
 *    no liability with respect to the infringement of copyrights,
 *    trade secrets, or any patents, and are not responsible for
 *    consequential damages.  Proper use of the Harvest software is
 *    entirely the responsibility of the user.
 *  
 *  DERIVATIVE WORKS
 *  
 *    Users may make derivative works from the Harvest software, subject 
 *    to the following constraints:
 *  
 *      - You must include the above copyright notice and these 
 *        accompanying paragraphs in all forms of derivative works, 
 *        and any documentation and other materials related to such 
 *        distribution and use acknowledge that the software was 
 *        developed at the above institutions.
 *  
 *      - You must notify IRTF-RD regarding your distribution of 
 *        the derivative work.
 *  
 *      - You must clearly notify users that your are distributing 
 *        a modified version and not the original Harvest software.
 *  
 *      - Any derivative product is also subject to these copyright 
 *        and use restrictions.
 *  
 *    Note that the Harvest software is NOT in the public domain.  We
 *    retain copyright, as specified above.
 *  
 *  HISTORY OF FREE SOFTWARE STATUS
 *  
 *    Originally we required sites to license the software in cases
 *    where they were going to build commercial products/services
 *    around Harvest.  In June 1995 we changed this policy.  We now
 *    allow people to use the core Harvest software (the code found in
 *    the Harvest ``src/'' directory) for free.  We made this change
 *    in the interest of encouraging the widest possible deployment of
 *    the technology.  The Harvest software is really a reference
 *    implementation of a set of protocols and formats, some of which
 *    we intend to standardize.  We encourage commercial
 *    re-implementations of code complying to this set of standards.  
 *  
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <memory.h>
#include <unistd.h>
#include <errno.h>
#include <sys/types.h>
#include "url.h"
#include "util.h"
#include "essence.h"

/*
 *  Local functions - file-private helpers, declared here so that they
 *  can be called before their definitions further down.  The empty
 *  (K&R style) parameter lists provide no argument checking; each of
 *  these is defined below as taking a single DataObject pointer.
 */
static void remove_local_file();
static int init_file_object();
static int init_remote_object();

/*
 *  create_data_object() - Creates a DataObject structure for the given url.
 *  The given flags are associated with the object.
 *
 *  Returns NULL if:
 *     - url is NULL,
 *     - url_open() rejects the URL,
 *     - do_confhost is set and url_confirm() returns non-zero,
 *     - the URL type is not one of the types handled below, or
 *     - the per-type initialization fails.
 *  On every failure path the partially built object (and its URL, if
 *  one was opened) is released before returning.
 *
 *  Otherwise returns an initialized DataObject pointer where the
 *  following is guaranteed:
 *
 *  object->url is set
 *  object->flags is set to flags (plus F_TEMPORARY for remote types)
 *  object->data, object->type, object->ttype and object->avl are NULL
 *  object->dsize is 0
 *  if object->url->type is URL_FILE, then
 *     object->basename points into object->url->filename
 *     object->s is set if object->url->filename is set
 *     object->parent_url is NULL
 *  if object->url->type is URL_HTTP, URL_FTP, URL_GOPHER, URL_NEWS,
 *  URL_NOP or URL_X, then
 *     object->basename points into object->url->pathname
 *     object->s is NULL
 *     object->parent_url is NULL
 *     object->flags has F_TEMPORARY
 *
 *  NOTE(review): earlier documentation stated that object->url->url is
 *  trimmed of any '#' directives for HTTP; that is not done in this
 *  function -- presumably url_open() does it; confirm there.
 */
DataObject *create_data_object(url, flags)
     char *url;
     unsigned int flags;
{
	DataObject *object;

	/* A NULL url can be neither printed with %s nor opened. */
	if (url == NULL)
		return (NULL);

	Debug(60, 1, ("create_data_object(%s, %u)\n", url, flags));
	object = xmalloc(sizeof(DataObject));
	memset(object, '\0', sizeof(DataObject));

	/* Common DataObject Initializations */
	object->s = NULL;
	object->type = NULL;
	object->ttype = NULL;
	object->data = NULL;
	object->dsize = 0;
	object->flags = flags;
	object->avl = NULL;

	/* Validate and Initialize URL */
	if ((object->url = url_open(url)) == NULL) {
		xfree(object);
		return (NULL);
	}
	/* Validate the host */
	if (do_confhost && url_confirm(object->url))
		goto fail;

	/* Per URL object Initializations */
	switch (object->url->type) {
	case URL_FILE:
		if (init_file_object(object))
			goto fail;
		break;
	case URL_HTTP:
	case URL_FTP:
	case URL_GOPHER:
	case URL_NEWS:
	case URL_NOP:
	case URL_X:
		if (init_remote_object(object))
			goto fail;
		break;
	default:
		/*
		 * Unsupported type: the object would be handed back without
		 * its per-type fields (e.g. basename) initialized, so honor
		 * the documented contract and return NULL instead.
		 */
		errorlog("Unsupported URL: create_data_object: %s\n",
		    object->url->url);
		goto fail;
	}
	return (object);

fail:
	/* Single cleanup path for every failure after url_open() succeeded. */
	url_close(object->url);
	xfree(object);
	return (NULL);
}

/*
 *  create_dol() - Builds a single-element DataObjectList for the given
 *  url.  The element's object is obtained from create_data_object(url,
 *  flags) and its next pointer is NULL.  Returns NULL (after releasing
 *  the list element) if the object could not be created.
 */
DataObjectList *create_dol(url, flags)
     char *url;
     unsigned int flags;
{
	DataObjectList *node;

	Debug(60, 1, ("create_dol(%s)\n", url));

	node = xmalloc(sizeof(DataObjectList));
	node->object = create_data_object(url, flags);
	if (node->object != NULL) {
		node->next = NULL;
		return (node);
	}

	/* No object, so the list element is of no use either. */
	xfree(node);
	return (NULL);
}

/*
 *  free_data_object() - Cleans up a DataObject.  A NULL object is
 *  silently ignored.
 *
 *  If the object carries F_TEMPORARY, its local file or directory is
 *  removed first.  Then the attribute list, URL, data, type, parent_url
 *  and stat buffer are released (each only if set), followed by the
 *  object itself.
 *
 *  object->basename is not freed: where this file sets it, it points
 *  into a string belonging to object->url.  object->ttype is not freed
 *  here either; NOTE(review): its ownership is not visible in this
 *  file -- confirm it is not heap allocated.
 */
void free_data_object(object)
     DataObject *object;
{
	/* Must come before anything that dereferences object. */
	if (object == NULL)
		return;

	/* object->url may be unset (it is tested again below). */
	Debug(60, 1, ("free_data_object(%s)\n",
		object->url ? object->url->url : "(null)"));

	/* Needs object->url and object->s, so do it before they go away. */
	if (object->flags & F_TEMPORARY)
		remove_local_file(object);

	if (object->avl)
		free_AVList(object->avl);
	if (object->url)
		url_close(object->url);
	if (object->data)
		xfree(object->data);
	if (object->type)
		xfree(object->type);
	if (object->parent_url)
		xfree(object->parent_url);
	if (object->s)
		xfree(object->s);
	xfree(object);
}

/*
 *  free_dol() - Frees an entire DataObjectList: for every element,
 *  the attached DataObject is released with free_data_object() and
 *  then the element itself is freed.  A NULL list is a no-op.
 */
void free_dol(object)
     DataObjectList *object;
{
	DataObjectList *node, *following;

	for (node = object; node != NULL; node = following) {
		/* Remember the successor before this element is released. */
		following = node->next;
		free_data_object(node->object);
		xfree(node);
	}
}

/*
 *  remove_local_file() - Removes the object from the file system.
 *
 *  Does nothing unless the object has a URL, a local filename and a
 *  stat buffer.  Directories are removed with rmdir(); if that fails
 *  because the directory is not empty, "/bin/rm -rf" is run on it via
 *  run_cmd().  Anything else is removed with unlink().  Other failures
 *  are reported through log_errno2().
 */
static void remove_local_file(object)
     DataObject *object;
{
	/* Command prefix (sizeof includes its NUL) plus a maximal path. */
	char buf[sizeof("/bin/rm -rf ") + MAXPATHLEN];

	if (!object->url || !object->url->filename || !object->s)
		return;

	Debug(60, 1, ("Removing %s (%s) -- %s\n", object->url->url,
		S_ISDIR(object->s->st_mode) ? "Directory" : "File",
		object->url->filename));

	if (S_ISDIR(object->s->st_mode)) {
		if (rmdir(object->url->filename) < 0) {
			/*
			 * POSIX allows either ENOTEMPTY or EEXIST to report
			 * a directory that is not empty.
			 */
			if (errno == ENOTEMPTY || errno == EEXIST) {
				/* dir is not empty, try harder */
				if (strlen(object->url->filename) > MAXPATHLEN) {
					/*
					 * Refuse rather than truncate: a
					 * shortened path handed to rm -rf
					 * could name something else.
					 */
					errorlog("remove_local_file: path too long: %s\n",
					    object->url->filename);
				} else {
					/*
					 * NOTE(review): the filename is placed
					 * unquoted into the command line;
					 * presumably run_cmd() passes it to a
					 * shell, in which case names with
					 * spaces or metacharacters are unsafe
					 * -- confirm filenames are always
					 * locally generated.
					 */
					sprintf(buf, "/bin/rm -rf %s",
					    object->url->filename);
					run_cmd(buf);
				}
			} else {
				log_errno2(__FILE__, __LINE__, object->url->filename);
			}
		}
	} else {
		if (unlink(object->url->filename) < 0)
			log_errno2(__FILE__, __LINE__, object->url->filename);
	}
}


/*
 *  init_file_object() - Special initialization for "file:" objects.
 *
 *  Points object->basename at the last component of
 *  object->url->filename, clears object->parent_url, and then hands
 *  the object to object_retrieve(), whose result is returned
 *  (non-zero on failure).
 */
static int init_file_object(object)
     DataObject *object;
{
	char *slash;

	Debug(60, 1, ("init_file_object(%s)\n", object->url->filename));

	slash = strrchr(object->url->filename, '/');
	if (slash == NULL) {
		/* No directory part: the whole name is the basename. */
		object->basename = object->url->filename;
	} else if (strlen(slash) > 1) {
		/* Something follows the last '/': skip over the slash. */
		object->basename = slash + 1;
	} else {
		/* Name ends in '/': keep "/" rather than an empty string. */
		object->basename = slash;
	}

	object->parent_url = NULL;
	return (object_retrieve(object));
}

/*
 *  init_remote_object() - Special initialization for remote objects.
 *
 *  Points object->basename at the last component of
 *  object->url->pathname, clears object->s and object->parent_url,
 *  and marks the object F_TEMPORARY.  Nothing is retrieved here.
 *  Always returns 0.
 */
static int init_remote_object(object)
     DataObject *object;
{
	char *tail = strrchr(object->url->pathname, '/');

	if (tail == NULL)
		tail = object->url->pathname;	/* no '/' at all */
	else if (tail[1] != '\0')
		tail++;				/* step past the '/' */
	/* otherwise the path ends in '/': keep "/" as the basename */

	object->basename = tail;
	object->flags |= F_TEMPORARY;
	object->parent_url = NULL;
	object->s = NULL;
	return (0);
}

/*
 *  object_retrieve() - Retrieves the object to the local file system.
 *  Returns non-zero if the object cannot be retrieved; zero otherwise.
 *
 *  Objects flagged F_NO_ACCESS or F_MANUAL are refused immediately.
 *  If url_retrieve() fails, the object is flagged F_NO_ACCESS so that
 *  later calls fail without retrying.  On success, if the URL has a
 *  local filename and no stat buffer has been filled in yet,
 *  object->s is allocated and filled with lstat(); if lstat() fails
 *  the buffer is released again, object->s is reset to NULL and
 *  non-zero is returned.
 */
int object_retrieve(object)
     DataObject *object;
{
	if (object->flags & (F_NO_ACCESS | F_MANUAL))
		return (1);

	Debug(60, 1, ("object_retrieve(%s,%d)\n", object->url->url, object->flags));

	if (url_retrieve(object->url)) {
		errorlog("object_retrieve: Failed to retrieve URL: %s\n",
		    object->url->url);
		object->flags |= F_NO_ACCESS;	/* negative cache it */
		return (1);
	}
	if (object->url->filename && object->s == NULL) {
		object->s = xmalloc(sizeof(*object->s));
		if (lstat(object->url->filename, object->s) < 0) {
			char buf[BUFSIZ];

			/*
			 * BUFSIZ is not guaranteed to hold a maximal path,
			 * so bound the filename with a precision; this is
			 * only a log label, so truncation is harmless.
			 * sizeof("lstat: ") covers the prefix and the NUL.
			 */
			sprintf(buf, "lstat: %.*s",
			    (int) (sizeof(buf) - sizeof("lstat: ")),
			    object->url->filename);
			log_errno2(__FILE__, __LINE__, buf);
			xfree(object->s);
			object->s = NULL;
			return (1);	/* sorry... */
		}
	}
	return (0);
}
