/* RCS-style identification string for this file (name, revision, date, author, state). */
static char rcsid[] = "main.c,v 1.81 1996/02/01 06:34:07 duane Exp";
/*
 *  main.c - User front-end for the Essence system.
 *
 *  DEBUG: section  62, level 1         Gatherer essence main
 *
 *  Darren Hardy, hardy@cs.colorado.edu, February 1994
 *
 *  ----------------------------------------------------------------------
 *  Copyright (c) 1994, 1995.  All rights reserved.
 *  
 *    The Harvest software was developed by the Internet Research Task
 *    Force Research Group on Resource Discovery (IRTF-RD):
 *  
 *          Mic Bowman of Transarc Corporation.
 *          Peter Danzig of the University of Southern California.
 *          Darren R. Hardy of the University of Colorado at Boulder.
 *          Udi Manber of the University of Arizona.
 *          Michael F. Schwartz of the University of Colorado at Boulder.
 *          Duane Wessels of the University of Colorado at Boulder.
 *  
 *    This copyright notice applies to software in the Harvest
 *    ``src/'' directory only.  Users should consult the individual
 *    copyright notices in the ``components/'' subdirectories for
 *    copyright information about other software bundled with the
 *    Harvest source code distribution.
 *  
 *  TERMS OF USE
 *    
 *    The Harvest software may be used and re-distributed without
 *    charge, provided that the software origin and research team are
 *    cited in any use of the system.  Most commonly this is
 *    accomplished by including a link to the Harvest Home Page
 *    (http://harvest.cs.colorado.edu/) from the query page of any
 *    Broker you deploy, as well as in the query result pages.  These
 *    links are generated automatically by the standard Broker
 *    software distribution.
 *    
 *    The Harvest software is provided ``as is'', without express or
 *    implied warranty, and with no support nor obligation to assist
 *    in its use, correction, modification or enhancement.  We assume
 *    no liability with respect to the infringement of copyrights,
 *    trade secrets, or any patents, and are not responsible for
 *    consequential damages.  Proper use of the Harvest software is
 *    entirely the responsibility of the user.
 *  
 *  DERIVATIVE WORKS
 *  
 *    Users may make derivative works from the Harvest software, subject 
 *    to the following constraints:
 *  
 *      - You must include the above copyright notice and these 
 *        accompanying paragraphs in all forms of derivative works, 
 *        and any documentation and other materials related to such 
 *        distribution and use acknowledge that the software was 
 *        developed at the above institutions.
 *  
 *      - You must notify IRTF-RD regarding your distribution of 
 *        the derivative work.
 *  
 *      - You must clearly notify users that your are distributing 
 *        a modified version and not the original Harvest software.
 *  
 *      - Any derivative product is also subject to these copyright 
 *        and use restrictions.
 *  
 *    Note that the Harvest software is NOT in the public domain.  We
 *    retain copyright, as specified above.
 *  
 *  HISTORY OF FREE SOFTWARE STATUS
 *  
 *    Originally we required sites to license the software in cases
 *    where they were going to build commercial products/services
 *    around Harvest.  In June 1995 we changed this policy.  We now
 *    allow people to use the core Harvest software (the code found in
 *    the Harvest ``src/'' directory) for free.  We made this change
 *    in the interest of encouraging the widest possible deployment of
 *    the technology.  The Harvest software is really a reference
 *    implementation of a set of protocols and formats, some of which
 *    we intend to standardize.  We encourage commercial
 *    re-implementations of code complying to this set of standards.  
 *  
 */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <memory.h>
#include <signal.h>
#include <sys/param.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <netdb.h>
#include <locale.h>
#include "util.h"
#define MAIN
#include "essence.h"
#undef MAIN

#if defined(DEBUG) && defined(_HARVEST_OSF_)
#include <malloc.h>
#endif

/*
 *  File-scope state private to this translation unit.
 */

/* Fixed-size path buffers; do_startup() fills in the first four. */
static char byname[MAXPATHLEN + 1];	/* file naming config file */
static char byurl[MAXPATHLEN + 1];	/* URL naming config file */
static char bycontent[MAXPATHLEN + 1];	/* file content config file */
static char magic[MAXPATHLEN + 1];	/* file(1) magic file */
static char default_libpath[BUFSIZ];	/* "<harvest libdir>/gatherer" */

/* Values taken from the command line; NULL/0 until main() sets them. */
static char *dbdir = NULL;		/* directory to put database */
static char *libpath = NULL;		/* directories to put config files */
static char *logfile = NULL;		/* file to log messages to */
static char *input_file = NULL;		/* file from which to get input files */
static char *pp_rules_file = NULL;	/* Rules for Post-processing */
static int max_deletions = 0;		/* # of GDBM deletions before reorg */

/* Gatherer identity overrides, consumed by init_gatherer_id(). */
static char *gname = NULL;		/* Gatherer name */
static char *ghost = NULL;		/* Gatherer host */
static char *gver = NULL;		/* Gatherer version */

/* Forward declarations of the local functions defined below. */
static void usage();
static void do_startup();
static void do_shutdown();
static void init_gatherer_id();
static void process_object();
static void nested_feeder();

/*
 *  usage() - Prints the command synopsis, the Essence version and the
 *  list of recognized options to stderr, then exits with status 1.
 *  Never returns.
 */
static void usage()
{
	/* One literal per output line; the compiler joins adjacent literals. */
	fprintf(stderr,
	    "Usage: essence [options] -f input-URLs\n"
	    "    OR essence [options] URL ...\n"
	    "\n"
	    "Essence version %s\n"
	    "\n"
	    "Options:\n"
	    "\n"
	    "  --allowlist filename    File with list of types to allow\n"
	    "  --confirm-host          Explicitly confirm that host is valid\n"
	    "  --dbdir directory       Directory to place database\n"
	    "  --default-ttl seconds   Default time-to-live value\n"
	    "  --default-refresh secs  Default refresh-rate value\n"
	    "  --delete-duplicates     Delete duplicates during ``exploder''\n"
	    "  --fake-md5s             Computes fake MD5s for SOIF generated by unnesting\n"
	    "  --fast                  Use fast algorithms when possible. (May be risky.)\n"
	    "  --fast-summarizing      Use ``fast'' summarizer.  (May be risky.)\n"
	    "  --full-text             Use entire file instead of summarizing\n"
	    "  --gatherer-host\t  Gatherer-Host value\n"
	    "  --gatherer-name\t  Gatherer-Name value\n"
	    "  --gatherer-version\t  Gatherer-Version value\n"
	    "  --help\t\t  Print usage information\n"
	    "  --libdir path           List of Directories to find configuration files\n"
	    "  --log logfile           Name of the file to log messages to\n"
	    "  --max-deletions n       Number of GDBM deletions before reorganization\n"
	    "  --max-refresh n         Maximum number of objects to refresh\n"
	    "  --memory-efficient      Try to be memory efficient at the expense of speed\n"
	    "  --minimal-bookkeeping   Generates a minimal amount of bookkeeping attrs\n"
	    "  --no-access\t\t  Do not read contents of objects\n"
	    "  --no-keywords\t\t  Do not automatically generate keywords\n"
	    "  --post-process filename Perform summary post-processing\n"
	    "  --quiet                 Minimize logging output\n"
	    "  --stoplist filename     File with list of types to remove\n"
	    "  --tmpdir directory      Name of directory to use for temporary files\n"
	    "  --type-only             Only type data; do not summarize objects\n"
	    "  --verbose               Verbose output (the default)\n"
	    "  --version               Version information\n",
	    GATHERER_VERSION);
	exit(1);
}

/*
 *  locate_in_libpath() - Searches each directory of the colon-separated
 *  path_list for a file named fname that can be opened for reading.
 *  Returns a strdup()'d "dir/fname" for the first hit, or NULL when no
 *  directory holds the file (or memory is exhausted).  A directory whose
 *  combined path would not fit in MAXPATHLEN is skipped rather than
 *  being allowed to overflow the scratch buffer.
 */
static char *locate_in_libpath(const char *path_list, const char *fname)
{
	char candidate[MAXPATHLEN + 1];
	char *dirs, *dir, *found = NULL;
	int fd;

	/* strtok() writes into its argument, so work on a private copy */
	if ((dirs = strdup(path_list)) == NULL)
		return NULL;
	for (dir = strtok(dirs, ":"); dir; dir = strtok(NULL, ":")) {
		if (strlen(dir) + 1 + strlen(fname) > MAXPATHLEN)
			continue;	/* "dir/fname" would not fit */
		sprintf(candidate, "%s/%s", dir, fname);
		if ((fd = open(candidate, O_RDONLY)) >= 0) {
			close(fd);
			found = strdup(candidate);
			break;
		}
	}
	free(dirs);
	return found;
}

/*
 *  main() - Entry point for essence.  Sets the global defaults, parses
 *  the command line, sets up logging, locates the stoplist (and the
 *  quicksum file when USE_QUICKSUM is defined), verifies that the files
 *  and directories it was given are accessible, runs do_startup(), and
 *  then feeds every input URL through process_object().  URLs come from
 *  the -f file ("-" means stdin) or, failing that, from the remaining
 *  command-line arguments.  Always leaves through do_shutdown()/exit().
 */
int main(argc, argv)
     int argc;
     char *argv[];
{
	DataObject *obj = NULL;
	unsigned int object_flags = 0;
	const char *hlibdir;
#ifdef HAVE_GETCWD
	extern char *getcwd();
#else
	extern char *getwd();
#endif

	setlocale(LC_ALL, "");

	/* Initialize Globals */
	harvest_add_gatherer_path();
	hlibdir = harvest_libdir();
	/* default_libpath is a fixed array; refuse a path that cannot fit */
	if (hlibdir != NULL &&
	    strlen(hlibdir) + sizeof("/gatherer") > sizeof(default_libpath)) {
		fprintf(stderr, "essence: library directory path is too long\n");
		exit(1);
	}
	sprintf(default_libpath, "%s/gatherer", hlibdir);
	verbose = 1;
	do_dupremove = 0;
	do_keywords = 1;
	do_fulltext = 0;
	do_typeonly = 0;
	do_minimalbooks = 0;
	do_fakemd5s = 0;
	memefficient = 0;
	do_confhost = 0;
	do_cksumdups = 1;
	do_fast = 0;
	gatherer_id = NULL;
	max_refresh = 0;
	default_ttl = DEFAULT_TTL;
	default_refresh = DEFAULT_REFRESH;
	tmpdir = stoplist = allowlist = NULL;
	topdir = xmalloc(MAXPATHLEN + 1);
	/* Remember the directory we were started in */
#ifdef HAVE_GETCWD
	if (getcwd(topdir, MAXPATHLEN) == NULL) {
		perror("getcwd");
#else
	if (getwd(topdir) == NULL) {
		perror("getwd");
#endif
		exit(1);
	}
#ifdef USE_QUICKSUM
	quicksum_file = NULL;
#endif

	/* Process command line */
	if (argc < 2)
		usage();
	debug_init();
	/*
	 *  Consume leading arguments that start with '-'.  Options that
	 *  take a value pre-decrement argc to make sure the value exists
	 *  and then step argv onto it.  Whatever is left afterwards is
	 *  treated as the list of URLs.
	 */
	for (argc--, argv++; argc > 0 && **argv == '-'; argc--, argv++) {
		if (!strcmp(*argv, "--help")) {
			usage();
		} else if (!strncmp(*argv, "-D", 2)) {
			debug_flag(*argv);
			verbose = 1;
		} else if (!strcmp(*argv, "--fake-md5s")) {
			do_fakemd5s = 1;
		} else if (!strcmp(*argv, "--delete-duplicates")) {
			do_dupremove = 1;
		} else if (!strcmp(*argv, "--full-text")) {
			do_fulltext = 1;
		} else if (!strcmp(*argv, "--fast")) {
			do_fast = 1;
		} else if (!strcmp(*argv, "--fast-summarizing")) {
			do_cksumdups = 0;
		} else if (!strcmp(*argv, "--confirm-host")) {
			do_confhost = 1;
		} else if (!strcmp(*argv, "--minimal-bookkeeping")) {
			do_minimalbooks = 1;
		} else if (!strcmp(*argv, "--memory-efficient")) {
			memefficient = 1;
		} else if (!strcmp(*argv, "--no-keywords")) {
			do_keywords = 0;
		} else if (!strcmp(*argv, "--no-access")) {
			object_flags |= F_NO_ACCESS;
		} else if (!strcmp(*argv, "--type-only")) {
			do_typeonly = 1;
		} else if (!strcmp(*argv, "--verbose")) {
			verbose = 1;
		} else if (!strcmp(*argv, "--quiet")) {
			verbose = 0;
		} else if (!strcmp(*argv, "--post-process")) {
			if (--argc < 1)
				usage();
			pp_rules_file = strdup(*++argv);
		} else if (!strcmp(*argv, "--version")) {
			printf("Version: %s\n", GATHERER_VERSION);
			exit(0);
		} else if (!strcmp(*argv, "--stoplist")) {
			if (--argc < 1)
				usage();
			stoplist = strdup(*++argv);
		} else if (!strcmp(*argv, "--allowlist")) {
			if (--argc < 1)
				usage();
			allowlist = strdup(*++argv);
		} else if (!strcmp(*argv, "--tmpdir")) {
			if (--argc < 1)
				usage();
			tmpdir = strdup(*++argv);
		} else if (!strcmp(*argv, "--log")) {
			if (--argc < 1)
				usage();
			logfile = strdup(*++argv);
		} else if (!strcmp(*argv, "--dbdir")) {
			if (--argc < 1)
				usage();
			dbdir = strdup(*++argv);
		} else if (!strcmp(*argv, "--libdir")) {
			if (--argc < 1)
				usage();
			/*
			 *  The argument is user-supplied and may be of any
			 *  length, so size the buffer from the strings
			 *  themselves: both parts, the ':' and the NUL.
			 */
			++argv;
			libpath = xmalloc(strlen(*argv) +
			    strlen(default_libpath) + 2);
			sprintf(libpath, "%s:%s", *argv, default_libpath);
		} else if (!strcmp(*argv, "--max-deletions")) {
			if (--argc < 1)
				usage();
			max_deletions = atoi(*++argv);
		} else if (!strcmp(*argv, "--max-refresh")) {
			if (--argc < 1)
				usage();
			max_refresh = atoi(*++argv);
			if (max_refresh < 1)
				max_refresh = 0;
		} else if (!strcmp(*argv, "-f")) {
			if (--argc < 1)
				usage();
			input_file = strdup(*++argv);
		} else if (!strcmp(*argv, "--gatherer-host")) {
			if (--argc < 1)
				usage();
			ghost = strdup(*++argv);
		} else if (!strcmp(*argv, "--gatherer-name")) {
			if (--argc < 1)
				usage();
			gname = strdup(*++argv);
		} else if (!strcmp(*argv, "--gatherer-version")) {
			if (--argc < 1)
				usage();
			gver = strdup(*++argv);
		} else if (!strcmp(*argv, "--default-ttl")) {
			if (--argc < 1)
				usage();
			default_ttl = atoi(*++argv);
			if (default_ttl < 1)
				default_ttl = DEFAULT_TTL;
		} else if (!strcmp(*argv, "--default-refresh")) {
			if (--argc < 1)
				usage();
			default_refresh = atoi(*++argv);
			if (default_refresh < 1)
				default_refresh = DEFAULT_REFRESH;
		} else {
			usage();
		}
	}
	/* Set the fast algorithms */
	if (do_fast) {
		do_cksumdups = 0;
	}
	if (libpath == NULL)
		libpath = strdup(default_libpath);

	/* Do initializations */
#ifdef HAVE_SETLINEBUF
	setlinebuf(stdout);
	setlinebuf(stderr);
#else
	setbuf(stdout, NULL);
	setbuf(stderr, NULL);
#endif
	if (verbose) {
		FILE *fp = stdout;

		/* With --log, messages go to the (appended) log file */
		if (logfile != NULL) {
			if ((fp = fopen(logfile, "a+")) == NULL) {
				perror(logfile);
				exit(1);
			}
			setbuf(fp, NULL);
		}
		init_log3("essence", fp, stderr);
	} else {
		init_log3("essence", NULL, stderr);
	}

	if (pp_rules_file)
		if (pp_parse_rules(pp_rules_file)) {
			errorlog("Unable to parse post-processing rules.\n");
			exit(1);
		}
	/* Initialize and verify correct environment */

#ifdef USE_QUICKSUM
	if (quicksum_file == NULL)
		quicksum_file = locate_in_libpath(libpath, USE_QUICKSUM_FILE);
	if (quicksum_file == NULL) {
		errorlog("Unable to locate %s in %s.\n",
		    USE_QUICKSUM_FILE, libpath);
		exit(1);
	}
#endif
	/* Without --stoplist, take the first one found along libpath */
	if (stoplist == NULL)
		stoplist = locate_in_libpath(libpath, USE_STOPLIST);
	if (stoplist == NULL) {
		errorlog("Unable to locate %s in %s.\n",
		    USE_STOPLIST, libpath);
		exit(1);
	}
	if (tmpdir == NULL) {
		tmpdir = getenv("TMPDIR") ? strdup(getenv("TMPDIR")) :
		    strdup(USE_TMPDIR);
	}
	if (access(stoplist, R_OK) < 0) {
		log_errno(stoplist);
		exit(1);
	}
	if ((allowlist != NULL) && (access(allowlist, R_OK) < 0)) {
		log_errno(allowlist);
		exit(1);
	}
	if (input_file != NULL && strcmp(input_file, "-") != 0 &&
	    access(input_file, R_OK) < 0) {
		log_errno(input_file);
		usage();
	}
	if (access(tmpdir, W_OK) < 0) {
		log_errno(tmpdir);
		exit(1);
	}
	do_startup();

	/* NOTE: DO NOT catch SIGCHLD; we always do explicit waits in Essence */
	signal(SIGABRT, do_shutdown);	/* die gracefully */
	signal(SIGTERM, do_shutdown);
	signal(SIGINT, do_shutdown);

	/* Process */
	if (input_file != NULL) {
		FILE *fp;
		char buf[BUFSIZ], tbuf[BUFSIZ], *s;
		int t;

		if (!strcmp(input_file, "-"))
			fp = stdin;
		else {
			if ((fp = fopen(input_file, "r")) == NULL) {
				log_errno(input_file);
				usage();
			}
		}
		/*
		 *  The input looks like:
		 *     URL<tab>MD5:adfasdfasdfasdfasdfasd
		 *     URL<tab>Last-Modification-Time:12345
		 */
		while (fgets(buf, BUFSIZ, fp) != NULL) {
			strcpy(tbuf, buf);	/* pristine copy for messages */
			Debug(62, 1, ("Input Line: %s", tbuf));
			if (buf[0] == '#') {
				continue;	/* skip comments */
			}
			if ((s = strrchr(buf, '\n')) == NULL) {
				errorlog("Illegal input: %s\n", tbuf);
				continue;
			}
			*s = '\0';	/* strip newline */
			if ((s = strchr(buf, '\t')) != NULL) {
				*s++ = '\0';	/* delineate at the tab */
			}
			/* 
			 *  For MD5's: check database and skip if unchanged
			 *  For LMT's: check database and skip if unchanged
			 *  For no meta data, just pass it through
			 *
			 *  The value starts one character (the separator)
			 *  after the tag.  Only step over that character
			 *  when it exists; otherwise the pointer would land
			 *  beyond the terminating NUL of the line.  A tag
			 *  with nothing after it is treated as no meta data.
			 */
			if (s && !strncasecmp(s, T_MD5, strlen(T_MD5)) &&
			    s[strlen(T_MD5)] != '\0') {
				if (dbcheck_md5(buf, s + strlen(T_MD5) + 1)) {
					continue;
				}
			} else if (s && !strncasecmp(s, T_LMT, strlen(T_LMT)) &&
			    s[strlen(T_LMT)] != '\0') {
				t = atoi(s + strlen(T_LMT) + 1);
				if (dbcheck_timestamp(buf, t)) {
					continue;
				}
			}
			obj = create_data_object(buf, object_flags);
			if (obj == NULL) {
				errorlog("Cannot create object for %s\n", tbuf);
				continue;
			}
			process_object(obj);
			free_data_object(obj);
		}
		fclose(fp);
	} else {
		/* No -f: every remaining argument is a URL */
		for (; argc > 0; argc--, argv++) {
			obj = create_data_object(*argv, object_flags);
			if (obj == NULL) {
				errorlog("Cannot create object for %s\n", *argv);
				continue;
			}
			process_object(obj);
			free_data_object(obj);
		}
	}

	/* Clean up */
	do_shutdown(0);
	exit(0);
}

static void do_startup()
{
	char *libpathbuf, *s, *t;
	int fd;
	char buf[BUFSIZ];

	libpathbuf = xmalloc(strlen(libpath) + 64);
	memset(libpathbuf, '\0', strlen(libpath) + 64);
	sprintf(libpathbuf, "SUMMARIZER_LIBPATH=%s", libpath);
	if (putenv(libpathbuf) < 0) {
		log_errno("putenv");
	}
	sprintf(byname, "%s/%s", default_libpath, USE_BYNAME);
	sprintf(byurl, "%s/%s", default_libpath, USE_BYURL);
	sprintf(bycontent, "%s/%s", default_libpath, USE_BYCONTENT);
	sprintf(magic, "%s/%s", default_libpath, USE_MAGIC);

	s = strdup(libpath);
	for (t = strtok(s, ":"); t; t = strtok(NULL, ":")) {
		sprintf(buf, "%s/%s", t, USE_BYNAME);
		if ((fd = open(buf, O_RDONLY)) >= 0) {
			strcpy(byname, buf);
			close(fd);
			break;
		}
	}
	free(s);

	s = strdup(libpath);
	for (t = strtok(s, ":"); t; t = strtok(NULL, ":")) {
		sprintf(buf, "%s/%s", t, USE_BYURL);
		if ((fd = open(buf, O_RDONLY)) >= 0) {
			strcpy(byurl, buf);
			close(fd);
			break;
		}
	}
	free(s);

	s = strdup(libpath);
	for (t = strtok(s, ":"); t; t = strtok(NULL, ":")) {
		sprintf(buf, "%s/%s", t, USE_BYCONTENT);
		if ((fd = open(buf, O_RDONLY)) >= 0) {
			strcpy(bycontent, buf);
			close(fd);
			break;
		}
	}
	free(s);

	s = strdup(libpath);
	for (t = strtok(s, ":"); t; t = strtok(NULL, ":")) {
		sprintf(buf, "%s/%s", t, USE_MAGIC);
		if ((fd = open(buf, O_RDONLY)) >= 0) {
			strcpy(magic, buf);
			close(fd);
			break;
		}
	}
	free(s);

	init_url();
	init_gatherer_id();
	if (init_type_recognize(byname, bycontent, byurl, magic)) {
		errorlog("init_type_recognize(%s, %s, %s, %s) failed.\n",
		    byname, bycontent, byurl, magic);
		exit(1);
	}
	init_stoplist();
	if (!do_typeonly) {
		init_presentation_unnest();
		init_summarize();
		init_db(dbdir, max_deletions);
	}
}

/*
 *  print_memory_stats() - Logs the allocator statistics reported by
 *  mallinfo().  Compiled to an empty function unless both DEBUG and
 *  _HARVEST_OSF_ are defined.
 */
static void print_memory_stats()
{
#if defined(DEBUG) && defined(_HARVEST_OSF_)
	struct mallinfo mi = mallinfo();

	Log("malloc statistics:\n");
	Log("  total space in arena: %d\n", mi.arena);
	Log("  number of ordinary blocks: %d\n", mi.ordblks);
	Log("  number of small blocks: %d\n", mi.smblks);
	Log("  number of holding blocks: %d\n", mi.hblks);
	Log("  space in holding blocks: %d\n", mi.hblkhd);
	Log("  space in small blocks in use: %d\n", mi.usmblks);
	/* fsmblks and fordblks are different fields; label them apart */
	Log("  space in free small blocks: %d\n", mi.fsmblks);
	Log("  space in ordinary blocks in use: %d\n", mi.uordblks);
	Log("  space in free ordinary blocks: %d\n", mi.fordblks);
	Log("  cost of enabling keep option: %d\n", mi.keepcost);
#endif
}

/*
 *  do_shutdown() - Shuts the subsystems down, logs how the run ended,
 *  prints memory statistics and exits with status x.  main() calls it
 *  with 0 on normal completion and also installs it as the handler for
 *  SIGABRT, SIGTERM and SIGINT, in which case x is the signal number.
 *  Never returns.
 */
static void do_shutdown(x)
     int x;
{
	finish_url();
	finish_type_recognize();
	finish_stoplist();

	/* These three are only started when not in type-only mode */
	if (do_typeonly == 0) {
		finish_presentation_unnest();
		finish_summarize();
		finish_db();
	}

	if (x == 0) {
		Log("Terminated normally.\n");
	} else {
		Log("Terminated abnormally (%d)...\n", x);
	}

	print_memory_stats();
	exit(x);
}

static void init_gatherer_id()
{
	gatherer_id = xmalloc(sizeof(struct GID));
	gatherer_id->name = strdup(gname ? gname : "Essence");
	gatherer_id->version = strdup(gver ? gver : GATHERER_VERSION);
	if (ghost) {
		gatherer_id->host = strdup(ghost);
	} else {
		ghost = strdup(getfullhostname());
		gatherer_id->host = strdup(ghost);
	}

	Log("Running Gatherer...\n");
	Log("Gatherer-Name:\t%s\n", gatherer_id->name);
	Log("Gatherer-Host:\t%s\n", gatherer_id->host);
	Log("Gatherer-Version:\t%s\n", gatherer_id->version);
}

/*
 *  process_object() - Runs a single object through the Essence
 *  pipeline:
 *
 *    1. candidate selection by name (only when no allowlist is set);
 *    2. type recognition, if the object has no type yet;
 *    3. in type-only mode, print "Type: <type> <url>" to stdout and stop;
 *    4. candidate selection by type -- the allowlist if there is one,
 *       otherwise the stoplist followed by the duplicate check;
 *    5. summarize the object, or hand it to nested_feeder() when its
 *       type is a nested one.  Objects flagged F_MANUAL are always
 *       summarized.
 *
 *  An object rejected at any step is logged and dropped.
 */
static void process_object(object)
     DataObject *object;
{
	const char *local_tag;

#ifdef DEBUG
	print_memory_stats();
#endif
	Debug(62, 1, ("process_object(%s)\n", object->url->url));

	/* Candidate Selection by Name */
	if (allowlist == NULL && stop_byname(object)) {
		Log("Removing %s from candidate list -- name.\n",
		    object->url->url);
		return;
	}

	/* Type Recognition */
	if (object->type == NULL && type_recognize(object)) {
		errorlog("Cannot recognize type for %s\n", object->url->url);
		return;
	}

	/* Type-only mode: report straight to stdout and stop here */
	if (do_typeonly) {
		printf("Type: %s %s\n", object->type, object->url->url);
		return;
	}

	/* Log "URL <TAB> Type", tagged with [L] if local mapping worked */
	local_tag = (object->url->flags & URL_FLAG_LOCAL_MAPPED) ? " [L]" : "";
	Log("%s\t%s%s\n", object->url->url, object->type, local_tag);

	/* Candidate Selection by Type and by Duplicate */
	if (allowlist != NULL) {
		if (!allow_bytype(object)) {
			Log("Removing %s (%s) from candidate list -- type.\n",
			    object->url->url, object->type);
			return;
		}
	} else {
		if (stop_bytype(object)) {
			Log("Removing %s (%s) from candidate list -- type.\n",
			    object->url->url, object->type);
			return;
		}
		if (stop_byduplicate(object)) {
			if (!do_dupremove) {
				Log("Removing %s (%s) from candidate list -- duplicate.\n", object->url->url, object->type);
				return;
			}
			/* --delete-duplicates: drop the stored entry, keep going */
			db_delete_byurl(object->url->url);
		}
	}

	/* Summarize or Presentation Unnest */
	if (object->flags & F_MANUAL) {
		summarize(object);
		return;
	}
	if (is_nested_type(object->type)) {
		nested_feeder(object);
		return;
	}
	if (!do_typeonly)
		summarize(object);
}

/*
 *  nested_feeder() - Summarizes a nested object, unnests it, and runs
 *  each extracted object through process_object().
 *
 *  Every extracted object is typed first.  If the container is a
 *  "Directory" holding more than one C/CHeader file and at least one
 *  Makefile, the whole container is retyped as "SourceDistribution"
 *  and processed as a single object; the extracted list is then
 *  released without processing its members.
 *
 *  XXX: Should re-write so that the unnester is an iterator.
 */
static void nested_feeder(object)
     DataObject *object;
{
	DataObjectList *extracted, *node, *next;
	int n_csource = 0, n_makefile = 0;

	/* Summarize it first */
	summarize_nested_object(object);

	/* Unnest the object */
	extracted = presentation_unnest(object);
	if (extracted == NULL) {
		errorlog("Cannot unnest %s\n", object->url->url);
		return;
	}

	/* Type each extracted object and tally the bundle markers */
	for (node = extracted; node != NULL; node = node->next) {
		if (node->object == NULL) {
			errorlog("Fatal Internal: NULL object from unnest.\n");
			exit(1);
		}
		Debug(62, 1, ("Extracted: %s %p\n", node->object->url->url,
			node->object->type));
		if (node->object->type == NULL)
			(void) type_recognize(node->object);

		if (node->object->type == NULL)
			continue;	/* still untyped; nothing to count */
		if (!strcmp(node->object->type, "C") ||
		    !strcmp(node->object->type, "CHeader"))
			n_csource++;
		else if (!strcmp(node->object->type, "Makefile"))
			n_makefile++;
	}

	/* A directory of C sources plus a Makefile is one distribution */
	if (n_csource > 1 && n_makefile > 0 &&
	    !strcmp(object->type, "Directory")) {
		xfree(object->type);
		object->type = strdup("SourceDistribution");
		process_object(object);
		free_dol(extracted);
		return;
	}

	/* Process the extracted files, releasing each node once handled */
	for (node = extracted; node != NULL; node = next) {
		process_object(node->object);
		next = node->next;
		free_data_object(node->object);
		xfree(node);
	}
}
