/* RCS revision stamp for this file; nothing in the visible source reads it. */
static char rcsid[] = "mergedb.c,v 1.20 1996/01/04 03:46:44 duane Exp";
/*
 *  mergedb.c - Merge Automatic and Manual version of Essence database.
 *
 *  Usage: mergedb production automatic manual [manual ...]
 *
 *  This program allows administrators to manually add information about
 *  objects (URLs) to the Essence database.  In order to preserve all of
 *  the automatically generated information about objects, the manually-
 *  and automatically-generated information are merged into a new,
 *  production database.  All manually-generated information overrides
 *  any automatically-generated information.
 *
 *  Darren Hardy, hardy@cs.colorado.edu, March 1994
 *
 *  ----------------------------------------------------------------------
 *  Copyright (c) 1994, 1995.  All rights reserved.
 *  
 *    The Harvest software was developed by the Internet Research Task
 *    Force Research Group on Resource Discovery (IRTF-RD):
 *  
 *          Mic Bowman of Transarc Corporation.
 *          Peter Danzig of the University of Southern California.
 *          Darren R. Hardy of the University of Colorado at Boulder.
 *          Udi Manber of the University of Arizona.
 *          Michael F. Schwartz of the University of Colorado at Boulder.
 *          Duane Wessels of the University of Colorado at Boulder.
 *  
 *    This copyright notice applies to software in the Harvest
 *    ``src/'' directory only.  Users should consult the individual
 *    copyright notices in the ``components/'' subdirectories for
 *    copyright information about other software bundled with the
 *    Harvest source code distribution.
 *  
 *  TERMS OF USE
 *    
 *    The Harvest software may be used and re-distributed without
 *    charge, provided that the software origin and research team are
 *    cited in any use of the system.  Most commonly this is
 *    accomplished by including a link to the Harvest Home Page
 *    (http://harvest.cs.colorado.edu/) from the query page of any
 *    Broker you deploy, as well as in the query result pages.  These
 *    links are generated automatically by the standard Broker
 *    software distribution.
 *    
 *    The Harvest software is provided ``as is'', without express or
 *    implied warranty, and with no support nor obligation to assist
 *    in its use, correction, modification or enhancement.  We assume
 *    no liability with respect to the infringement of copyrights,
 *    trade secrets, or any patents, and are not responsible for
 *    consequential damages.  Proper use of the Harvest software is
 *    entirely the responsibility of the user.
 *  
 *  DERIVATIVE WORKS
 *  
 *    Users may make derivative works from the Harvest software, subject 
 *    to the following constraints:
 *  
 *      - You must include the above copyright notice and these 
 *        accompanying paragraphs in all forms of derivative works, 
 *        and any documentation and other materials related to such 
 *        distribution and use acknowledge that the software was 
 *        developed at the above institutions.
 *  
 *      - You must notify IRTF-RD regarding your distribution of 
 *        the derivative work.
 *  
 *      - You must clearly notify users that your are distributing 
 *        a modified version and not the original Harvest software.
 *  
 *      - Any derivative product is also subject to these copyright 
 *        and use restrictions.
 *  
 *    Note that the Harvest software is NOT in the public domain.  We
 *    retain copyright, as specified above.
 *  
 *  HISTORY OF FREE SOFTWARE STATUS
 *  
 *    Originally we required sites to license the software in cases
 *    where they were going to build commercial products/services
 *    around Harvest.  In June 1995 we changed this policy.  We now
 *    allow people to use the core Harvest software (the code found in
 *    the Harvest ``src/'' directory) for free.  We made this change
 *    in the interest of encouraging the widest possible deployment of
 *    the technology.  The Harvest software is really a reference
 *    implementation of a set of protocols and formats, some of which
 *    we intend to standardize.  We encourage commercial
 *    re-implementations of code complying to this set of standards.  
 *  
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <gdbm.h>
#include "util.h"
#include "template.h"

/* Local functions */
static void usage();		/* prints the command synopsis and exits */
/*
 * NOTE(review): no definition of mergedb() is visible in this file;
 * confirm whether this declaration is stale.
 */
static void mergedb();

/* Local variables */
/*
 * Incremented by merge_auto_data() when it replaces a production record;
 * main() reorganizes the production database once it exceeds 64.
 */
static int nreplace = 0;

/*
 *  usage() - Writes the command-line synopsis to stderr and terminates
 *  the program with exit status 1.
 */
static void usage()
{
	fputs("Usage: mergedb production automatic manual [manual ...]\n", stderr);
	exit(1);
}

/*
 *  process_manual_template() - Stores one manually written SOIF object
 *  in the production database, keyed by the URL found in the object.
 *
 *  proddb	production database that receives the object
 *  data	raw SOIF object as fetched from a manual database
 *  key		key the object had in the manual database; only used in
 *		the diagnostic for a corrupt object
 *
 *  A record already stored under the same URL is replaced.  An object
 *  that cannot be parsed, or that carries no URL, is fatal: the error
 *  is logged and the program exits with status 1.  A failed store is
 *  logged and otherwise ignored.
 */
static void process_manual_template(proddb, data, key)
     GDBM_FILE proddb;
     datum data, key;
{
	Template *mt;
	datum k;

	init_parse_template_string(data.dptr, data.dsize);
	mt = parse_template();
	/* Without a URL there is no key to store the object under. */
	if (mt == NULL || mt->url == NULL) {
		errorlog("Manual SOIF object is corrupt: %s\n", key.dptr);
		exit(1);
	}
	finish_parse_template();

	/*
	 *  The key is only needed for the duration of gdbm_store(), so the
	 *  URL owned by the template is used directly instead of a copy.
	 *  The terminating NUL is stored as part of the key.
	 */
	k.dptr = mt->url;
	k.dsize = (int) strlen(mt->url) + 1;
	if (gdbm_store(proddb, k, data, GDBM_REPLACE))
		errorlog("gdbm_store: %s: %s\n", k.dptr, gdbm_strerror(gdbm_errno));
	free_template(mt);
}

/*
 *  add_manual_to_production() - Feeds every object of one manual
 *  database to process_manual_template() so that it ends up in the
 *  production database.
 *
 *  filename	path of the manual GDBM database; opened read-only
 *  proddb	production database that receives the objects
 *
 *  If the manual database cannot be opened the error is logged and the
 *  database is skipped.  A record whose data cannot be fetched is
 *  logged and skipped as well.
 */
static void add_manual_to_production(filename, proddb)
     char *filename;
     GDBM_FILE proddb;
{
	GDBM_FILE dbf;
	datum k, nk, d;

	dbf = gdbm_open(filename, 0, GDBM_READER, 0644, NULL);
	if (dbf == NULL) {
		errorlog("gdbm_open: %s: %s\n", filename, gdbm_strerror(gdbm_errno));
		return;
	}
	for (k = gdbm_firstkey(dbf); k.dptr != NULL; k = nk) {
		/* Look up the successor first: k is freed at the end of the pass. */
		nk = gdbm_nextkey(dbf, k);
		d = gdbm_fetch(dbf, k);
		if (d.dptr == NULL) {
			/* Nothing was fetched, so there is nothing to parse or store. */
			errorlog("gdbm_fetch: %s: %s\n", filename, gdbm_strerror(gdbm_errno));
		} else {
			process_manual_template(proddb, d, k);
			free(d.dptr);
		}
		free(k.dptr);
	}
	gdbm_close(dbf);
}

/*
 *  merge_auto_data() - Converts ad and pd to templates, then merges the
 *  pd template into the ad template, then replaces the data for the
 *  proddb with the newly constructed template.
 */
static void merge_auto_data(proddb, k, ad, pd)
     GDBM_FILE proddb;
     datum k, ad, pd;
{
	Template *at, *pt;
	Buffer *b;
	datum d;

	init_parse_template_string(ad.dptr, ad.dsize);
	at = parse_template();
	finish_parse_template();

	init_parse_template_string(pd.dptr, pd.dsize);
	pt = parse_template();
	finish_parse_template();

	if (at == NULL || pt == NULL)
		return;

	merge_AVList(at->list, pt->list);

	b = init_print_template(NULL);
	print_template(at);
	d.dptr = b->data;
	d.dsize = b->length;
	gdbm_store(proddb, k, d, GDBM_REPLACE);
	nreplace++;
	finish_print_template();
}

/*
 *  merge_auto() - Merge the automatic information into the production
 *  database.
 */
static void merge_auto(autodb, proddb)
     GDBM_FILE autodb;
     GDBM_FILE proddb;
{
	datum ad, pd, k, nk;

	if (autodb == NULL)
		return;

	k = gdbm_firstkey(autodb);
	while (k.dptr) {
		nk = gdbm_nextkey(autodb, k);
		ad = gdbm_fetch(autodb, k);
		if (gdbm_exists(proddb, k)) {
			pd = gdbm_fetch(proddb, k);
			merge_auto_data(proddb, k, ad, pd);
			free(pd.dptr);
		} else {
			gdbm_store(proddb, k, ad, GDBM_INSERT);
		}
		free(k.dptr);
		free(ad.dptr);
		k = nk;
	}
}

/*
 *  main() - Builds the production database from one automatic database
 *  and one or more manual databases.
 *
 *  Usage: mergedb production automatic manual [manual ...]
 *
 *  The N manual databases are consolidated into the newly created
 *  production database first; a Template that was already present from
 *  an earlier manual database is overwritten as a whole.  Afterwards
 *  each automatic template is merged into the production database.
 */
int main(argc, argv)
     int argc;
     char *argv[];
{
	GDBM_FILE proddb, autodb;
	char *prodname, *autoname;
	int cachesize = 256;
	int i;

	if (argc < 4)
		usage();
	init_log3("mergedb", stdout, stderr);

	prodname = argv[1];
	autoname = argv[2];

	proddb = gdbm_open(prodname, 0, GDBM_NEWDB | GDBM_FAST, 0644, NULL);
	if (proddb == NULL) {
		errorlog("gdbm_open: %s: %s\n", prodname, gdbm_strerror(gdbm_errno));
		log_errno(prodname);
		exit(1);
	}
	gdbm_setopt(proddb, GDBM_CACHESIZE, &cachesize, sizeof(int));

	/*
	 *  An automatic database that fails to open with
	 *  GDBM_EMPTY_DATABASE is not fatal; the merge step below is
	 *  simply skipped for it.
	 */
	autodb = gdbm_open(autoname, 0, GDBM_READER, 0644, NULL);
	if (autodb == NULL && gdbm_errno != GDBM_EMPTY_DATABASE) {
		errorlog("gdbm_open: %s: %s\n", autoname, gdbm_strerror(gdbm_errno));
		gdbm_close(proddb);
		exit(1);
	}

	/* Every remaining argument names a manual database. */
	for (i = 3; i < argc; i++)
		add_manual_to_production(argv[i], proddb);

	if (autodb != NULL) {
		merge_auto(autodb, proddb);
		gdbm_close(autodb);
	}

	/* Sync and reorganize only after more than 64 replaced records. */
	if (nreplace > 64) {
		gdbm_sync(proddb);
		gdbm_reorganize(proddb);
	}
	gdbm_close(proddb);
	exit(0);
}
