/* RCS identification string for this source file. */
static char rcsid[] = "stor_reg.c,v 1.41 1995/09/05 18:30:02 duane Exp";
/* 
 *  stor_reg.c -- Primitives for Registry Storage on disk
 *
 *  Maintains the registry on disk.
 *
 *  Darren Hardy, University of Colorado, Boulder, September 1994
 *
 *  ----------------------------------------------------------------------
 *  Copyright (c) 1994, 1995.  All rights reserved.
 *  
 *    The Harvest software was developed by the Internet Research Task
 *    Force Research Group on Resource Discovery (IRTF-RD):
 *  
 *          Mic Bowman of Transarc Corporation.
 *          Peter Danzig of the University of Southern California.
 *          Darren R. Hardy of the University of Colorado at Boulder.
 *          Udi Manber of the University of Arizona.
 *          Michael F. Schwartz of the University of Colorado at Boulder.
 *          Duane Wessels of the University of Colorado at Boulder.
 *  
 *    This copyright notice applies to software in the Harvest
 *    ``src/'' directory only.  Users should consult the individual
 *    copyright notices in the ``components/'' subdirectories for
 *    copyright information about other software bundled with the
 *    Harvest source code distribution.
 *  
 *  TERMS OF USE
 *    
 *    The Harvest software may be used and re-distributed without
 *    charge, provided that the software origin and research team are
 *    cited in any use of the system.  Most commonly this is
 *    accomplished by including a link to the Harvest Home Page
 *    (http://harvest.cs.colorado.edu/) from the query page of any
 *    Broker you deploy, as well as in the query result pages.  These
 *    links are generated automatically by the standard Broker
 *    software distribution.
 *    
 *    The Harvest software is provided ``as is'', without express or
 *    implied warranty, and with no support nor obligation to assist
 *    in its use, correction, modification or enhancement.  We assume
 *    no liability with respect to the infringement of copyrights,
 *    trade secrets, or any patents, and are not responsible for
 *    consequential damages.  Proper use of the Harvest software is
 *    entirely the responsibility of the user.
 *  
 *  DERIVATIVE WORKS
 *  
 *    Users may make derivative works from the Harvest software, subject 
 *    to the following constraints:
 *  
 *      - You must include the above copyright notice and these 
 *        accompanying paragraphs in all forms of derivative works, 
 *        and any documentation and other materials related to such 
 *        distribution and use acknowledge that the software was 
 *        developed at the above institutions.
 *  
 *      - You must notify IRTF-RD regarding your distribution of 
 *        the derivative work.
 *  
 *      - You must clearly notify users that your are distributing 
 *        a modified version and not the original Harvest software.
 *  
 *      - Any derivative product is also subject to these copyright 
 *        and use restrictions.
 *  
 *    Note that the Harvest software is NOT in the public domain.  We
 *    retain copyright, as specified above.
 *  
 *  HISTORY OF FREE SOFTWARE STATUS
 *  
 *    Originally we required sites to license the software in cases
 *    where they were going to build commercial products/services
 *    around Harvest.  In June 1995 we changed this policy.  We now
 *    allow people to use the core Harvest software (the code found in
 *    the Harvest ``src/'' directory) for free.  We made this change
 *    in the interest of encouraging the widest possible deployment of
 *    the technology.  The Harvest software is really a reference
 *    implementation of a set of protocols and formats, some of which
 *    we intend to standardize.  We encourage commercial
 *    re-implementations of code complying to this set of standards.  
 *  
 */
#include "broker.h"
#include "log.h"

static int rfd = -1;		/* Registry file descriptor; -1 until set_registry_file() opens one and again after finish_registry_file() */
static char *registry_file = NULL;	/* Registry file name; private xstrdup'ed copy, NULL when unset */

/*
 *  init_registry_file() - Opens the default Registry file, whose name
 *  is obtained from UTIL_make_admin_filename("Registry").  This MUST be
 *  called before any of the other functions below.  Returns whatever
 *  set_registry_file() returns for that name.
 */
int init_registry_file()
{
	char *path;
	int status;

	path = UTIL_make_admin_filename("Registry");
	status = set_registry_file(path);
	xfree(path);		/* set_registry_file() keeps its own copy */
	return status;
}

/*
 *  set_registry_file() - Remembers a private copy of filename as the
 *  Registry file name and opens that file read/write, creating it with
 *  mode 0664 if it does not exist.  If a Registry file had already been
 *  set, its name is freed and its descriptor closed first, so calling
 *  this more than once does not leak.  Returns SUCCESS, or ERROR (after
 *  logging) if the file cannot be opened; the name stays recorded
 *  either way.
 */
int set_registry_file(filename)
char *filename;
{
	char *newname;

	/* Copy first, so the old name is still intact while we read filename */
	newname = xstrdup(filename);

	/* Release whatever an earlier call left behind */
	if (registry_file != NULL)
		xfree(registry_file);
	if (rfd >= 0)
		close(rfd);
	registry_file = newname;

	if ((rfd = open(registry_file, O_RDWR | O_CREAT, 0664)) < 0) {
		log_errno(registry_file);
		errorlog("Cannot write to Registry file: %s\n", registry_file);
		return ERROR;
	}
	return SUCCESS;
}

/*
 *  get_registry_file() - Returns an xstrdup'ed copy of the current
 *  Registry file name.
 */
char *get_registry_file()
{
	char *copy;

	copy = xstrdup(registry_file);
	return copy;
}

/*
 *  finish_registry_file() - Closes the Registry file descriptor, frees
 *  the stored file name, and resets both to their "unset" values
 *  (-1 and NULL).
 */
void finish_registry_file()
{
	/* Forget the file name */
	xfree(registry_file);
	registry_file = NULL;

	/* Forget the descriptor */
	close(rfd);
	rfd = -1;
}

#if defined(__FreeBSD__)
/*
 *  my_lseek() - FreeBSD-only wrapper around lseek(2) that takes and
 *  returns long, converting to and from off_t around the real call.
 */
long my_lseek(fd, offset, whence)
int fd;
long offset;
int whence;
{
        off_t lseek(int, off_t, int);
        off_t where;

        where = lseek(fd, (off_t) offset, whence);
        return ((long) where);
}
#endif

/*
 *  seek_registry() - Repositions the Registry file descriptor with
 *  lseek(2) (through my_lseek() on FreeBSD).  Returns the resulting
 *  offset.  When the seek fails the error is logged and the negative
 *  result is passed back to the caller.
 */
off_t seek_registry(offset, mark)
off_t offset;
int mark;
{
	off_t where;

#if defined(__FreeBSD__)
	where = my_lseek(rfd, offset, mark);
#else
	where = lseek(rfd, offset, mark);
#endif
	if (where < 0)
		log_errno("seek_registry: lseek");

	return where;
}


/*
 *  read_header() - Reads the header from the start of the Registry
 *  file.  Must call init_registry_file() before calling this function.
 *  On success, returns a pointer to malloc'ed space holding the header
 *  with all fields converted to host byte order; the caller frees it.
 *  If the registry file is empty, returns REGISTRY_EOF (cast to a
 *  pointer).  Returns NULL if the seek or read fails, if the header is
 *  truncated, or if its magic number or version number is not the
 *  expected one.  After a successful call the file is positioned just
 *  past the header.
 */
REGISTRY_HEADER *read_header()
{
	REGISTRY_HEADER *rhdr;
	int n;

	if (seek_registry((off_t) 0, SEEK_SET) < 0) {
		return (NULL);
	}
	rhdr = (REGISTRY_HEADER *) xmalloc(sizeof(REGISTRY_HEADER));
	if ((n = read(rfd, rhdr, sizeof(REGISTRY_HEADER))) < 0) {
		log_errno(registry_file);
		xfree(rhdr);
		return (NULL);
	}
	if (n == 0) {
		xfree(rhdr);
		return ((REGISTRY_HEADER *) REGISTRY_EOF);
	}
	/*
	 *  A short read leaves part of the buffer uninitialized; refuse
	 *  to interpret it as a header.
	 */
	if (n != (int) sizeof(REGISTRY_HEADER)) {
		errorlog("read_header: Registry header is truncated.\n");
		xfree(rhdr);
		return (NULL);
	}
	rhdr->magic = ntohl(rhdr->magic);
	rhdr->version = ntohl(rhdr->version);
	rhdr->nrecords = ntohl(rhdr->nrecords);
	rhdr->nrecords_deleted = ntohl(rhdr->nrecords_deleted);
	rhdr->nrecords_valid = ntohl(rhdr->nrecords_valid);

	/* Check consistency of the header */
	if (rhdr->magic != REGISTRY_MAGIC) {
		errorlog("read_header: Registry is corrupt.\n");
		xfree(rhdr);
		return (NULL);
	}
	if (rhdr->version != REGISTRY_VERSION) {
		errorlog("read_header: Unknown Registry version %d\n",
			(int) rhdr->version);
		xfree(rhdr);
		return (NULL);
	}
	return (rhdr);
}

/*
 *  write_header() - Writes the header to the start of the Registry
 *  file in network byte order.  Must call init_registry_file() before
 *  calling this function.  If the header's magic number or version
 *  number isn't the expected one, returns ERROR without touching the
 *  file.  Also returns ERROR if the seek fails, the write fails, or
 *  the write stores fewer bytes than a full header.  Returns SUCCESS
 *  otherwise.  The caller's header is not modified.
 */
int write_header(rhdr)
REGISTRY_HEADER *rhdr;
{
	REGISTRY_HEADER tmp;
	int n;

	/* Check consistency of the header */
	if (rhdr->magic != REGISTRY_MAGIC) {
		errorlog("write_header: Registry is corrupt.\n");
		return ERROR;
	}
	if (rhdr->version != REGISTRY_VERSION) {
		errorlog("write_header: Unknown Registry version %d\n",
			(int) rhdr->version);
		return ERROR;
	}
	tmp.magic = htonl(rhdr->magic);
	tmp.version = htonl(rhdr->version);
	tmp.nrecords = htonl(rhdr->nrecords);
	tmp.nrecords_deleted = htonl(rhdr->nrecords_deleted);
	tmp.nrecords_valid = htonl(rhdr->nrecords_valid);

	if (seek_registry((off_t) 0, SEEK_SET) < 0) {
		return ERROR;
	}
	if ((n = write(rfd, &tmp, sizeof(REGISTRY_HEADER))) < 0) {
		log_errno(registry_file);
		return ERROR;
	}
	/* A partial header on disk is as bad as none at all */
	if (n != (int) sizeof(REGISTRY_HEADER)) {
		errorlog("write_header: Short write to Registry file: %s\n",
			registry_file);
		return ERROR;
	}
	return SUCCESS;
}

/*
 *  read_record() - Reads the next record from the registry.  To reset
 *  to the beginning of the registry file, use read_header().  MUST
 *  call read_header() before the first read_record().  For example,
 *
 *              read_header()
 *              while (read_record() != REGISTRY_EOF) { };
 *  
 *  Will return REGISTRY_EOF when finished reading the Registry.
 *  Will read the next record from the registry, parse it, writes the
 *  appropriate values to the allocated registry_entry, then returns SUCCESS.  
 *  If the record is marked deleted, then it returns ENTRY_DELETED and 
 *  doesn't write any data to registry_entry.  On error, it returns ERROR 
 *  and the data written to registry_entry is undefined.
 */
int read_record(registry_entry)
reg_t *registry_entry;
{
	RECORD_HEADER rhdr;
	char *record, *rp;
	GathererID MyGid;
	int n;
	num32 x;

	/* Read and canonicalize the record header */
	if ((n = read(rfd, &rhdr, sizeof(RECORD_HEADER))) < 0) {
		log_errno(registry_file);
		return ERROR;
	}
	if (n == 0)
		return REGISTRY_EOF;

	rhdr.record_size = ntohl(rhdr.record_size);
	rhdr.magic = ntohl(rhdr.magic);
	rhdr.flag = ntohl(rhdr.flag);

	/* See if the header is corrupt */
	if (rhdr.magic != REGISTRY_MAGIC) {
		errorlog("Record Header is corrupt at offset %d: 0x%08x 0x%08x 0x%08x\n",
			seek_registry(0, SEEK_CUR),
			rhdr.record_size, rhdr.magic, rhdr.flag);
		return ERROR;
	}
	/* Check to see if the record is deleted.  If so, skip nbytes */
	if (IS_DELETED(rhdr.flag)) {
		if (seek_registry(rhdr.record_size, SEEK_CUR) < 0) {
			return ERROR;
		}
		return ENTRY_DELETED;
	}
	/* We're ready to read the record.  malloc the space and read */
	record = (char *) xmalloc(rhdr.record_size);
	if ((n = read(rfd, record, rhdr.record_size)) < 0) {
		log_errno(registry_file);
		xfree(record);
		return ERROR;
	}
	/* 
	 *  Now parse the record into the registry_entry space 
	 *
	 *  Each record looks exactly like this:
	 *
	 *      4 network-order bytes of URL size
	 *      n bytes of URL
	 *      4 network-order bytes of Gatherer-Name size
	 *      n bytes of Gatherer-Name
	 *      4 network-order bytes of Gatherer-Host size
	 *      n bytes of Gatherer-Host
	 *      4 network-order bytes of Gatherer-Version size
	 *      n bytes of Gatherer-Version
	 *      4 network-order bytes of MD5
	 *      n bytes of MD5
	 *      4 network-order bytes of Description size
	 *      n bytes of Description
	 *      4 network-order bytes of FD
	 *      4 network-order bytes of TTL
	 *      4 network-order bytes of L-M-T
	 *      4 network-order bytes of Refresh-Rate
	 *      4 network-order bytes of Update-Time
	 */
	rp = record;
	memcpy(&x, rp, NUM32LEN);	/* Grab URL size and URL */
	rp += NUM32LEN;
	registry_entry->urls = ntohl(x);
	registry_entry->url = (char *) xmalloc(registry_entry->urls + 1);
	memcpy(registry_entry->url, rp, registry_entry->urls);
	registry_entry->url[registry_entry->urls] = '\0';
	rp += registry_entry->urls;

	memcpy(&x, rp, NUM32LEN);	/* Grab Gname size and Gname */
	rp += NUM32LEN;
	MyGid.gns = ntohl(x);
	MyGid.gn = (char *) xmalloc(MyGid.gns + 1);
	memcpy(MyGid.gn, rp, MyGid.gns);
	MyGid.gn[MyGid.gns] = '\0';
	rp += MyGid.gns;

	memcpy(&x, rp, NUM32LEN);	/* Grab Ghost size and Ghost */
	rp += NUM32LEN;
	MyGid.ghs = ntohl(x);
	MyGid.gh = (char *) xmalloc(MyGid.ghs + 1);
	memcpy(MyGid.gh, rp, MyGid.ghs);
	MyGid.gh[MyGid.ghs] = '\0';
	rp += MyGid.ghs;

	memcpy(&x, rp, NUM32LEN);	/* Grab Gver size and Gver */
	rp += NUM32LEN;
	MyGid.gvs = ntohl(x);
	MyGid.gv = (char *) xmalloc(MyGid.gvs + 1);
	memcpy(MyGid.gv, rp, MyGid.gvs);
	MyGid.gv[MyGid.gvs] = '\0';
	rp += MyGid.gvs;

	/* Set the GathererID, and clean up */
	MyGid.GID = -1;
	registry_entry->GID = RG_gid_register(&MyGid);
	xfree(MyGid.gn);
	xfree(MyGid.gh);
	xfree(MyGid.gv);

	memcpy(&x, rp, NUM32LEN);	/* Grab MD5 size and MD5 */
	rp += NUM32LEN;
	registry_entry->md5s = ntohl(x);
	registry_entry->md5 = (char *) xmalloc(registry_entry->md5s + 1);
	memcpy(registry_entry->md5, rp, registry_entry->md5s);
	registry_entry->md5[registry_entry->md5s] = '\0';
	rp += registry_entry->md5s;

	memcpy(&x, rp, NUM32LEN);	/* Grab Desc size and Desc */
	rp += NUM32LEN;
	registry_entry->descs = ntohl(x);
	registry_entry->desc = (char *) xmalloc(registry_entry->descs + 1);
	memcpy(registry_entry->desc, rp, registry_entry->descs);
	registry_entry->desc[registry_entry->descs] = '\0';
	rp += registry_entry->descs;

	memcpy(&x, rp, NUM32LEN);	/* Grab FD */
	rp += NUM32LEN;
	registry_entry->FD = ntohl(x);

	memcpy(&x, rp, NUM32LEN);	/* Grab TTL */
	rp += NUM32LEN;
	registry_entry->ttl = ntohl(x);

	memcpy(&x, rp, NUM32LEN);	/* Grab L-M-T */
	rp += NUM32LEN;
	registry_entry->lmt = ntohl(x);

	memcpy(&x, rp, NUM32LEN);	/* Grab Refresh-Rate */
	rp += NUM32LEN;
	registry_entry->refresh_rate = ntohl(x);

	memcpy(&x, rp, NUM32LEN);	/* Grab Update-Time */
	rp += NUM32LEN;
	registry_entry->update_time = ntohl(x);

	xfree(record);
	return SUCCESS;
}

/* ----------------------------------------------------------------- *
   write_record() - Write the record to the registry.  To reset
   to the beginning of the registry file, use read_header().  MUST
   call read_header() (or write_header) before the first read_record().  
   For example,

   read_header()
   while (write_record(r) == SUCCESS) { r = something_new(); };

   Will write the next record to the registry, then returns SUCCESS.  
   On error, it returns ERROR.
   * ----------------------------------------------------------------- */
int write_record(registry_entry)
reg_t *registry_entry;
{
	RECORD_HEADER rhdr;
	char *record, *rp;
	GathererID *MyGid;
	int n;
	num32 x;

	/* Grab a pointer to the GathererID */
	MyGid = RG_gid(registry_entry->GID);
	if (MyGid == NULL) {
		errorlog("write_record: Internal error with Gatherer ID!\n");
		return ERROR;
	}

	n = registry_entry->urls + NUM32LEN +	/* url + size */
	    MyGid->gns + NUM32LEN +		/* gatherer name + size */
	    MyGid->ghs + NUM32LEN +		/* gatherer host + size */
	    MyGid->gvs + NUM32LEN +		/* gatherer ver + size */
	    registry_entry->md5s + NUM32LEN +	/* MD5 + size */
	    registry_entry->descs + NUM32LEN +	/* description + size */
	    (5 * NUM32LEN);	/* 5 numbers */

	rhdr.record_size = htonl(n);
	rhdr.magic = htonl(REGISTRY_MAGIC);
	rhdr.flag = htonl(registry_entry->flag);

	if (write(rfd, &rhdr, sizeof(RECORD_HEADER)) < 0) {
		log_errno(registry_file);
		return ERROR;
	}
	record = (char *) xmalloc(n);
	rp = record;

	x = htonl(registry_entry->urls);	/* Copy over URL and size */
	memcpy(rp, &x, NUM32LEN);
	rp += NUM32LEN;
	memcpy(rp, registry_entry->url, registry_entry->urls);
	rp += registry_entry->urls;

	x = htonl(MyGid->gns);			/* Copy over Gname and size */
	memcpy(rp, &x, NUM32LEN);
	rp += NUM32LEN;
	memcpy(rp, MyGid->gn, MyGid->gns);
	rp += MyGid->gns;

	x = htonl(MyGid->ghs);			/* Copy over Ghost and size */
	memcpy(rp, &x, NUM32LEN);
	rp += NUM32LEN;
	memcpy(rp, MyGid->gh, MyGid->ghs);
	rp += MyGid->ghs;

	x = htonl(MyGid->gvs);			/* Copy over Gver and size */
	memcpy(rp, &x, NUM32LEN);
	rp += NUM32LEN;
	memcpy(rp, MyGid->gv, MyGid->gvs);
	rp += MyGid->gvs;

	x = htonl(registry_entry->md5s);	/* Copy over MD5 and size */
	memcpy(rp, &x, NUM32LEN);
	rp += NUM32LEN;
	memcpy(rp, registry_entry->md5, registry_entry->md5s);
	rp += registry_entry->md5s;

	x = htonl(registry_entry->descs);	/* Copy over Desc and size */
	memcpy(rp, &x, NUM32LEN);
	rp += NUM32LEN;
	memcpy(rp, registry_entry->desc, registry_entry->descs);
	rp += registry_entry->descs;

	x = htonl(registry_entry->FD);	/* Copy over FD */
	memcpy(rp, &x, NUM32LEN);
	rp += NUM32LEN;

	x = htonl(registry_entry->ttl);		/* Copy over TTL */
	memcpy(rp, &x, NUM32LEN);
	rp += NUM32LEN;

	x = htonl(registry_entry->lmt);		/* Copy over L-M-T */
	memcpy(rp, &x, NUM32LEN);
	rp += NUM32LEN;

	x = htonl(registry_entry->refresh_rate);	/* Copy over Refresh-Rate */
	memcpy(rp, &x, NUM32LEN);
	rp += NUM32LEN;

	x = htonl(registry_entry->update_time);		/* Copy over Update-Time */
	memcpy(rp, &x, NUM32LEN);
	rp += NUM32LEN;

	if (write(rfd, record, n) < 0) {
		log_errno(registry_file);
		xfree(record);
		return ERROR;
	}
	xfree(record);
	return SUCCESS;
}

/* ----------------------------------------------------------------- *
   replace_record() -- overwrite the record stored at regent->rec_off
   with the current contents of regent.  The new record is assumed to
   be the same size as the old one (this happens when the expiration
   time is updated); nothing here checks that.  Returns ERROR if the
   seek fails, otherwise the result of write_record().
   * ----------------------------------------------------------------- */
int replace_record(regent)
reg_t *regent;
{
	if (seek_registry(regent->rec_off, SEEK_SET) >= 0)
		return (write_record(regent));

	return ERROR;
}

/* ----------------------------------------------------------------- *
   get_record() -- read the record at the current file position into
   regent, keeping regent->rec_off in step with the file.  A rec_off
   of 0 is treated as "not yet known" and is set to the current
   offset; any other value must equal the current offset, or the
   mismatch is logged and ERROR is returned.  Also returns ERROR if
   the current offset cannot be determined.  Otherwise returns the
   result of read_record().
   * ----------------------------------------------------------------- */
int get_record(regent)
reg_t *regent;
{
	off_t here;

	here = seek_registry(0, SEEK_CUR);
	if (here < 0)
		return ERROR;

	if (regent->rec_off != 0 && regent->rec_off != here) {
		errorlog("RECORD OFFSET MISMATCH!!!\n");
		return ERROR;
	}
	if (regent->rec_off == 0)
		regent->rec_off = here;

	return (read_record(regent));
}

/* ----------------------------------------------------------------- *
   append_new_record() -- seek to the end of the registry file, store
   that offset in regent->rec_off, and write regent there.  Returns
   ERROR if the seek fails, otherwise the result of write_record().
   * ----------------------------------------------------------------- */
int append_new_record(regent)
reg_t *regent;
{
	regent->rec_off = seek_registry(0, SEEK_END);
	if (regent->rec_off < 0)
		return ERROR;

	return (write_record(regent));
}

/* ----------------------------------------------------------------- *
   remove_record()
   * ----------------------------------------------------------------- */
int remove_record(regent)
reg_t *regent;
{
	RECORD_HEADER r;
	int status;

	if (seek_registry(regent->rec_off, SEEK_SET) < 0)
		return ERROR;

	status = read_record_hdr(&r);
	if (status == SUCCESS) {
		SET_DELETED(r.flag);
		if (seek_registry(regent->rec_off, SEEK_SET) < 0)
			return ERROR;
		return (write_record_hdr(&r));
	} else if (status == ENTRY_DELETED)
		return SUCCESS;

	return ERROR;
}

/* ----------------------------------------------------------------- *
   read_record_hdr() -- read the record header at the current file
   position into *rhdr and convert its fields to host byte order.

   Returns REGISTRY_EOF if there is nothing left to read.  Returns
   ENTRY_DELETED, after seeking past the record body, if the record
   is marked deleted.  Returns ERROR on a read failure, a truncated
   header, a bad magic number, or a failed seek.  Returns SUCCESS
   otherwise, leaving the file positioned at the record body.
   * ----------------------------------------------------------------- */
int read_record_hdr(rhdr)
RECORD_HEADER *rhdr;
{
	int n;

	/* Read and canonicalize the record header */
	if ((n = read(rfd, rhdr, sizeof(RECORD_HEADER))) < 0) {
		log_errno(registry_file);
		return ERROR;
	}
	if (n == 0)
		return REGISTRY_EOF;
	/* A short read would leave part of *rhdr unset */
	if (n != (int) sizeof(RECORD_HEADER)) {
		errorlog("read_record_hdr: Record Header is truncated in %s\n",
			registry_file);
		return ERROR;
	}

	rhdr->record_size = ntohl(rhdr->record_size);
	rhdr->magic = ntohl(rhdr->magic);
	rhdr->flag = ntohl(rhdr->flag);

	/* See if the header is corrupt */
	if (rhdr->magic != REGISTRY_MAGIC) {
		/* off_t may be wider than int, so print it as a long */
		errorlog("Record Header is corrupt at offset %ld: 0x%08x 0x%08x 0x%08x\n",
			(long) seek_registry((off_t) 0, SEEK_CUR),
			(unsigned int) rhdr->record_size,
			(unsigned int) rhdr->magic,
			(unsigned int) rhdr->flag);
		return ERROR;
	}
	/* Check to see if the record is deleted.  If so, skip nbytes */
	if (IS_DELETED(rhdr->flag)) {
		if (seek_registry(rhdr->record_size, SEEK_CUR) < 0) {
			/* seek_registry() has already logged the failure */
			return ERROR;
		}
		return ENTRY_DELETED;
	}
	return SUCCESS;
}

/* ----------------------------------------------------------------- *
   write_record_hdr() -- write *rhdr at the current file position in
   network byte order.  The caller's header is not modified.  Returns
   SUCCESS once the whole header has been written; returns ERROR if
   write() fails or stores fewer bytes than a full header.
   * ----------------------------------------------------------------- */
int write_record_hdr(rhdr)
RECORD_HEADER *rhdr;
{
	RECORD_HEADER tmp;
	int n;

	tmp.record_size = htonl(rhdr->record_size);
	tmp.magic = htonl(rhdr->magic);
	tmp.flag = htonl(rhdr->flag);
	if ((n = write(rfd, &tmp, sizeof(RECORD_HEADER))) < 0) {
		log_errno(registry_file);
		return ERROR;
	}
	/* A partial header on disk would corrupt the record */
	if (n != (int) sizeof(RECORD_HEADER)) {
		errorlog("write_record_hdr: Short write to Registry file: %s\n",
			registry_file);
		return ERROR;
	}
	return SUCCESS;
}

/*
 *  Place holder routines: set_registry_mark, restore_registry_mark
 *
 *  rmark holds the single saved file position shared by both routines.
 */
static off_t rmark = 0;

/*
 *  set_registry_mark() - Saves the current Registry file offset in
 *  rmark and returns it.  The value from seek_registry() is stored and
 *  returned as is, including a negative value when the seek fails.
 */
off_t set_registry_mark()
{
	off_t here;

	here = seek_registry(0, SEEK_CUR);
	rmark = here;
	return (here);
}

/*
 *  restore_registry_mark() - Seeks the Registry file back to the offset
 *  held in rmark and returns the result of seek_registry().
 */
off_t restore_registry_mark()
{
	off_t where;

	where = seek_registry(rmark, SEEK_SET);
	return (where);
}
