static char rcsid[] = "lsm2soif.c,v 1.20 1996/01/05 20:28:19 duane Exp";
/*
 *  lsm2soif - Converts Linux Software Maps (lsm) to SOIF.
 *
 *  Usage: lsm2soif url local-file
 *
 *  Darren Hardy, hardy@cs.colorado.edu, June 1994
 *  Updated for new IAFA-like Aug94 LSM format, April 1995
 *
 *  ----------------------------------------------------------------------
 *  Copyright (c) 1994, 1995.  All rights reserved.
 *  
 *    The Harvest software was developed by the Internet Research Task
 *    Force Research Group on Resource Discovery (IRTF-RD):
 *  
 *          Mic Bowman of Transarc Corporation.
 *          Peter Danzig of the University of Southern California.
 *          Darren R. Hardy of the University of Colorado at Boulder.
 *          Udi Manber of the University of Arizona.
 *          Michael F. Schwartz of the University of Colorado at Boulder.
 *          Duane Wessels of the University of Colorado at Boulder.
 *  
 *    This copyright notice applies to software in the Harvest
 *    ``src/'' directory only.  Users should consult the individual
 *    copyright notices in the ``components/'' subdirectories for
 *    copyright information about other software bundled with the
 *    Harvest source code distribution.
 *  
 *  TERMS OF USE
 *    
 *    The Harvest software may be used and re-distributed without
 *    charge, provided that the software origin and research team are
 *    cited in any use of the system.  Most commonly this is
 *    accomplished by including a link to the Harvest Home Page
 *    (http://harvest.cs.colorado.edu/) from the query page of any
 *    Broker you deploy, as well as in the query result pages.  These
 *    links are generated automatically by the standard Broker
 *    software distribution.
 *    
 *    The Harvest software is provided ``as is'', without express or
 *    implied warranty, and with no support nor obligation to assist
 *    in its use, correction, modification or enhancement.  We assume
 *    no liability with respect to the infringement of copyrights,
 *    trade secrets, or any patents, and are not responsible for
 *    consequential damages.  Proper use of the Harvest software is
 *    entirely the responsibility of the user.
 *  
 *  DERIVATIVE WORKS
 *  
 *    Users may make derivative works from the Harvest software, subject 
 *    to the following constraints:
 *  
 *      - You must include the above copyright notice and these 
 *        accompanying paragraphs in all forms of derivative works, 
 *        and any documentation and other materials related to such 
 *        distribution and use acknowledge that the software was 
 *        developed at the above institutions.
 *  
 *      - You must notify IRTF-RD regarding your distribution of 
 *        the derivative work.
 *  
 *      - You must clearly notify users that your are distributing 
 *        a modified version and not the original Harvest software.
 *  
 *      - Any derivative product is also subject to these copyright 
 *        and use restrictions.
 *  
 *    Note that the Harvest software is NOT in the public domain.  We
 *    retain copyright, as specified above.
 *  
 *  HISTORY OF FREE SOFTWARE STATUS
 *  
 *    Originally we required sites to license the software in cases
 *    where they were going to build commercial products/services
 *    around Harvest.  In June 1995 we changed this policy.  We now
 *    allow people to use the core Harvest software (the code found in
 *    the Harvest ``src/'' directory) for free.  We made this change
 *    in the interest of encouraging the widest possible deployment of
 *    the technology.  The Harvest software is really a reference
 *    implementation of a set of protocols and formats, some of which
 *    we intend to standardize.  We encourage commercial
 *    re-implementations of code complying to this set of standards.  
 *  
 */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "util.h"
#include "url.h"
#include "template.h"

#define LSM_DTYPE "Linux-Software"

/* Local functions */
static void do_lsmtosoif();

/* Local variables */
static int n_flag = 0;

/*
 *  usage - Writes the command-line synopsis to stderr and terminates
 *  the process with exit status 1.  Never returns.
 */
static void usage(void)
{
	fputs("Usage: lsm2soif url local-file\n", stderr);
	exit(1);
}

/*
 *  lsm_first_word - Returns a freshly malloc'ed copy of s cut off at its
 *  first whitespace character (an empty string when s starts with
 *  whitespace), or NULL when memory cannot be allocated.
 */
static char *lsm_first_word(const char *s)
{
	size_t n = 0;
	char *out;

	while (s[n] != '\0' && !isspace((unsigned char) s[n]))
		n++;
	if ((out = malloc(n + 1)) == NULL)
		return NULL;
	memcpy(out, s, n);
	out[n] = '\0';
	return out;
}

/*
 *  lsm_make_ftp_url - Joins site, dir and file into "ftp://site/dir/file",
 *  inserting a '/' before dir and before file only when that component
 *  does not already start with one.  dir may be NULL, in which case the
 *  result is "ftp://site/file".  Returns a malloc'ed string sized to fit
 *  (so no fixed buffer can overflow), or NULL on allocation failure.
 */
static char *lsm_make_ftp_url(const char *site, const char *dir,
    const char *file)
{
	const char *dir_sep = "";
	const char *file_sep = (*file == '/') ? "" : "/";
	size_t len;
	char *out;

	if (dir == NULL)
		dir = "";
	else if (*dir != '/')
		dir_sep = "/";

	len = strlen("ftp://") + strlen(site) + strlen(dir_sep) +
	    strlen(dir) + strlen(file_sep) + strlen(file) + 1;
	if ((out = malloc(len)) == NULL)
		return NULL;
	snprintf(out, len, "ftp://%s%s%s%s%s", site, dir_sep, dir,
	    file_sep, file);
	return out;
}

/*
 *  lsm_old_style_url - Builds the ftp URL for an old-style entry from
 *  the raw site, path and file attribute values.  Only the first
 *  whitespace-delimited word of each value is used.  Returns a
 *  malloc'ed string or NULL on allocation failure.
 */
static char *lsm_old_style_url(const char *site, const char *path,
    const char *file)
{
	char *sv = lsm_first_word(site);
	char *pv = lsm_first_word(path);
	char *fv = lsm_first_word(file);
	char *url = NULL;

	if (sv != NULL && pv != NULL && fv != NULL)
		url = lsm_make_ftp_url(sv, pv, fv);
	free(sv);
	free(pv);
	free(fv);
	return url;
}

/*
 *  lsm_next_token - Skips leading whitespace at *cursor, NUL-terminates
 *  the whitespace-delimited token found there and advances *cursor past
 *  it.  Returns the token, or NULL when only whitespace remains.
 */
static char *lsm_next_token(char **cursor)
{
	char *p = *cursor;
	char *tok;

	while (isspace((unsigned char) *p))
		p++;
	if (*p == '\0') {
		*cursor = p;
		return NULL;
	}
	tok = p;
	while (*p != '\0' && !isspace((unsigned char) *p))
		p++;
	if (*p != '\0')
		*p++ = '\0';
	*cursor = p;
	return tok;
}

/*
 *  lsm_aug94_url - Builds the ftp URL from the value of an aug94 site
 *  attribute, which is expected to look like
 *
 *      <site> <base-directory>
 *      <size ...> <filename>
 *
 *  The filename is the last whitespace-delimited word of the second
 *  line; trailing whitespace (including a CR) is ignored so it cannot
 *  yield an empty filename.  When the filename contains a '/', it is
 *  taken as a full path and the base directory is ignored.  Works on a
 *  private copy of value, so arbitrarily long values are safe.
 *  Returns a malloc'ed string, or NULL when the value does not have
 *  all three parts or memory cannot be allocated.
 */
static char *lsm_aug94_url(const char *value)
{
	char *copy, *cur, *site, *dir, *rest, *end, *file;
	char *url = NULL;

	if ((copy = malloc(strlen(value) + 1)) == NULL)
		return NULL;
	strcpy(copy, value);

	cur = copy;
	site = lsm_next_token(&cur);
	dir = (site != NULL) ? lsm_next_token(&cur) : NULL;
	if (dir != NULL) {
		while (isspace((unsigned char) *cur))
			cur++;
		rest = cur;
		rest[strcspn(rest, "\n")] = '\0';	/* one line only */
		end = rest + strlen(rest);
		while (end > rest && isspace((unsigned char) end[-1]))
			*--end = '\0';
		if (*rest != '\0') {
			/* the filename is the last word on the line */
			file = end;
			while (file > rest &&
			    !isspace((unsigned char) file[-1]))
				file--;
			if (strchr(file, '/') != NULL)
				url = lsm_make_ftp_url(site, NULL, file);
			else
				url = lsm_make_ftp_url(site, dir, file);
		}
	}
	free(copy);
	return url;
}

/*
 *  lsm_parse_old_line - Splits an old-style "Attribute = value" line.
 *  The attribute name (text before the first whitespace or '=') is
 *  written to attr, which must be at least as large as line; a single
 *  trailing digit is stripped from it.  For Site, Path and File the
 *  value is cut at its first space or tab.  line is modified in place.
 *  Returns a pointer to the value inside line, or NULL when the line
 *  has no '=', no attribute name, or an empty value.
 */
static char *lsm_parse_old_line(char *line, char *attr)
{
	char *eq, *p, *v;
	size_t n = 0;

	if ((eq = strchr(line, '=')) == NULL)
		return NULL;	/* not an old-style LSM line */
	for (p = line; p < eq && !isspace((unsigned char) *p); p++)
		attr[n++] = *p;
	attr[n] = '\0';
	if (n < 1)
		return NULL;	/* null attribute */
	if (isdigit((unsigned char) attr[n - 1]))
		attr[n - 1] = '\0';	/* strip attribute number */

	for (v = eq; *v == '=' || isspace((unsigned char) *v); v++)
		/* skip separator */ ;
	if (!strcmp(attr, "Site") ||
	    !strcmp(attr, "Path") ||
	    !strcmp(attr, "File"))
		v[strcspn(v, " \t")] = '\0';
	return (*v != '\0') ? v : NULL;
}

/*
 *  do_lsmtosoif - Reads the LSM entry stored in filename, builds a
 *  template for it and prints the template to stdout.
 *
 *  url       URL handed to url_open(); the template is created with
 *            the resulting up->url.
 *  filename  local file holding the LSM text.
 *
 *  Lines are read until EOF or a line that is exactly "End".  A line
 *  that is exactly "Begin3" selects the aug94 format (also the
 *  default); "Begin" or "Begin2" selects the old "Attr = value" format.
 *  After parsing, the template URL is replaced by an ftp:// URL derived
 *  from the site/path/file attributes when they are present, any text
 *  up to and including a ':' is removed from attribute names, "Desc" is
 *  renamed "Description", and a "Type" attribute of LSM_DTYPE is added.
 *
 *  Failures to open the URL or the file are logged and the function
 *  returns without printing anything.
 */
static void do_lsmtosoif(char *url, char *filename)
{
	char buf[BUFSIZ], attr[BUFSIZ], value[BUFSIZ], pattr[BUFSIZ];
	char *s, *p, *new_url = NULL;
	size_t len, erase;
	int old_style = 0;	/* 0: aug94 format, 1: old format */
	Template *t;
	FILE *fp;
	URL *up;
	Buffer *val;
	AVList *walker;

	if ((up = url_open(url)) == NULL) {
		errorlog("Cannot open URL: %s\n", url);
		return;
	}
	/*
	 *  Open the file before creating the template so that nothing
	 *  has to be released if the open fails.
	 */
	if ((fp = fopen(filename, "r")) == NULL) {
		log_errno(filename);
		url_close(up);
		return;
	}
	/* Build the template */
	t = create_template(NULL, up->url);

	/* Read the file and build a SOIF template from it */
	val = create_buffer(BUFSIZ);
	attr[0] = '\0';
	pattr[0] = '\0';
	while (fgets(buf, sizeof(buf), fp)) {
		/* strip trailing newline */
		if ((s = strrchr(buf, '\n')) != NULL)
			*s = '\0';

		/* check for Begin/End tags */
		if (!strcmp(buf, "End"))
			break;
		if (!strcmp(buf, "Begin3")) {
			old_style = 0;
			continue;
		}
		if (!strcmp(buf, "Begin") || !strcmp(buf, "Begin2")) {
			old_style = 1;
			continue;
		}

		if (old_style) {	/* very old-style */
			if ((s = lsm_parse_old_line(buf, attr)) != NULL) {
				strcpy(value, s);
				if (t->list)
					append_AVList(t->list, attr, value,
					    strlen(value));
				else
					t->list = create_AVList(attr, value,
					    strlen(value));
			}
			continue;
		}

		/* current aug94 format */
		if (buf[0] == '\0')
			continue;
		/*
		 *  Put the newline back, but only when there is room: a
		 *  line that filled the whole buffer has no spare byte.
		 */
		len = strlen(buf);
		if (len + 1 < sizeof(buf)) {
			buf[len++] = '\n';
			buf[len] = '\0';
		}
		/*
		 *  This is a simple state machine.  Either the
		 *  line contains an attribute, or the line
		 *  contains data associated with the previous attr.
		 *  NOTE(review): sscanf() reports a match even when the
		 *  name is not followed by ':'; left as is.
		 */
		if (!isspace((unsigned char) buf[0]) &&
		    sscanf(buf, "%[A-Za-z-]:", attr) == 1) {
			if (pattr[0] == '\0')
				strcpy(pattr, attr);
			/* erase attr, never touching the terminating NUL */
			erase = strlen(attr) + 1;
			if (erase > len)
				erase = len;
			memset(buf, ' ', erase);
		}
		if (attr[0] == '\0')
			continue;	/* data before any attribute */

		/* See if we've switched attributes, if so purge */
		if (strcmp(pattr, attr) != 0) {
			if (t->list)
				append_AVList(t->list, pattr, val->data,
				    val->length);
			else
				t->list = create_AVList(pattr, val->data,
				    val->length);
			shrink_buffer(val);
		}
		for (p = buf; isspace((unsigned char) *p); p++)
			/* skip spaces */ ;
		add_buffer(val, p, strlen(p));
		(void) strcpy(pattr, attr);
		memset(buf, '\0', sizeof(buf));
	}
	fclose(fp);
	/* flush the attribute that was still being collected */
	if (!old_style && pattr[0]) {
		if (t->list)
			append_AVList(t->list, pattr, val->data, val->length);
		else
			t->list = create_AVList(pattr, val->data, val->length);
	}
	free_buffer(val);

	/* Reset t->url to the file that the LSM points to */
	if (t->list != NULL) {
		if (old_style) {
			AVPair *site_avp, *path_avp, *file_avp;

			site_avp = extract_AVPair(t->list, "Site");
			if (site_avp == NULL)
				site_avp = extract_AVPair(t->list,
				    "Maintained-At");
			if (site_avp == NULL)
				site_avp = extract_AVPair(t->list, "MaintAt");

			path_avp = extract_AVPair(t->list, "Path");
			if (path_avp == NULL)
				path_avp = extract_AVPair(t->list, "PathFile");

			file_avp = extract_AVPair(t->list, "File");
			if (file_avp == NULL)
				file_avp = extract_AVPair(t->list,
				    "Package-Name");
			if (file_avp == NULL)
				file_avp = extract_AVPair(t->list, "PkgName");
			if (file_avp == NULL)
				file_avp = extract_AVPair(t->list, "PathFile");

			if (site_avp)
				new_url = lsm_old_style_url(site_avp->value,
				    path_avp ? path_avp->value : "/???/",
				    file_avp ? file_avp->value : "???");
		} else {
			AVPair *avp;

			/* first of primary, alternate, original site */
			avp = extract_AVPair(t->list, "Primary-Site");
			if (avp == NULL)
				avp = extract_AVPair(t->list, "Alternate-Site");
			if (avp == NULL)
				avp = extract_AVPair(t->list, "Original-Site");
			if (avp)
				new_url = lsm_aug94_url(avp->value);
		}
	}
	if (new_url != NULL) {
		xfree(t->url);
		t->url = new_url;
	}

	/* verify attributes in the template */
	for (walker = t->list; walker; walker = walker->next) {
		/* drop everything up to and including the first ':' */
		if ((p = strchr(walker->data->attribute, ':')) != NULL) {
			p++;
			memmove(walker->data->attribute, p, strlen(p) + 1);
		}
		/* Make Desc lines Description lines */
		if (!strcmp(walker->data->attribute, "Desc")) {
			xfree(walker->data->attribute);
			walker->data->attribute = strdup("Description");
		}
	}
	if (t->list)
		append_AVList(t->list, "Type", LSM_DTYPE, strlen(LSM_DTYPE));
	else
		t->list = create_AVList("Type", LSM_DTYPE, strlen(LSM_DTYPE));

	/* Print out the template */
	(void) init_print_template(stdout);
	print_template(t);
	finish_print_template();
	free_template(t);
	url_close(up);
}

/*
 *  main - Entry point.  Expects exactly two arguments, the URL and the
 *  local file holding the LSM entry; anything else prints the usage
 *  message and exits with status 1.  Exits with status 0 after the
 *  conversion has been attempted.
 */
int main(int argc, char *argv[])
{
	char *url, *filename;

	if (argc != 3)
		usage();

	/*
	 *  Work on private copies of the arguments, as the code always
	 *  has; a failed copy is reported instead of being passed on
	 *  as a NULL pointer.
	 */
	url = strdup(argv[1]);
	filename = strdup(argv[2]);
	if (url == NULL || filename == NULL) {
		fprintf(stderr, "lsm2soif: out of memory\n");
		exit(1);
	}

	init_log(stderr, stderr);
	init_url();
	do_lsmtosoif(url, filename);
	finish_url();
	exit(0);
}
