/* RCS revision identifier string for this file; not referenced by the code visible in this file. */
static char rcsid[] = "url_relative.c,v 1.9 1996/01/05 20:28:30 duane Exp";

/* This code is taken from libwww-3.0                                   */

/*                                                                    HTParse.c
 *    URI MANAGEMENT
 * 
 *    (c) COPYRIGHT CERN 1994.
 *    Please first read the full copyright statement in the file COPYRIGH.
 * 
 *  history:
 *    May 12 94       TAB added as legal char in HTCleanTelnetString
 * 
 */

/*
 *  url_relative.c - Build a full URL from a partial URL and its relative URL
 *
 *  Duane Wessels, wessels@cs.colorado.edu, June 1995
 *
 *  ----------------------------------------------------------------------
 *  Copyright (c) 1994, 1995.  All rights reserved.
 *  
 *    The Harvest software was developed by the Internet Research Task
 *    Force Research Group on Resource Discovery (IRTF-RD):
 *  
 *          Mic Bowman of Transarc Corporation.
 *          Peter Danzig of the University of Southern California.
 *          Darren R. Hardy of the University of Colorado at Boulder.
 *          Udi Manber of the University of Arizona.
 *          Michael F. Schwartz of the University of Colorado at Boulder.
 *          Duane Wessels of the University of Colorado at Boulder.
 *  
 *    This copyright notice applies to software in the Harvest
 *    ``src/'' directory only.  Users should consult the individual
 *    copyright notices in the ``components/'' subdirectories for
 *    copyright information about other software bundled with the
 *    Harvest source code distribution.
 *  
 *  TERMS OF USE
 *    
 *    The Harvest software may be used and re-distributed without
 *    charge, provided that the software origin and research team are
 *    cited in any use of the system.  Most commonly this is
 *    accomplished by including a link to the Harvest Home Page
 *    (http://harvest.cs.colorado.edu/) from the query page of any
 *    Broker you deploy, as well as in the query result pages.  These
 *    links are generated automatically by the standard Broker
 *    software distribution.
 *    
 *    The Harvest software is provided ``as is'', without express or
 *    implied warranty, and with no support nor obligation to assist
 *    in its use, correction, modification or enhancement.  We assume
 *    no liability with respect to the infringement of copyrights,
 *    trade secrets, or any patents, and are not responsible for
 *    consequential damages.  Proper use of the Harvest software is
 *    entirely the responsibility of the user.
 *  
 *  DERIVATIVE WORKS
 *  
 *    Users may make derivative works from the Harvest software, subject 
 *    to the following constraints:
 *  
 *      - You must include the above copyright notice and these 
 *        accompanying paragraphs in all forms of derivative works, 
 *        and any documentation and other materials related to such 
 *        distribution and use acknowledge that the software was 
 *        developed at the above institutions.
 *  
 *      - You must notify IRTF-RD regarding your distribution of 
 *        the derivative work.
 *  
 *      - You must clearly notify users that your are distributing 
 *        a modified version and not the original Harvest software.
 *  
 *      - Any derivative product is also subject to these copyright 
 *        and use restrictions.
 *  
 *    Note that the Harvest software is NOT in the public domain.  We
 *    retain copyright, as specified above.
 *  
 *  HISTORY OF FREE SOFTWARE STATUS
 *  
 *    Originally we required sites to license the software in cases
 *    where they were going to build commercial products/services
 *    around Harvest.  In June 1995 we changed this policy.  We now
 *    allow people to use the core Harvest software (the code found in
 *    the Harvest ``src/'' directory) for free.  We made this change
 *    in the interest of encouraging the widest possible deployment of
 *    the technology.  The Harvest software is really a reference
 *    implementation of a set of protocols and formats, some of which
 *    we intend to standardize.  We encourage commercial
 *    re-implementations of code complying to this set of standards.  
 *  
 */

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "util.h"

#ifndef NULL
#define NULL    0
#endif

/*
 *  Lower-case a single character.
 *
 *  The argument is converted to unsigned char before it reaches
 *  tolower(): handing a negative plain char (any 8-bit character on
 *  platforms where char is signed) to the <ctype.h> functions is
 *  undefined behaviour.  tolower() already returns non-upper-case values
 *  unchanged, so no separate isupper() test is needed and the argument is
 *  evaluated exactly once.
 */
#define TOLOWER(c) (tolower((unsigned char) (c)))


/* Forward declaration; the definition is near the end of this file. */
static char *HTSimplify(char *filename);

/*
 *  The pieces of a URL as located by scan().  Every non-NULL member
 *  points into the buffer that was passed to scan(); nothing here is
 *  separately allocated.
 */
struct struct_parts {
	char *access;		/* Scheme: the text before the ':' */
	char *host;		/* Text between "//" and the next '/' */
	char *absolute;		/* Path after a leading '/', without that '/' */
	char *relative;		/* Path that does not start with '/' */
	/* There is no search ("?...") member: it is treated as part of the path. */
	char *anchor;		/* Text following the first '#' */
};


/*
 *  scan - Split a URL into its constituents, in place
 *
 *  On entry,
 *    name    points to a writable, NUL-terminated document name which
 *            may be incomplete.
 *    parts   points to the structure to fill in.
 *  On exit,
 *    name    has been cut up: the ':' ending the scheme, every '#', the
 *            first '/' of a "//" host introducer and the '/' ending the
 *            host are overwritten with NULs.
 *    parts   absolute or relative may be non-NULL (but not both).
 *            host, anchor and access are non-NULL if they were
 *            specified.  All non-NULL members point to NUL-terminated
 *            strings inside name.
 */
static void scan(char *name, struct struct_parts *parts)
{
	/* Taken before any NUL is written, so the '#' pass sees the whole input. */
	size_t length = strlen(name);
	size_t i;
	char *after_access = name;
	char *p;

	parts->access = NULL;
	parts->host = NULL;
	parts->absolute = NULL;
	parts->relative = NULL;
	parts->anchor = NULL;

	/* Pass 1: a ':' seen before any '/' or '#' ends the scheme. */
	for (p = name; *p != '\0'; p++) {
		if (*p == ':') {
			*p = '\0';
			parts->access = after_access;	/* Scheme has been specified */
			after_access = p + 1;
			if (strcasecmp("URL", parts->access) != 0)
				break;
			parts->access = NULL;	/* Ignore IETF's URL: pre-prefix, keep looking */
		} else if (*p == '/' || *p == '#') {
			break;	/* Scheme has not been specified */
		}
	}

	/*
	 * Pass 2: walk backwards over the original length and cut at every
	 * '#', so the anchor ends up starting after the FIRST '#'.  Indexing
	 * is used rather than a decrementing pointer so that no pointer
	 * before the start of the buffer is ever formed.
	 */
	for (i = length; i-- > 0;) {
		if (name[i] == '#') {
			parts->anchor = name + i + 1;
			name[i] = '\0';	/* terminate the rest */
		}
	}

	/* Pass 3: classify what follows the scheme. */
	p = after_access;
	if (*p != '/') {
		parts->relative = (*p != '\0') ? p : NULL;	/* NULL for "" */
		return;
	}
	if (p[1] != '/') {
		parts->absolute = p + 1;	/* Root found but no host */
		return;
	}
	parts->host = p + 2;	/* Host has been specified */
	*p = '\0';
	p = strchr(parts->host, '/');	/* Look for the end of the host name, if any */
	if (p != NULL) {
		*p = '\0';	/* Terminate host */
		parts->absolute = p + 1;	/* Root has been found */
	}
}


/*
 *  url_parse_relative - Parse a name relative to another name
 *
 *  Builds a full name from aName, substituting the scheme, host and path
 *  of relatedName for whichever of those aName does not specify.
 *
 *  On entry,
 *    aName         The name given.  NULL is treated as "".
 *    relatedName   The name relative to which aName is to be parsed.
 *                  Give it an empty string (or NULL) if aName is
 *                  absolute.
 *  On exit,
 *    returns       A pointer to a string obtained from xstrdup() which
 *                  the caller MUST free.
 *
 *  Neither input string is modified; working copies are cut up instead.
 */
char *url_parse_relative(char *aName, char *relatedName)
{
	char *result;
	char *return_value;
	char *name;
	char *rel;
	char *access;
	int len;
	struct struct_parts given, related;

	if (!aName)
		aName = "";
	if (!relatedName)	/* HWL 23/8/94: dont dump due to NULL */
		relatedName = "";

	/*
	 * Every component copied into the result is a distinct substring of
	 * one of the two inputs, and at most ":" "//" "/" "#" plus the
	 * terminator are added, so this is more than enough.
	 */
	len = strlen(aName) + strlen(relatedName) + 10;
	result = (char *) xmalloc(len);
	result[0] = '\0';

	/* Make working copies of the input strings to cut up. */
	name = xstrdup(aName);
	rel = xstrdup(relatedName);
	scan(name, &given);
	scan(rel, &related);

	access = given.access ? given.access : related.access;
	if (access) {
		strcat(result, access);
		strcat(result, ":");
	}

	/* If the schemes differ, inherit nothing. */
	if (given.access && related.access &&
	    strcmp(given.access, related.access) != 0) {
		related.host = NULL;
		related.absolute = NULL;
		related.relative = NULL;
		related.anchor = NULL;
	}

	if (given.host || related.host) {
		strcat(result, "//");
		strcat(result, given.host ? given.host : related.host);
	}

	/* If the hosts differ, inherit no path. */
	if (given.host && related.host &&
	    strcmp(given.host, related.host) != 0) {
		related.absolute = NULL;
		related.relative = NULL;
		related.anchor = NULL;
	}

	if (given.absolute) {	/* All is given */
		strcat(result, "/");
		strcat(result, given.absolute);
	} else if (related.absolute) {	/* Adopt path, not name */
		/* Where the path starts: the '/' appended just below. */
		char *path = result + strlen(result);

		strcat(result, "/");
		strcat(result, related.absolute);
		if (given.relative) {
			/*
			 * Find the last '/' of the inherited path, ignoring any
			 * search part.  Both the '?' lookup and the backward
			 * walk are confined to the path: a '?' inside the scheme
			 * or host must not be taken for a search part, and the
			 * walk must not run off the front of the buffer.
			 * path[0] is always '/', so the walk stops there at the
			 * latest.
			 */
			char *cut = strchr(path, '?');

			if (!cut)
				cut = path + strlen(path) - 1;
			while (cut > path && *cut != '/')
				cut--;
			cut[1] = '\0';	/* Remove filename */
			strcat(result, given.relative);	/* Add given one */
			result = HTSimplify(result);
		}
	} else if (given.relative) {
		strcat(result, given.relative);	/* What we've got */
	} else if (related.relative) {
		strcat(result, related.relative);
	} else {	/* No inheritance */
		strcat(result, "/");
	}

	if (given.anchor || related.anchor) {
		strcat(result, "#");
		strcat(result, given.anchor ? given.anchor : related.anchor);
	}

	xfree(rel);
	xfree(name);

	/* Hand back a copy that is exactly the right length. */
	return_value = xstrdup(result);
	xfree(result);
	return return_value;
}



/*
 *  HTSimplify - Simplify a URI in place
 *
 *  A URI is allowed to contain the sequence xxx/../ which may be
 *  replaced by "", and the sequence "/./" which may be replaced by "/".
 *  Runs of several '/' are collapsed to one.  Simplification helps us
 *  recognize duplicate URIs.
 *
 *    Thus,   /etc/junk/../fred       becomes /etc/fred
 *            /etc/junk/./fred        becomes /etc/junk/fred
 *
 *    but these are left unchanged:
 *            http://fred.xxx.edu/../..
 *            ../../albert.html
 *
 *  A leading "//" is preserved (only slashes after it are collapsed).
 *
 *  "xxx/.." is only removed while more than one real segment (one that
 *  is not empty, "." or "..") remains, which avoids producing an empty
 *  URL:
 *
 *            /fred/..                stays   /fred/..
 *            /fred/././..            becomes /fred/..
 *
 *  Where simplification starts: after the last "://" if there is one
 *  (several proxies in cascade), else after the first ":/", else at the
 *  start of the string.
 *
 *  If the part found that way starts with "news:", the only change made
 *  is lower-casing: from the '@' onwards if there is one, otherwise
 *  everything after "news:".
 *
 *  Returns: filename itself; the string is modified in place and never
 *  grows.  A NULL filename is returned unchanged.
 */
static char *HTSimplify(char *filename)
{
	char *path;
	char *p;
	int segments = 0;

	if (filename == NULL)
		return filename;

	if ((path = strstr(filename, "://")) != NULL) {	/* Find host name */
		char *next;

		path += 3;
		while ((next = strstr(path, "://")) != NULL)
			path = next + 3;
	} else if ((path = strstr(filename, ":/")) != NULL) {
		path += 2;
	} else {
		path = filename;
	}

	if (path[0] == '/' && path[1] == '/') {	/* Some URLs start //<foo> */
		path += 1;
	} else if (strncmp(path, "news:", 5) == 0) {
		char *ptr = strchr(path + 5, '@');

		if (ptr == NULL)
			ptr = path + 5;
		/* Make group or host lower case.  The cast keeps 8-bit
		 * characters from reaching tolower() as negative values. */
		for (; *ptr != '\0'; ptr++)
			*ptr = (char) tolower((unsigned char) *ptr);
		return filename;	/* Doesn't need to do any more */
	}

	/*
	 * First pass: count the `real' segments.  The start of the path is
	 * examined as if it were a '/', whatever character is there.
	 */
	for (p = path; *p != '\0'; p++) {
		if (*p != '/' && p != path)
			continue;
		if (p[1] == '/' || p[1] == '\0')
			continue;	/* empty segment */
		if (p[1] == '.' && (p[2] == '/' || p[2] == '\0'))
			continue;	/* "." */
		if (p[1] == '.' && p[2] == '.' && (p[3] == '/' || p[3] == '\0'))
			continue;	/* ".." */
		segments++;
	}

	/* Second pass: simplify. */
	p = path;
	while (*p != '\0') {
		if (*p != '/') {
			p++;
			continue;
		}
		if (p > path && p[1] == '.' && (p[2] == '/' || p[2] == '\0')) {
			/* Remove a slash and a dot, then look at this position again. */
			memmove(p, p + 2, strlen(p + 2) + 1);
			continue;
		}
		if (segments > 1 && p[1] == '.' && p[2] == '.' &&
		    (p[3] == '/' || p[3] == '\0')) {
			char *q = p;

			while (q > path && *--q != '/')
				;	/* back up to the previous slash, or to path */
			if (strncmp(q, "/../", 4) != 0 && strncmp(q, "/./", 3) != 0 &&
			    strncmp(q, "./", 2) != 0) {
				char *dest = p + 3;

				/* No slash in front of the removed segment: drop the
				 * one after ".." too, but never step past the
				 * terminator. */
				if (*q != '/' && *dest != '\0')
					dest++;
				memmove(q, dest, strlen(dest) + 1);	/* Remove /xxx/.. */
				segments--;
				p = q;	/* Start again at the previous slash */
				continue;
			}
			p += 2;	/* Nothing removable in front: step over "/." */
			continue;
		}
		if (p[1] == '/') {	/* Remove multiple /'s */
			char *rest = p + 1;

			while (*rest == '/')
				rest++;
			memmove(p + 1, rest, strlen(rest) + 1);
		}
		p++;
	}
	return filename;
}


#ifdef MAKE_MAIN

/*
 *  Test driver (built only with MAKE_MAIN): resolve argv[1] against the
 *  optional base URL argv[2] and print the result.
 *
 *  Returns 0 on success, 1 if no URL was given on the command line.
 */
int main(int argc, char *argv[])
{
	char *url;

	if (argc < 2) {
		fprintf(stderr, "usage: %s url [base-url]\n",
		    argc > 0 ? argv[0] : "url_relative");
		return 1;
	}
	/* A missing base URL is passed as NULL, which url_parse_relative() accepts. */
	url = url_parse_relative(argv[1], argc > 2 ? argv[2] : NULL);
	printf("%s\n", url);
	return 0;
}

#endif
