/*
    Gn: A Server for the Internet Gopher Protocol(*).
    File: mkcache/mkcache.c
    Version 2.23
    
    Copyright (C) 1993  <by John Franks>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 1, or (at your option)
    any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

    (*) Gopher is a registered trademark of the Univ. of Minn.
*/

#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include "mkcache.h"
#include "reg.h"
#include "regi.h"

/*
 * Flags describing the menu item currently being collected.  mkcache()
 * sets them while parsing keys and writeitem() clears all four once the
 * item has been written.
 */
int	isgnlink,	/* "gnlink" key or attribute seen, or item found to be local */
	is_urllink,	/* item was started by a "urllink" or "urlhlink" key */
	ishname,	/* item was started by an "hname" or "urlhlink" key */
	islocal;	/* set by writeitem() when host/port equal myhost/defaultport */

/*
 * Field values of the current menu item, filled in by mkcache() from the
 * "key=value" lines of the menu file.  writeitem() emits them and then
 * empties every per-item buffer except owner.
 */
char	item[MAXLEN],		/* title text from the key that starts an item */
	host[MAXLEN],		/* "host" value; defaulted to myhost by writeitem() */
	type[MAXLEN],		/* "type" value; only its first character is used */
	port[MAXLEN],		/* "port" value; defaulted by writeitem() */
	owner[MAXLEN],		/* not referenced by the code visible in this file */
	separator[MAXLEN],	/* "separator" regexp for composite files */
	section[MAXLEN],	/* "section" (title) regexp for composite files */
	content[MAXLEN],	/* "contenttype" value */
	encoding[MIDLEN],	/* "contentencoding" value, or derived from a gz/z suffix */
	attribute[MIDLEN],	/* "attribute" value, or set by some item keys */
	suffix[MAXLEN],		/* lower-cased file suffix extracted from path */
	path[MAXLEN],		/* "path" value, or "url" value for URL links */
	mfpath[MAXLEN],		/* top-level menu file; not assigned here, presumably by init() */
	cfpath[MAXLEN];		/* top-level cache file; not assigned here, presumably by init() */

/* Forward declarations of the file-local functions defined below. */
static void	mkcache(),
		writeitem(),
		cache_composite();


/*
 * Program entry point.  Hands the command line to init(), then builds the
 * cache file named by cfpath from the menu file named by mfpath (neither
 * is assigned in this file; presumably init() fills them in).
 * Always returns 0; fatal errors terminate the program through exit()
 * inside the callees.
 */
int
main( argc, argv)
int	argc;
char	*argv[];
{
	init( argc, argv);
	mkcache( mfpath, cfpath);
	return (0);
}

/*
 * copy_mid( dst, dstsize, src, key) stores src in dst the same way copy()
 * does, but never writes more than dstsize bytes.  It is used for the
 * fields whose buffers are shorter than a menu line; key is only used in
 * the warning printed when the value has to be truncated.
 *
 * NOTE(review): the staging buffer has the same size as the other field
 * buffers, so a line consisting mostly of tabs can still overrun it, just
 * as it can overrun any other field filled through copy().
 */
static void
copy_mid( dst, dstsize, src, key)
char	*dst;
int	dstsize;
char	*src,
	*key;
{
	char	tmp[MAXLEN];

	copy( tmp, src);
	if ( (int) strlen( tmp) >= dstsize) {
		if ( !quiet)
			fprintf( stderr,
			"\tWarning: %s value too long: truncating it\n", key);
		tmp[dstsize - 1] = '\0';
	}
	strcpy( dst, tmp);
}

/*
 * mkcache( mpath, cpath) reads the menu file mpath and writes the cache
 * file cpath.
 *
 * Each line delivered by get_next_line() is split at its first '=' into a
 * key (lower-cased before comparison) and a value.  A "maintainer" line is
 * written as an "owner:" record when it is the first line of the menu and
 * is otherwise ignored.  The keys name, hname, urllink, urlhlink,
 * gopheronly and gnlink start a new item; the remaining keys fill in the
 * fields of the current item.  "text" and "httptext" hand the following
 * lines to write_text().  Unrecognized keys are silently skipped.  A
 * pending item is flushed through writeitem() whenever the next one
 * starts and once more at end of file.
 *
 * The program exits with status 2 when either file cannot be opened.
 */
static void
mkcache( mpath, cpath)
char	*mpath,
	*cpath;
{
	FILE	*mfp,
		*cfp;

	char	*cp,
		*text,
		*bufp,
		mbuf[MAXLEN];

	int	firstline = TRUE;

	if ( (mfp = fopen( mpath, "r")) == (FILE *) NULL ) {
		fprintf( stderr, "Can't open %s\n", mpath);
		exit( 2);
	}

	if ( (cfp = fopen( cpath, "w")) == (FILE *) NULL ) {
		fprintf( stderr, "Can't open %s\n", cpath);
		exit( 2);
	}

	if ( !quiet)
		printf( "Writing cache file %s\n", cpath);

	while ( (bufp = get_next_line(mbuf, MAXLEN, mfp)) != NULL) {
		/*
		 * get_next_line() only returns lines containing '=', but
		 * do not dereference the result blindly.
		 */
		if ( (cp = strchr( bufp, '=')) == NULL)
			continue;
		*cp++ = '\0';
		text = cp;
		strlower( bufp);

		if ( streq( bufp, "maintainer")) {
			if (firstline)
				fprintf( cfp, "owner:%s\n", text);
			else if ( !quiet) {
				fprintf( stderr,
				"\tMaintainer line not first: ignoring it|\n");
			}
			firstline = FALSE;
			continue;
		}
		firstline = FALSE;

		/* Keys that begin a new menu item */
		if ( streq( bufp, "name") ||
		     streq( bufp, "hname") ||
		     streq( bufp, "urllink") ||
		     streq( bufp, "urlhlink") ||
		     streq( bufp, "gopheronly") ||
		     streq( bufp, "gnlink")) {
			/*
			 * Flush the previous item first: writeitem() resets
			 * the flags and buffers that are set just below.
			 */
			if ( item[0] != '\0')
				writeitem( cfp, mpath, cpath);
			copy( item, text);
			if ( streq( bufp, "hname"))
				ishname = TRUE;
			else if ( streq( bufp, "gopheronly"))
				strcpy( attribute, "gopheronly");
			else if ( streq( bufp, "urllink")) {
				strcpy( attribute, "urllink");
				is_urllink = TRUE;
			}
			else if ( streq( bufp, "urlhlink")) {
				strcpy( attribute, "urllink");
				is_urllink = TRUE;
				ishname = TRUE;
			}
			else if ( streq( bufp, "gnlink"))
				isgnlink = TRUE;
			continue;
		}

		/* "path" is honoured for ordinary items, "url" for URL links */
		if ( !is_urllink && streq( bufp, "path")) {
			copy( path, text);
			continue;
		}

		if ( is_urllink && streq( bufp, "url")) {
			copy( path, text);
			continue;
		}

		if ( streq( bufp, "type")) {
			copy( type, text);
			continue;
		}

		if ( streq( bufp, "host")) {
			copy( host, text);
			continue;
		}

		if ( streq( bufp, "port")) {
			copy( port, text);
			continue;
		}


		/* Use strcpy to allow trailing whitespace 
		  and tabs in separator and section regexps */

		if ( streq( bufp, "separator")) {
			strcpy( separator, text);
			continue;
		}

		if ( streq( bufp, "section")) {
			strcpy( section, text);
			continue;
		}

		if ( streq( bufp, "contenttype")) {
			copy( content, text);
			continue;
		}

		/* encoding and attribute are shorter than a menu line */
		if ( streq( bufp, "contentencoding")) {
			copy_mid( encoding, (int) sizeof( encoding), text,
				"ContentEncoding");
			continue;
		}

		if ( streq( bufp, "attribute")) {
			copy_mid( attribute, (int) sizeof( attribute), text,
				"Attribute");
			strlower( attribute);
			if ( streq( attribute, "gnlink"))
				isgnlink = TRUE;
			continue;
		}

		if ( streq( bufp, "text")) {
			if ( item[0] != '\0')
				writeitem( cfp, mpath, cpath);
			write_text( cfp, mfp, "i");
			continue;
		}
		if ( streq( bufp, "httptext")) {
			if ( item[0] != '\0')
				writeitem( cfp, mpath, cpath);
			write_text( cfp, mfp, "http:");
			continue;
		}

	}	
	if ( item[0] != '\0' )
		writeitem( cfp, mpath, cpath);

	/* Buffered output is flushed here, so this is where write errors show */
	if ( fclose( cfp) != 0)
		fprintf( stderr, "Error writing %s\n", cpath);
	fclose( mfp);
}


/*
 * sibling_path( newpath, size, oldpath, subdir, fname) builds in newpath
 * (size bytes) the name obtained by replacing everything from the last '/'
 * of oldpath with "<subdir>/<fname>"; subdir is expected to start with
 * '/'.  Returns TRUE on success and FALSE when oldpath contains no '/' or
 * the result would not fit.
 */
static int
sibling_path( newpath, size, oldpath, subdir, fname)
char	*newpath;
int	size;
char	*oldpath,
	*subdir,
	*fname;
{
	char	*slash;

	if ( (long) strlen( oldpath) >= (long) size)
		return (FALSE);
	strcpy( newpath, oldpath);
	if ( (slash = strrchr( newpath, '/')) == NULL)
		return (FALSE);

	/* prefix + subdir + '/' + fname + terminating NUL must fit */
	if ( (long) (slash - newpath) + (long) strlen( subdir) +
			(long) strlen( fname) + 2 > (long) size)
		return (FALSE);

	sprintf( slash, "%s/%s", subdir, fname);
	return (TRUE);
}

/*
 * writeitem( fp, mpath, cpath) writes the two cache lines for the menu
 * item held in the file-scope item buffers, then clears those buffers and
 * flags so that the next item starts from scratch.
 *
 * Before writing it
 *   - derives suffix (and, for a gz/z suffix, a default encoding) from path,
 *   - fills in default host, type character and port,
 *   - marks the item local when host and port equal myhost/defaultport,
 *   - builds the cache of a composite ("1m"/"7m") file when a separator
 *     regexp was given,
 *   - calls getcontent() for type 1 and 7 items or when no content type
 *     was given.
 *
 * When recurse is set and the item is a type 1 item that is not a
 * composite file, the menu file in the corresponding sub-directory (if it
 * exists) is processed through a recursive call to mkcache().  mpath and
 * cpath are the menu and cache file currently being processed; they are
 * only used to derive the sub-directory file names.
 */
static void 
writeitem( fp, mpath, cpath)
FILE	*fp;
char	*mpath,
	*cpath;
{
	struct stat stat_buf;

	char	*cp1,
		*cp2,
		*word,
		type1,
		encsuffix[SMALLLEN],
		tmppath[MAXLEN],
		mnewpath[MAXLEN],
		cnewpath[MAXLEN];

	size_t	len;

	if ( (*path == '\0') && !quiet ) {
		if ( is_urllink)
			word = "This won't work! URL";
		else
			word = "Path field";
		fprintf( stderr, 
			"Warning: %s for item %s is empty.\n", word, item);
	}
	else {
		/* Work on a copy: the suffix search cuts the string up */
		strcpy( tmppath, path);
		if ( ( cp1 = strrchr( tmppath, '(')) != NULL)
			*cp1 = '\0';
		if ( ((cp1 = strrchr( tmppath, '.')) != NULL) &&
					strlen( cp1) <= 5 ) {
			strcpy( suffix, cp1 + 1);
			strlower( suffix);
			if ( streq( suffix, "gz") || streq( suffix, "z") ) {
				/*
				 * Compression suffix: remember it and look
				 * for the real suffix in front of it.
				 */
				strcpy( encsuffix, suffix);
				*cp1 = '\0';
				if ( ((cp1 = strrchr(tmppath, '.')) != NULL)
						&& strlen( cp1) <= 5 ) {
					strcpy( suffix, ++cp1);
					strlower( suffix);
				}
			} else
				*encsuffix = '\0';

			/* An explicit encoding in the menu file wins */
			if ( *encsuffix && !*encoding && (path[1] != 'Z') ) {
				switch (*encsuffix) {
				case 'g':
					strcpy( encoding, "x-gzip");
					break;
				case 'z':
					strcpy( encoding, "x-compress");
					break;
				}
			}
		}
	}
	strcpy( tmppath, path);

	if ( *host == '\0' || streq( host, "+"))
		strcpy( host, myhost);

	/* Without an explicit type the first path character is the type */
	if ( *type == '\0' )  {
		if ( strncmp( tmppath, "exec", 4) == 0 ) {
			type1 = tmppath[4];
			if ( (type1 == ':') || (type1 == '/'))
				type1 = '0';
		}
		else			
			type1 = (tmppath[0] ? tmppath[0] : '1');
	}
	else
		type1 = *type;
	if ( *port == '\0' || streq( port, "0") || streq( port, "+")) {
		strcpy( port,
			((type1 == '8' || type1 == 'T') ? "23" : defaultport));
		if ( is_urllink )
			strcpy( port, "80");
	}

	strlower( host);
	strlower( myhost);
	if ( streq( host, myhost) && streq( port, defaultport))
		islocal = isgnlink = TRUE;

	/* If we have a composite (structured) file write its cache now */
	if ( (strncmp( path, "1m", 2) == 0) ||(strncmp( path, "7m", 2) == 0)) {
		if (separator[0] != '\0')
			cache_composite( path, cpath);
		else if ( !quiet) {
			printf( "\tWarning: item %s has no Separator\n", path);
			printf( "\tPresumably %s..cache has been made\n",path);
		}
	}

	if ( (type1 == '7') || (type1 == '1') || (*content =='\0'))
			getcontent( type1);

	/* Now write primary line */
	if ( ishname )
		fprintf( fp, "hname:" );
	if ( is_urllink)
		fprintf( fp, "0%s\t%s\tremote.host\t\n", item, path);
	else
		fprintf( fp, "%c%s\t%s\t%s\t%s\n",
			type1, item, path, host, port);

	/* Then the secondary line */
	if ( islocal )
		fprintf( fp, "\t%s\t%s\t%s\t%s\n", 
			content, suffix, encoding, attribute);
	else if ( isgnlink)
		fprintf( fp, "\tgn_link\t\t\t%s\n", attribute );
	else if ( is_urllink)
		fprintf( fp, "\turl_link\t\t\turllink\n");
	else
		fprintf( fp, "\tgopher_link\t\t\t%s\n", attribute );

	/* Reset the per-item state; tmppath still holds the path */
	islocal = isgnlink = is_urllink = ishname = FALSE;
	item[0] = path[0] = host[0] = port[0] = type[0] = '\0';
	separator[0] = section[0] = '\0';
	content[0] = suffix[0] = encoding[0] = attribute[0] = '\0';

	if ( !recurse || (type1 != '1'))
		return;

	/* An empty path has no directory to descend into */
	len = strlen( tmppath);
	if ( (len == 0) || (tmppath[1] == 'm'))
		return;

	/*
	 * Kill any '/' at the end of a path.  If that was the
	 * only '/' or there were none to begin with, then the path
	 * is the root or something weird and we don't want
	 * to recurse so we return.
	 */
	if ( tmppath[len - 1] == '/')
		tmppath[len - 1] = '\0';
	if ( ( cp2 = strrchr( tmppath, '/')) == NULL)
		return;

	/* Next build new menu and cache path names and recurse */
	if ( !sibling_path( mnewpath, (int) sizeof( mnewpath),
				mpath, cp2, mfname) ||
	     !sibling_path( cnewpath, (int) sizeof( cnewpath),
				cpath, cp2, cfname)) {
		if ( !quiet)
			fprintf( stderr,
			"\tWarning: can't build file names for %s: not recursing\n",
				tmppath);
		return;
	}

	/* Check menu file exists, otherwise just return */
	if ( stat( mnewpath, &stat_buf) == 0 )
		mkcache( mnewpath, cnewpath);
}


static void
cache_composite( selector, cpath)
char	*selector,
	*cpath;
{

	FILE	*cachefp,
		*compfp;

	struct regprog	*sep_rp,
			*title_rp;

	int	len,
		nextline = FALSE,
		firsttime = TRUE,
		titlefound = FALSE;

	long	count = 0L,
		start = 0L,
		end = 0L;

	register char	*cp,
			*cp2;

	char	compcache[MAXLEN],
		compfile[MAXLEN],
		rangesel[MAXLEN],
		default_title[MAXLEN],
		ltitle[MAXLEN],
		ttitle[MAXLEN],
		linebuf[MAXLEN];


	/* 
	 * selector (from Path= field in menu file) has the form 
	 *      1m/path/to/file   or 7m/path/to/file
	 * The separator and section arrays contain the regular 
	 * expressions to be used to separate the file into sections
	 * and make a menu item for each.
	 */


	strcpy( compfile, cpath);
	cp = strrchr( compfile, '/');
	cp2 = strrchr( selector, '/');
	strcpy( cp, cp2);
	sprintf( compcache, "%s.%s", compfile, cfname);

	strcpy( rangesel, selector);
	rangesel[0] = '1';

	if ( *section == '$') {
		strcpy( section, section + 1);
		nextline = TRUE;
	}
	if ( ((sep_rp = regcomp( separator)) == NULL) ||
			((title_rp = regcomp( section)) == NULL)) {
		fprintf( stderr, "Syntax error in %s\n or in %s\n",
			separator, section);
		exit( 2);
	}

	if ( (cachefp = fopen( compcache, "w")) == (FILE *) NULL ) {
		fprintf( stderr, "Warning: can't open %s\n", compcache);
		fprintf( stderr, "Skipping file %s.\n", compfile);
		return;
	}
	if ( (compfp = fopen( compfile, "r")) == (FILE *) NULL ) {
		fprintf( stderr, "Warning: can't open %s\n", compfile);
		fprintf( stderr, "Skipping file %s.\n", compfile);
		unlink( compcache);
		return;
	}


	if ( !quiet)
		printf( "Writing composite cache file %s\n", compcache);

	ltitle[0] = '\0';
	getcontent( '0');
	while ( fgets( linebuf, MAXLEN, compfp)) {
		len = strlen( linebuf);
		chop( linebuf);

		if ( regfind( sep_rp, linebuf) ) {
			end = count;
			sprintf( default_title, " Range: %ld--%ld", start, end);
			if ( ! firsttime) {
				copy( ttitle, ltitle);
				fprintf( cachefp, "0%s\tR%ld-%ld-%s\t%s\t%s\n",
					( *ttitle ? ttitle : default_title),
					start, end, rangesel, host, port);
				fprintf( cachefp, "\t%s\t%s\n",
					content, suffix);
			}
			start = count;
			*ltitle = '\0';
			firsttime = titlefound = FALSE;
		}

		if ( ( ! titlefound) && nextline ) {
			fgets( linebuf, MAXLEN, compfp);
			len += strlen( linebuf);
			chop( linebuf);
		}
		if ( (! titlefound) && regfind( title_rp, linebuf)) {
			strcpy( ltitle, regrp( 0));
			titlefound = TRUE;
		}

		count += len;
	}	
	end = count;
	copy( ttitle, ltitle);
	fprintf( cachefp, "0%s\tR%ld-%ld-%s\t%s\t%s\n",
		( *ttitle ? ttitle : default_title),
		start, end, rangesel, host, port);
	fprintf( cachefp, "\t%s\t%s\n",	content, suffix);
	fclose( cachefp);
	fclose( compfp);
}


/*
 * copy( s1, s2) copies string s2 to s1 expanding tabs to 8 space boundaries
 * and removing any trailing spaces.
 *
 * A tab always produces at least one space and pads up to the next
 * multiple of 8 of the output column.  s1 is always NUL terminated on
 * return.  The size of s1 is not known here: the caller must provide room
 * for the expanded text (up to 8 bytes per tab in s2).
 */

void
copy( s1, s2)
char	*s1,
	*s2;
{
	char	*src,
		*dst;
	int	col;

	src = s2;
	dst = s1;
	while ( *src != '\0') {
		if ( *src == '\t') {
			/*
			 * The column is the number of characters written so
			 * far; s1 is not terminated yet, so strlen( s1)
			 * must not be used here.
			 */
			for ( col = (int) ((dst - s1) % 8); col < 8; col++ )
				*dst++ = ' ';
			src++;
		} else {
			*dst++ = *src++;
		}
	}

	/* Drop trailing spaces without ever looking in front of s1 */
	while ( (dst > s1) && (dst[-1] == ' '))
		dst--;
	*dst = '\0';
}

/* 
 * Read in line, skip lines with no "=" in them, deal with comments (#),
 * get rid of leading whitespace.
 *
 * buf/len describe the caller's line buffer and fp the stream to read.
 * Returns a pointer into buf at the first non-blank character of the
 * next line that contains '=', or NULL when fgets() reports end of file
 * or an error.
 *
 * Only the first '#' of a line is examined: when it is preceded by a
 * backslash the backslash is removed and the '#' is kept, otherwise the
 * line is cut off at the '#'.
 */
			
char *
get_next_line( buf, len, fp)
char	*buf;
int	len;
FILE	*fp;
{
	char	*cp,
		*bufp;

	while ( (bufp = fgets( buf, len, fp)) != NULL) {
		chop( bufp);
		if ( (cp = strchr( bufp, '#')) != NULL) {
			if ( ( cp != bufp) && (*(cp-1) == '\\')) {
				/*
				 * Shift the rest of the line over the
				 * backslash.  The regions overlap, so this
				 * must be memmove(), not strcpy().
				 */
				memmove( cp - 1, cp, strlen( cp) + 1);
			}
			else
				*cp = '\0';
		}
		if ( strchr( bufp, '=') == NULL)
			continue;
		while ( (*bufp == '\t') || (*bufp == ' '))
			bufp++;
		break;
	}
	return bufp;
}
	

/*
 * write_text( cfp, mfp, type) copies a free-text block from the menu file
 * mfp to the cache file cfp.  Lines are read until one starts with
 * "endtext=" (compared case-insensitively); that line is consumed and not
 * written.
 *
 * When type is "i" every line becomes an "i" record carrying myhost and
 * defaultport, with an empty line written as "<BR>".  For any other type
 * (the caller in this file passes "http:") non-empty lines are written
 * with type as a prefix and empty lines are dropped.
 *
 * An error is reported on stderr when the file ends before the end
 * marker.  Note that the parameter named type hides the file-scope array
 * of the same name inside this function.
 */
void
write_text( cfp, mfp, type)
FILE	*cfp,
	*mfp;
char	*type;

{
	register char	*cp;
	char		buf[MAXLEN],
			buf2[MAXLEN];
	int		is_info;

	is_info = streq( type, "i");

	while ( (cp = fgets( buf, MAXLEN, mfp)) != NULL) {
		chop( cp);

		/* Lower-case a copy so the end marker matches in any case */
		strcpy( buf2, cp);
		strlower( buf2);
		if ( strncmp( buf2, "endtext=", 8 ) == 0 )
			return;

		if ( is_info) {
			if ( *cp == '\0')
				fprintf( cfp, "i<BR>\ti/\t%s\t%s\n",
					myhost, defaultport);
			else
				fprintf( cfp, "i%s\ti/\t%s\t%s\n",
					cp, myhost, defaultport);
		}
		else if ( *cp != '\0') {  /* it's httpText */
			fprintf( cfp, "%s%s\n", type, cp);
		}
	}
	fprintf( stderr, "Error:  End of file reached with no 'endText'\n");
}

/*
 * strlower( st) converts the string st to lower case in place and
 * returns st.
 *
 * Each character is passed to tolower() as an unsigned char: handing a
 * negative plain char to the <ctype.h> functions is undefined.
 */
char *
strlower( st)
char	*st;
{
	register char	*cp;

	for ( cp = st; *cp != '\0'; cp++)
		*cp = (char) tolower( (unsigned char) *cp);
	return (st);
}




