/*
 * htn2html.c - World-Wide Web gateway for Peter Scott's HYTELNET program.
 *
 * SYNOPSIS:
 *	htn2html [path]
 *
 * EXPLANATION
 *      This is a format converter to put Lynx files into HTML.
 *      HTML is the markup language used by the World-Wide Web global
 *      information initiative.
 *
 *	This file may be used in a server to make Peter Scott's data,
 *	or any other data in the same format, available to the world.
 *
 *      The database consists of many more-or-less plain text files,
 *      all in a single directory.  Any text in angle brackets (eg <HELP>)
 *      is a link.  Selecting it moves you to the named file.  Filenames
 *      are mapped to lower case (so <HELP> refers to the file 'help').
 *
 *      The program notices telnet commands embedded in the text files,
 *      and will generate links to the telnet sites.
 *
 * NOTES
 *	This corresponds to HYTELNET 5.x for the IBM PC.
 *
 *
 * BUGS
 *    - Ignores the page & line numbers in more complex HYPERRES
 *      links <file page line>.
 *    - Should be able to handle more than one database.
 *
 * AUTHORS
 *      Earl Fogel, Computing Services, University of Saskatchewan
 *	fogel@jester.usask.ca
 *	Tim Berners-Lee, CERN, Geneva, Switzerland.  timbl@info.cern.ch
 *
 * CHANGE LOG
 *	Dec 1992 - W3 gateway made from interactive program (TBL)
 *
 * NOTICE
 *	Permissions to copy, use and redistribute this program are hereby
 *	granted provided the Copyright notice and this comment remain
 *	intact.
 *
 *		    (c) Copyright 1992 Earl Fogel
 *
 */

#include "new_globals.c"  /* these are some global variables */

#include <stdio.h>
#include <ctype.h>
#include <signal.h>
#include <string.h>

#include <sys/types.h>			/* For stat() */
#include <sys/stat.h>			/* For stat() */

#ifndef TRUE
#define TRUE 1
#define FALSE 0
#endif


#define ON	1
#define OFF	0
#define STREQ(a,b) (strcmp(a,b) == 0)
#define STRNEQ(a,b,c) (strncmp(a,b,c) == 0)
#define printable(c) (((c)>31 && (c)<=127) || (c)==9 || (c)==10)
#define HLINE	-60	/* horizontal line segment */
#define VLINE	-77	/* vertical line segment */
#define VLINE2	-70	/* vertical line segment */
#define UL	-38	/* upper left corner */
#define UR	-65	/* upper right corner */
#define LL	-64	/* lower left corner */
#define LR	-39	/* lower right corner */
#define UL2	-42	/* upper left corner */
#define UR2	-73	/* upper right corner */
#define LL2	-45	/* lower left corner */
#define LR2	-67	/* lower right corner */
#define BULLET	4	/* list item marker */
#define BLOT1	-78
#define BLOT2	-79
#define BLOT3	-80

#define UPARROW	201
#define DNARROW	202
#define RTARROW	203
#define LTARROW	204

/* values for links.tcmd */
#define TELNET 1
#define TN3270 2


int col=0;			/* column number on output */

int relative_paths= FALSE;
int add_html= FALSE;
char *title= NULL;

int more = FALSE;
char *interaddr(), *nextword(),  *mystrncpy();
void getlinkname();
void convert();  void outch();


#ifdef VMS
#   define strncasecmp strncmp   /* vms doesn't have strncasecmp */
#endif VMS

int server;				/* Use telnet-style CRLF? */
int main(argc, argv)
int argc;
char *argv[];
{
    int  i;
    char *startfile = STARTFILE;
    char *startdir = STARTDIR;
    char *outputfile = NULL;
    char name_equalizer[120];
    char *cp, *cp2;
    FILE *fp;

    /* set the delimiter defaults */
    delimiter.link = '\0';
    delimiter.token = ':';
    delimiter.end_link = '>';
    strcpy(delimiter.target, "[*");
    strcpy(delimiter.end_target, "*]");

    /*
     * Process arguments - with none, look for the database in STARTDIR,
     * starting with STARTFILE.
     *
     * If a pathname is given, use it as the starting point.  Split it
     * into directory and file components, 'cd' to the directory, and
     * view the file.
     */
    server = TRUE;		/* Doesn't harm much anyway */
    for (i=1; i<argc; i++) {

	if (!strncmp(argv[i], "-lf",3)) {	/* just lf on end */
	    server = FALSE;

	} else if (!strncmp(argv[i], "-rel",4)) { /* use relative paths */
	   server = FALSE;
	   relative_paths = TRUE;

	} else if (!strncmp(argv[i], "-addhtml",8)) { /* use relative paths */
	   server = FALSE;
	   add_html = TRUE;
	
	} else if (!strncmp(argv[i], "-title",6)) { /* use relative paths */
	   title = argv[++i];

	} else {	/* alternate database path */
	
	    if(server)
	        cp = strchr(argv[i], '/'); /* find the first slash */ 
	    else
		cp = NULL;
	    
	    /* if it is just a slash then it is requesting the default file */
	    if (!strcmp(argv[i],"/") || !strcmp(argv[i],"/\r")) { 
		/* dont do nothing */
		
	    } else if (cp == NULL) {
		startfile = argv[i];
	    } else {
		/* startdir = argv[i];*/  /* cannot change the startdir */
		startfile = cp+1;  /* file is referenced from startdir */
		/* make sure there are no other slashes or dots */
		while(*startfile == '/' || *startfile == '.') startfile++; /* */
		/* *cp = '\0'; */
	    }

	    {  /* can get a CR on command line if from server script */
	        char * cr = strrchr(startfile, '\r');
		if (cr) *cr='\0';	/* chop off CR if any */
	    }
	}
    }
    if(server)
        (void) chdir(startdir);

    /* http looks for files relative to the last file
     * lynx looks for files relative to a base directory
     * the name_equalizer will subtract a number of directories
     * equal to the number of directories that this file is
     * nested from each file name so that we can continue to
     * access files
     */
/*    strcpy(name_equalizer, "./");
    cp = startfile;
    while((cp2 = strchr(cp,'/')) != NULL)  {
	strcat(name_equalizer,"../");
	cp = cp2+1;
    }
/* */
    /*
     * make sure we can read the first file
     */
    if ((fp=fopen(startfile, "r")) == NULL) {
	fprintf(stderr, "Server: can't find file: %s/%s\n", startdir, startfile);
    } else {
        convert(fp, startfile, name_equalizer);
	(void) fclose(fp);
    }
    exit(0);
    /* NOTREACHED */
    return 0;  /* for gcc -Wall */
}



/*
 * display one file
 *
 * Read & display one file of text.
 * Looks for  links, and converts IBM PC line drawing
 * characters to standard ascii
 *
 */
void
convert(FILE *fp, char *cur_file, char *name_equalizer)
{
    int lineno;
    static char line[LINESIZE];
    char *cp, *cp2, *cp3, *acp;
    char defines_left=TRUE;
    char show_next_char=TRUE;
    char gopher_info[]="";
    struct attribtype attrib;
    int link_count;
    char *port, *tmptr;
    char target[MAXTARGET];

    /* add the <head> part since this is the first line */
    printf("<HTML>\n<HEAD>\n<TITLE>");

    if(title)
	printf("%s",title);
    else
	printf("Lynx File Converted to HTML");

    printf("</TITLE>\n");


    lineno = 0;
    while (fgets(line, LINESIZE, fp) != NULL) {

	/* defineswitch may add some text, specifically a <LINK>
         * tag which may define the mail address of a person
         * since we are still in the <HEAD> part all of this will
	 * be added to the head 
	 */
	if(defines_left) {  /* get the defines */
	    if(!defineswitch(line))
		defines_left = FALSE;
	    else
		continue;
  	}

	    /* end the head, start the body, and make all the text PRE */
	if (lineno == 0)
	    printf("</HEAD>\n<BODY>\n<PRE>");

	cp = line+strlen(line)-1;
	if (*cp == '\n') *cp-- = '\0';		/* remove trailing <lf> */
	if (*cp == '\r') *cp-- = '\0';		/* remove trailing <return> */
	while(cp>line && *cp==' ') *cp-- = '\0';/* remove trailing space */
	col = 0;
	for (cp=line; *cp != '\0'; cp++) {

	  /* check for targets */
	  if (*cp == delimiter.target[0] && 
	       (*(cp+1) == delimiter.target[1] || delimiter.target[1]=='\0')) {

	       cp2 = cp;	
	       cp2++;
	       if(*cp2 == delimiter.target[1])
		   cp2++;

		cp3 = target;
	       for(;( !(*cp2 == delimiter.end_target[1] || 
				delimiter.end_target[1] == '\0') 
			|| *(cp2-1)!=delimiter.end_target[0] ) &&
			*cp2 != '\0'; cp2++, cp3++)
		    *cp3 = *cp2;

		*cp3 = *cp2;
		*(cp3+1) = '\0';

		shorten_target(target);
		if(*cp2 != '\0') {  /* then it was a target */
		     cp = cp2+1;
		     printf("<A name=\"%s\"></A>", target);
		}

	   } else if (*cp == delimiter.link && *(cp+1) == delimiter.link) {

		cp++;  /* skip double delimiters */

	     /* check for links */
	   } else if (*cp == delimiter.link && *(cp-1) != delimiter.link) {
		/* start of link? */

	      cp = parselinks(cp, base1, col, lineno, "", 
			   &show_next_char, &attrib, gopher_info);

	      for(link_count=0; link_count < nlinks; link_count++) {

		   if(*links[link_count].lname == '!') {
			 convert_command_link(links[link_count].lname);

		   } else {  /* file or gopher link */

		     tmptr = links[link_count].lname;
		     while((tmptr = strchr(tmptr, '@')) != NULL
						&& *(tmptr-1) == '\\')
			tmptr++; /* null body */

	       	     remove_backslashes(links[link_count].lname);

		     if(is_url(links[link_count].lname)) {
		           printf("<A HREF=\"%s\">", links[link_count].lname);

		     } else if(tmptr == NULL) {  /* non gopher link */

			if(*links[link_count].target == '\0') {
			   if(relative_paths) 
		             printf("<A HREF=\"%s", links[link_count].lname);
			   else
		             printf("<A HREF=\"/%s", links[link_count].lname);

			   if(add_html)
				printf(".html\">");
			   else
				printf("\">");
			} else {
			   shorten_target(links[link_count].target);
			   if(*links[link_count].lname == '\0') {
		                printf("<A HREF=\"#%s", 
					links[link_count].target);
			   } else {
			      if(relative_paths)  {
		                 printf("<A HREF=\"%s#%s", 
					links[link_count].lname,
					links[link_count].target);
			      } else {
		                printf("<A HREF=\"/%s#%s", 
			    		links[link_count].lname, 
					links[link_count].target);
			      }

			   } /* end if(*links[ */

			   if(add_html)
				printf(".html\">");
			   else
				printf("\">");
			}

		     } else {  /* this is a gopher link */
			 convert_gopher_link(links[link_count].lname);
		     }
		  }

		  /* put in the text and anchor */
		  if(1) {
			/* output the hightext one char at a time
			 * in html mode
			 */
		      int i;
		      for(i=0; links[link_count].hightext[i] != '\0'; i++)
			  outch(links[link_count].hightext[i]);
		  }
			   
		  printf("</A>");
		  cp += strlen(links[link_count].hightext);

	    }	/* for nlinks */
	    nlinks=0; /* set this just in case */

	    /* end if cp==< */
	  } else if(*cp=='T' || *cp=='t') { 
		char buf[256];

		if( (!strncasecmp(cp,"tn3270",6) || !strncasecmp(cp,"TN3270",6) )
						&& interaddr(cp+7, buf)) {
			port = strchr(buf, ' ');
                        printf("<A HREF=\"tn3270://%s:%s/\">tn3270</a>",
			    			buf, (port ? port+1 : ""));
			cp+=6;

		} else if((!strncasecmp(cp,"telnet",6) || !strncasecmp(cp,"TELNET",6))
						  && interaddr(cp+7, buf)) {
                        printf("<A HREF=\"telnet://%s/\">telnet</a>", buf);
			cp+=6;
                }

	  }

	    
	    if(show_next_char)
	        outch(*cp);
	    else
		show_next_char=TRUE;
	}			/* next character */

	lineno++;
	outch('\n');		/* newline */
    }				/* next line */

    /* end the HTML document */
    printf("</PRE>\n</BODY>\n</HTML>");
    
    return;
}




/*	Display one charceter
 *
 *   Converts from IBM-PC format to SGML representation of basic ascii.
 */
 
void outch(ch)
    char ch;
{
    static char last_char=0;

    if(last_char == '_')
	if(ch == 8) {  /* back space */
	   last_char = 0;
	   return;  /* kill those backspaces */
	} else
	   putchar('_');

    last_char = ch;

    if(ch == '_')
	return;

    if (!printable(ch)) {
	if      (ch == HLINE)	putchar('-');
	else if (ch == VLINE)	putchar('|');
	else if (ch == VLINE2)	putchar('|');
	else if (ch == UL)	putchar('+');
	else if (ch == UR)	putchar('+');
	else if (ch == LL)	putchar('+');
	else if (ch == LR)	putchar('+');
	else if (ch == UL2)	putchar('+');
	else if (ch == UR2)	putchar('+');
	else if (ch == LL2)	putchar('+');
	else if (ch == LR2)	putchar('+');
	else if (ch == BULLET)	putchar('*');
	else if (ch == BLOT1)	putchar('*');
	else if (ch == BLOT2)	putchar('*');
	else if (ch == BLOT3)	putchar('*');
	else {
#ifdef DEBUG
	    /* shouldn't happen - see what we've missed */
	    if (ch != 13) {
		printw("%d", ch);
		refresh();
	    }
#endif
	    putchar(' ');
	}
    }
    else if (ch == '<') printf("&lt;");
    else if (ch == '>') printf("&gt;");
    else if (ch == '&') printf("&amp;");
    else if (ch == '\t') {			/* expand tabs */
    	do { outch(' '); } while ((col % 8) != 0);
	return;
    }
    else if (ch == '\n') {
    	printf(server ? "\r\n" : "\n");   /* Telnet style for server */
	col= 0;
	return;
    }
    else putchar(ch);
    col++;
}


/*
 * Search through a string for the first valid internet address.
 * We define an internet address as a word consisting of alpha-
 * numeric characters and dots (and with at least one embedded dot).
 *
 * If the address is immediately followed by a space and a port number,
 * those are included as well.
 *
 * This is not a general purpose routine.
 *
 * Returns an pointer to the address if one is found, NULL otherwise.
 */
char *
interaddr(cp, buf)
char *cp, *buf;
{
    int isaddr, isname;
    char *cp2, *word;

    while (*cp != '\0') {

	isaddr = FALSE;
	isname = FALSE;

	while (*cp == ' ') cp++;		/* skip spaces */

	word = cp;				/* start of word? */
	while (*cp == '"' || *cp == '-' || *cp == '.' || isalnum(*cp)) {
	    if (*cp == '.' && isalnum(*(cp+1)) )
		isaddr = TRUE;
	    if (isalpha(*cp))
		isname = TRUE;
	    cp++;
	}

	if (isaddr) {			/* it's an internet address */
/*	    if ((telnet_by_name && isname) ||
		(telnet_by_number && !isname) ) 
*/
	    {
		if (*cp == ' ') {	/* check for port number */
		    for (cp2=cp+1; isdigit(*cp2); cp2++)
			; /* NULL BODY */
		    if (cp2 > cp+1)
			cp = cp2;
		}
		while (!isalnum(*cp) && *cp != '"') cp--;
		mystrncpy(buf,word,cp-word+1);
		/* printw("interaddr: %s\n", buf); refresh(); */
		return(word);
	    }
	} else {  /* if it's the word "or", try again, otherwise give up */
	    if (strncmp(word, "or", 2) != 0)
		return(NULL);
	}
    }
    return(NULL);
}

/*
 * Return pointer to the second word in a string
 */
char *
nextword(cp)
char *cp;
{
    while (*cp != ' ' && *cp != '\0') cp++;	/* skip non-spaces */
    while (*cp == ' ') cp++;			/* skip spaces */
    return(cp);
}


/*
 * my strncpy() terminates strings with a null byte.
 * Writes a null byte into the n+1 byte of dst.
 */
char *
mystrncpy(dst, src, n)
char *dst, *src;
int n;
{
    char *val;

    val = strncpy(dst, src, n);
    *(dst+n) = '\0';
    return val;
}



/*
 * extract the filename portion of a link <filename[ ...]>
 */
void getlinkname(buf, lname)
char *lname;
char *buf;
{
    char *cp;

    for (cp=lname+1;
	    *cp != '>' && *cp != '\0'; cp++) {
	*buf = *cp;
	buf++;
    }
    *buf = '\0';
}

