#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/* maximum number of tags per line */
#define MAXTAGS 100
/* minimum column for right margin */
#define MINRM 60
/* half of MINRM: lines indented further than this are candidates for
   per-page title stripping (parenthesized so it is safe in any expression) */
#define MINMID (MINRM/2)
/* global indentation must be established at or before this column */
#define MAXINDENT 15
/* number of padding bytes kept after the input buffer for lookahead */
#define SENT 10

/*
  change backspace/underline text into Tk text tags
  -- used to do this in TCL, but it was too slow

  I hope the compiler has good common subexpression elimination
     for all the pointer arithmetic.


  Copyright (c) 1993  T.A. Phelps (phelps@cs.Berkeley.EDU)
     written March 24, 1993

    2-Apr  bullets, change bars, copyright symbol
    5      boldface, other SGI nicks
    7      skip unrecognized escape codes
   10      small caps
   13      underscores considered uppercase so show up
              in default small caps font
           screen out Ultrix junk (code getting pretty tangled now)
   14      until Tk text has better tab support, replace tabs by
           spaces until get to next tab stop (for Ultrix)
           -t gives tabstop spacing
   20      Solaris support
    3-Jun  section subheading parsing (Per-Erik Martin)
*/


/* TO DO ****

   may just want to include own macro file to screen out all the varieties
      and format everything (which takes only an imperceptible amount of time)
      whole process takes less time, as don't have to process each character
	    through a tangled web of if's
   screen out more varieties of headers and footers
   maybe write bs2tk with lex--bigger binary, but maybe faster execution,
      easier description of cases
*/



/*** tag management ***/

/* kinds of highlighting that can be attached to a span of text */
enum tagtype {
	TITLE,
	ITALICS,
	BOLD,
	SYMBOL,
	SMALLCAPS,
	BOLDITALICS
};

/* Tk tag name for each tagtype; indexed by the enum value, so the
   order here must match the order of the enumerators above */
char *tagstrings[] = {
	"title",	/* TITLE */
	"high",		/* ITALICS */
	"bold",		/* BOLD */
	"symbol",	/* SYMBOL */
	"sc",		/* SMALLCAPS */
	"bi"		/* BOLDITALICS */
};

/* tags collected for the line currently being formatted; format() prints
   them as "tag add" commands after each line and then resets tagc */
struct {
	int type;	/* enum tagtype value, used as index into tagstrings */
	int line;	/* line number the tag applies to */
	int first;	/* starting character offset within the line */
	int last;	/* ending character offset within the line */
} tags[MAXTAGS];

/* number of entries of tags[] currently in use */
int tagc=0;

void
addtag(int type, int line, int first, int last)
{
	tags[tagc].type = type;
	tags[tagc].line = line;
	tags[tagc].first = first;
	tags[tagc].last = last;
	tagc++;
}



/*
 * Read formatted manual-page text from stdin and write Tcl commands to
 * stdout that insert it, line by line, into the text widget $w.show.
 *
 *   tabstops     tab stop spacing used when expanding tabs; values below 1
 *                are treated as 1 (0 would otherwise divide by zero)
 *   subsections  if nonzero, lines indented by exactly two columns are
 *                also added to the $w.sections.m menu
 *
 * For every input line this
 *   - determines and removes the global indentation,
 *   - drops per-page titles, page numbers and "name(...)" header lines,
 *   - collapses runs of blank lines,
 *   - moves trailing change bars to the front of the line,
 *   - turns backspace overstrike sequences into tags (italics, bold,
 *     bullets) and runs of capitals into small caps,
 *   - emits section menu entries and, after a "see also" section title,
 *     link lists.
 *
 * Input is read with fgets, so a line of BUFSIZ-1 characters or more is
 * processed as several consecutive lines instead of overflowing buf.
 */
void
format(int tabstops, int subsections)
{
	int line=1;			/* current line number in the text widget */
	char buf[BUFSIZ+SENT];		/* input line plus lookahead padding */
	char plain[BUFSIZ];		/* lowercased copy of the text, without markup */
	int pn;				/* number of characters stored in plain */
	const int plainmax=(int)sizeof(plain)-1;
	size_t len;
	char *p;
	int i,ci,hl,maybesc;
	int alluc,subsect,osubsect=0;
	int title=1;
	const char *bads = "\\\"[]$";	/* characters that need a backslash in Tcl */
	int sectcnt=0;
	int seealso=0;
	int ncnt=0,scnt=0,oscnt=-1;
	int tt=-1;
	int indent=-1;

	/* a tab stop spacing below 1 is meaningless and 0 would divide by zero */
	if (tabstops<1) tabstops=1;

	/* start from a fully initialized buffer so lookahead never reads garbage */
	memset(buf, 0, sizeof(buf));

	while (fgets(buf, BUFSIZ, stdin)!=NULL) {
		/* drop the newline and clear the lookahead window behind the text,
		   so that p[i+1]..p[i+4] tests below see zeros at end of line */
		len = strcspn(buf, "\n");
		memset(buf+len, 0, SENT);

		/*** determine global indentation ***/
		/* as defined by first line with non-whitespace in column MAXINDENT or earlier */
		if (indent==-1) {
			for (ci=i=0; buf[i] && ci<MAXINDENT; i++,ci++) {
				if (buf[i]=='\t') ci+=tabstops-1;
				else if (!isspace((unsigned char)buf[i])) {indent=ci; break;}
			}
			if (indent==-1) continue;
		}

		/* skip over global indentation, mindful of EOL and tabs */
		scnt=0;
		for (ci=i=0; buf[i] && ci<indent; ci++,i++)
			if (buf[i]=='\t') ci+=(tabstops-1);
		p = &buf[i];
		if (p[0]=='\0') {ncnt++; continue;}	/* blank line: just count it */
		if (ci>indent) scnt=ci-indent;		/* a tab overshot the indentation */

		/* skip over (additional) initial spaces */
		for (; *p && isspace((unsigned char)*p); p++) {
			if (*p=='\t') scnt+=(tabstops-1);
			scnt++;
		}


		/*** strip out per-page titles ***/

		if (scnt==0 || scnt>MINMID) {

			/* the strchr is a cheap first-letter filter for the strncmp list */
			if (strchr("UTFSXPHAlGV",p[0])!=NULL &&
			    (strncmp(p,"Sun",3)==0 ||
				strncmp(p,"X Vers",6)==0 ||
				strncmp(p,"Page ",5)==0 ||
				strncmp(p,"Hewlett-",8)==0 ||
				strncmp(p,"HP-UX",5)==0 ||
				strncmp(p,"local",5)==0 ||
				strncmp(p,"Adobe",5)==0 ||
				strncmp(p,"Free Sof",8)==0 ||
				strncmp(p,"Version ",8)==0 ||
				strncmp(p,"GNU ",4)==0 ||
				strncmp(p,"Tcl  ",5)==0 ||
				strncmp(p,"UCB  ",5)==0
				/* you may need to add your own checks here */
				/* (make sure to add first letter to strchr above) */
				)) {
				title=1; continue;
			}

			/* page numbers: the rest of the line is all digits */
			for (i=0; p[i] && isdigit((unsigned char)p[i]); i++)
				/* empty */;
			if (!p[i]) {title=1; continue;}

			/* name of command followed by an opening parenthesis */
			for (i=0; isalnum((unsigned char)p[i]) || p[i]=='-'||p[i]=='.'||p[i]=='_'||p[i]=='&'||p[i]=='\x08'; i++)
				/* empty */;
			if (p[i]=='(') {title=1; continue;}
		}


		/*** interline spacing ***/
		alluc = (scnt==0 && isupper((unsigned char)p[0]));
		printf("$w.show insert end \"");

		/* after a stripped title, keep at most one blank line */
		if (title) ncnt=(scnt!=oscnt || isupper((unsigned char)p[0]));
		for (i=0; i<ncnt; i++) {putchar('\\'); putchar('n');}
		line+=ncnt; ncnt=0;
		title=0;


		/*** process line, looking for special ones ***/
		ci=scnt;

		/*** change bars ***/
		/* trailing '|' beyond the right margin are emitted at the line start */
		i = (int)strlen(p)-1;
		if (i+scnt>MINRM) {
			for (; i>=0 && p[i]=='|'; i--) {
				putchar('|');
				ci++;
			}
		}
		/* strip trailing spaces */
		for (; i>=0 && isspace((unsigned char)p[i]); i--)
			/* empty */;
		p[i+1]='\0';

		if (i>=0) printf("%*s",scnt,"");
		oscnt=scnt;
		subsect=(subsections && scnt==2);


		/*** handle underlining, tabbing, bullets ***/
		/* ci tracks the output column (for tag offsets); pn tracks plain[] */
		hl=maybesc=-1;
		pn=0;
		for (i=0; p[i]; i++) {
			if (p[i]=='\t') {
				/* Ultrix puts tabs within sentences--gag! */
				/* replace the tab by spaces up to the next tab stop */
				do {
					putchar(' ');
					if (pn<plainmax) plain[pn++]=' ';
					ci++;
				} while ((ci+indent)%tabstops);
				continue;
			} else if ((p[i]=='_' && p[i+1]=='\x08' && p[i+2]!='_' && p[i+3]!='\x08')
				|| (p[i]=='\x08' && p[i+1]=='_')) {
				/* italics */
				/* start tag only if not already in one */
				if (hl==-1) { hl=ci; tt = ITALICS; }
				i+=2;
#ifdef SOLARIS
			} else if (p[i]=='_' && p[i+2]==p[i+4] && p[i+1]=='\x08' && p[i+3]=='\x08' && p[i+2]!='_') {
				/* bold italics (BRAIN DEAD!) */
				if (hl==-1) { hl=ci; tt = BOLDITALICS; }
				for (i+=2; p[i]==p[i+2] && p[i+1]=='\x08';)
					i+=2;
#endif
			} else if (p[i]==p[i+2] && p[i+1]=='\x08') {
				/* boldface */
				if (hl==-1) { hl=ci; tt=BOLD; }
				while (p[i]==p[i+2] && p[i+1]=='\x08')
					i+=2;
			} else if (p[i+1]=='\x08' &&
					 ((p[i]=='o' && p[i+2]=='+') ||
					 (p[i]=='+' && p[i+2]=='o')) ) {
				/* bullets */
				i+=2; putchar('\xb7');
				addtag(SYMBOL, line, ci, ci+1);
				ci++; continue;
			} else if (p[i]=='\x1b') {
				/* skip unrecognized escape codes: the ESC and the character
				   after it, but never step over the end of the line */
				if (p[i+1]) i++;
				continue;
			} else if (!alluc && (isupper((unsigned char)p[i]) || p[i]=='_')) {
				if (hl==-1 && maybesc==-1) {maybesc=ci;}
			} else {
				/* end of tag, one way or another */
				if (hl>=0) {
					addtag(tt, line, hl, ci);
				} else if (maybesc>=0 && ci-maybesc>=2) {
					addtag(SMALLCAPS, line, maybesc, ci);
				}
				maybesc=hl=-1;
			}
			if (!p[i]) break;	/* safety check */
			/* escape some chars for TCL */
			if (strchr(bads,p[i])!=NULL) {putchar('\\');}
#ifndef ULTRIX
			if (alluc && islower((unsigned char)p[i])) alluc=0;
#endif
			putchar(p[i]);
			if (pn<plainmax) plain[pn++]=(char)tolower((unsigned char)p[i]);
			ci++;
		}
		/* close a tag still open at end of line */
		if (hl>=0) {addtag(tt, line, hl, ci);}
		else if (maybesc>=0 && ci-maybesc>=2) addtag(SMALLCAPS, line, maybesc, ci);
		printf("\\n\"\n");
		plain[pn]='\0';


		/*** deal with section titles, hyperlinks ***/

		if (alluc || subsect) {
			/* trim trailing whitespace; test the index before reading plain[i] */
			for (i=pn-1; i>=0 && isspace((unsigned char)plain[i]); i--) /* empty */;
			plain[i+1]='\0';
			if (!subsect) {
				seealso = strcmp(plain,"see also")==0;
				addtag(TITLE, line, 0, ci);
				printf("$w.sections.m add command -label {%s} -command \"$w.show yview %d\"\n",
					  plain, line-1 /* pickplace has zero-based offset */);
			} else if (!osubsect && sectcnt<40) {	/* damage control */
				printf("$w.sections.m add command -label {   %s} -command \"$w.show yview %d\"\n",
					  plain, line-1 /* pickplace has zero-based offset */);
			}
			sectcnt++;
		} else if (seealso) printf("append manx(links) {%s,}\n", plain);
		osubsect=subsect;

		line++;
		if (line==100) printf("update idletasks\n");

		/* write tags right after line */
		for (i=0; i<tagc; i++) {
			printf("$w.show tag add %s %d.%d %d.%d\n",
				  tagstrings[tags[i].type],
				  tags[i].line, tags[i].first,
				  tags[i].line, tags[i].last);
		}
		tagc=0;
	}

	if (!sectcnt) printf("$w.sections.m add command -label (none)\n");
	printf("$w.search.cnt configure -text \"%d lines\"\n", line-1);
}



/*
 * Parse the command line, then filter stdin to stdout with format().
 *
 *   -t N   tab stop spacing (default 8); N must be a whole number
 *          between 1 and BUFSIZ
 *   -b     enable subsection handling in format()
 *   -v     print the program revision and exit
 *
 * Exits with status 1 on an unknown option or an invalid -t argument.
 */
int
main(int argc, char **argv)
{
	int c;
	long n;
	char *end;
	extern char *optarg;
	extern int optopt;

	int tabstops=8;
	int subsections=0;

	while ((c=getopt(argc,argv,"vt:b"))!=-1)
		switch (c) {
		   case 't':
			/* strtol instead of atoi so that garbage and out-of-range
			   values are rejected; 0 would make format() divide by zero */
			n = strtol(optarg, &end, 10);
			if (end==optarg || *end!='\0' || n<1 || n>BUFSIZ) {
				fprintf(stderr, "%s: invalid tab stop spacing '%s'\n",argv[0],optarg);
				exit(1);
			}
			tabstops = (int)n;
			break;
		   case 'b':
			subsections=1;
			break;
		   case 'v':
			printf("bs2tk $Revision: 1.23 $\n");
			exit(0);
		   default:
			/* getopt returns '?' here; the offending letter is in optopt */
			fprintf(stderr, "%s: unidentified option -%c\n",argv[0],optopt);
			exit(1);
		}

	format(tabstops,subsections);
	return(0);
}
