/*
	Probably should install a simpler scanner.

	reader.c - troffcvt output reader.

	Tokenizes input stream

	tcrArgv[] points into cstBuf or ptBuf and its elements should
	be considered READ-ONLY.

	The reader turns off the TS escaping mechanism, so any application
	using both the reader and TS routines itself should be aware of
	that and modify/restore the scanner accordingly.
*/

# include	<stdio.h>
# include	<ctype.h>
# include	<stdlib.h>		/* strtol() */
# include	<string.h>		/* strcmp(), strlen() */
# ifdef	THINK_C			/* for atoi()/atol()/atof() */
# include	<stdlib.h>
# else /* !THINK_C */
# include	<math.h>
# endif /* THINK_C */

# include	"etm.h"
# include	"memmgr.h"
# include	"tokenscan.h"

# include	"tcr.internal.h"
# include	"tcr.h"

/*
	Capacity of tcrArgv[].  One slot is always kept for the NULL
	pointer that terminates the argument vector.
*/

# define	maxArg	10


/*
	Forward declarations of the file-local helpers.
	NOTE(review): ChIn() is declared here but no definition or call
	is visible in this part of the file - confirm whether it is
	still needed.
*/

static void	LookupInit ();
static void	Lookup ();
static int	Hash ();

static int	ChIn ();


/*
	Last plain-text character handed out by TCRGetToken(), or '\n'
	after a control/special-text line has been consumed.  A value of
	'\n' means the next character read is at the beginning of a line.
*/

static int	prevChar = '\n';


/*
	Description of the most recent token:

	tcrClass	token class stored by TCRGetToken() (tcrText,
			tcrControl or tcrSText); TCRInit() sets it to tcrEOF
	tcrMajor	the character itself for plain text, otherwise the
			major number found by Lookup()
	tcrMinor	minor number copied from the lookup table when the
			keyword is found
	tcrArgv		NULL-terminated vector of the words of the token
	tcrArgc		number of words in tcrArgv[]
*/

int	tcrClass;
int	tcrMajor;
int	tcrMinor;
char	*tcrArgv[maxArg];
int	tcrArgc = 0;

/*
	Control and special text lines are read into cstBuf, and
	tcrArgv[] is set up to point at the words in the line.
	ptBuf holds a single-character plain text token plus its
	terminating null byte.
	NOTE(review): bufSiz is not defined in this file; presumably it
	comes from one of the project headers.
*/

static char	cstBuf[bufSiz];
static char	ptBuf[2];


/*
	Initialize the reader.  This may be called multiple times,
	to read multiple files.  The only thing not reset is the input
	stream; that must be done with TCRSetStream().
*/

void TCRInit ()
{
TSScanner	scanner;
char		*scanEscape;

	/* initialize lookup tables */

	LookupInit ();

	tcrClass = tcrEOF;
	tcrArgc = 0;
	tcrArgv[0] = (char *) NULL;

	/*
		Turn off backslash escape mechanism while parsing
		action file.  Restore it later.  WHERE???
	*/
	TSGetScanner (&scanner);
	scanEscape = scanner.scanEscape;
	scanner.scanEscape = "";
	TSSetScanner (&scanner);
}


/*
	Read stdin to find the next token and return its class.

	If the stream is at the beginning of a line (prevChar == '\n')
	and the character read is '\' or '@', the rest of the line is
	read in one swoop as a control (tcrControl) or special text
	(tcrSText) line and split into words; tcrArgv[] points at the
	words, tcrArgc counts them, and Lookup() sets tcrMajor/tcrMinor
	from the first word.

	Any other character is plain text (tcrText) and becomes its own
	one-character token: tcrArgv[0] points at it, tcrArgc is 0 and
	tcrMajor is the character.

	At end of input tcrEOF is returned and also stored in tcrClass,
	so that the global agrees with the return value on every path.

	NOTE(review): a control line longer than cstBuf is only read up
	to the buffer size; the remainder stays in the stream and is
	tokenized by later calls.
*/

int TCRGetToken ()
{
int	c;
char	*p;

	if ((c = getchar ()) == EOF)
		return (tcrClass = tcrEOF);

	if (prevChar != '\n' || (c != '\\' && c != '@'))	/* plain text */
	{
		ptBuf[0] = c;
		ptBuf[1] = '\0';
		tcrArgv[tcrArgc = 0] = ptBuf;
		tcrMajor = prevChar = c;
		return (tcrClass = tcrText);
	}

	/* control or special text */
	if (c == '\\')
		tcrClass = tcrControl;
	else
		tcrClass = tcrSText;

	/* keep the introducer character as the first byte of the line */
	cstBuf[0] = c;
	if (!TCRGetLine (cstBuf+1, (int) sizeof (cstBuf)-1, stdin))
		cstBuf[1] = '\0';	/* EOF right after the introducer */
	prevChar = '\n';		/* whole line consumed */
	TSScanInit (cstBuf);
	tcrArgc = 0;
	while ((p = TSScan ()) != (char *) NULL)
	{
		/*
			Comments are a special case: the rest of the line is
			kept unsplit as the single argument.
		*/
		if (tcrArgc == 0 && strcmp (p, "\\comment") == 0)
		{
			tcrArgv[tcrArgc++] = p;
			tcrArgv[tcrArgc++] = TSGetScanPos ();
			break;
		}
		/* always leave one slot free for the terminating NULL */
		if (tcrArgc + 1 >= maxArg)
		{
			ETMMsg ("too many arguments on %s line", tcrArgv[0]);
			break;
		}
		tcrArgv[tcrArgc++] = p;
	}
	tcrArgv[tcrArgc] = (char *) NULL;
	/* NOTE(review): ETMPanic() is presumably fatal - confirm */
	if (tcrArgc == 0)
		ETMPanic ("trash line found");
	Lookup (tcrArgv[0], tcrClass);
	return (tcrClass);
}


/*
	Fetch one line from f into buf (at most size-1 characters) and
	remove a trailing lf, cr, or crlf.  Returns 0 when fgets() could
	not read anything (EOF or error), 1 otherwise.
*/

int TCRGetLine (buf, size, f)
char	*buf;
int	size;
FILE	*f;
{
int	n;

	if (fgets (buf, size, f) == (char *) NULL)
		return (0);

	/* measure once, then peel the terminators off from the end */
	n = (int) strlen (buf);
	if (n > 0 && buf[n-1] == '\n')
		buf[--n] = '\0';
	if (n > 0 && buf[n-1] == '\r')
		buf[--n] = '\0';
	return (1);
}


/*
	Convert the decimal integer at the start of s to a long.
	Leading white space and an optional sign are accepted, and
	conversion stops at the first character that is not a digit.
	Returns 0 if s is NULL or contains no number.  Values that do
	not fit in a long are clamped to LONG_MAX/LONG_MIN by strtol(),
	where atol() would have had undefined behavior.
*/

long TCRStrToNum (s)
char	*s;
{
	if (s == (char *) NULL)
		return (0L);
	return (strtol (s, (char **) NULL, 10));
}


/* ---------------------------------------------------------------------- */

/*
	Symbol lookup routines
*/


/*
	One row of a keyword lookup table.  The field order matters:
	the tables below are initialized positionally.
*/

typedef struct TCRKey
{
	int	tcrKMajor;	/* major number assigned to the keyword */
	int	tcrKMinor;	/* minor number assigned to the keyword */
	char	*tcrKStr;	/* keyword text; NULL marks the end of a table */
	int	tcrKHash;	/* hash of tcrKStr, filled in by LookupInit() */
} TCRKey;


/*
	A minor number of -1 means the token has no minor number
	(all valid minor numbers are >= 0).
*/


/*
	Keywords of control lines, stored without the leading
	introducer character (Lookup() skips it before comparing).
	Each row is { major, minor, name, hash }; the hash column is
	filled in by LookupInit().  The row with a NULL name ends the
	table.
*/

static TCRKey	tcrControlKey[] =
{
	{ tcrComment,		-1,		"comment",		0 },
	{ tcrBeginSetup,	-1,		"begin-setup",		0 },
	{ tcrEndSetup,		-1,		"end-setup",		0 },
	{ tcrResolution,	-1,		"resolution",		0 },
	{ tcrBreak,		-1,		"break",		0 },
	{ tcrFont,		-1,		"font",			0 },
	{ tcrPointSize,		-1,		"point-size",		0 },
	{ tcrSpacing,		-1,		"spacing",		0 },
	{ tcrLineSpacing,	-1,		"line-spacing",		0 },
	{ tcrOffset,		-1,		"offset",		0 },
	{ tcrIndent,		-1,		"indent",		0 },
	{ tcrTempIndent,	-1,		"temp-indent",		0 },
	{ tcrLineLength,	-1,		"line-length",		0 },
	{ tcrPageLength,	-1,		"page-length",		0 },
	{ tcrPageNumber,	-1,		"page-number",		0 },
	{ tcrSpace,		-1,		"space",		0 },
	{ tcrCFA,		tcrCenter,	"center",		0 },
	{ tcrCFA,		tcrNofill,	"nofill",		0 },
	{ tcrCFA,		tcrAdjFull,	"adjust-full",		0 },
	{ tcrCFA,		tcrAdjLeft,	"adjust-left",		0 },
	{ tcrCFA,		tcrAdjRight,	"adjust-right",		0 },
	{ tcrCFA,		tcrAdjCenter,	"adjust-center",	0 },
	{ tcrUnderline,		-1,		"underline",		0 },
	{ tcrCUnderline,	-1,		"cunderline",		0 },
	{ tcrNoUnderline,	-1,		"nounderline",		0 },
	{ tcrULineFont,		-1,		"underline-font",	0 },
	{ tcrBreakSpread,	-1,		"break-spread",		0 },
	{ tcrExtraSpace,	-1,		"extra-space",		0 },
	{ tcrLine,		-1,		"line",			0 },
	{ tcrMark,		-1,		"mark",			0 },
	{ tcrMotion,		-1,		"motion",		0 },
	{ tcrBeginBracket,	-1,		"begin-bracket",	0 },
	{ tcrEndBracket,	-1,		"end-bracket",		0 },
	{ tcrBeginOverstrike,	-1,		"begin-overstrike",	0 },
	{ tcrEndOverstrike,	-1,		"end-overstrike",	0 },
	{ tcrBeginPage,		-1,		"begin-page",		0 },
	{ tcrZeroWidth,		-1,		"zero-width",		0 },
	{ tcrSpaceSize,		-1,		"space-size",		0 },
	{ tcrConstantWidth,	-1,		"constant-width",	0 },
	{ tcrNeed,		-1,		"need",			0 },
	{ tcrEmbolden,		-1,		"embolden",		0 },
	{ tcrSEmbolden,		-1,		"embolden-special",	0 },
	{ tcrResetTabs,		-1,		"reset-tabs",		0 },
	{ tcrFirstTab,		-1,		"first-tab",		0 },
	{ tcrNextTab,		-1,		"next-tab",		0 },
	{ tcrHyphenate,		-1,		"hyphenate",		0 },
	{ tcrBegDiversion,	-1,		"begin-diversion",	0 },
	{ tcrAppDiversion,	-1,		"append-diversion",	0 },
	{ tcrEndDiversion,	-1,		"end-diversion",	0 },
	{ tcrTabChar,		-1,		"tab-char",		0 },
	{ tcrLeaderChar,	-1,		"leader-char",		0 },
	{ tcrTitleLength,	-1,		"title-length",		0 },
	{ tcrBeginTitle,	-1,		"begin-title",		0 },
	{ tcrEndTitle,		-1,		"end-title",		0 },

	{ 0,			-1,		(char *) NULL,		0 }
};


/*
	Keywords of special text lines, stored without the leading
	introducer character (Lookup() skips it before comparing).
	Each row is { major, minor, name, hash }; the hash column is
	filled in by LookupInit().  The row with a NULL name ends the
	table.
*/

static TCRKey	tcrSTextKey[] =
{
	{ tcrBackslash,		-1,		"backslash",		0 },
	{ tcrAtSign,		-1,		"at",			0 },
	{ tcrLSglQuote,		-1,		"lsglquote",		0 },
	{ tcrRSglQuote,		-1,		"rsglquote",		0 },
	{ tcrLDblQuote,		-1,		"ldblquote",		0 },
	{ tcrRDblQuote,		-1,		"rdblquote",		0 },
	{ tcrZeroSpace,		-1,		"zero-space",		0 },
	{ tcrTwelfthSpace,	-1,		"twelfth-space",	0 },
	{ tcrSixthSpace,	-1,		"sixth-space",		0 },
	{ tcrDigitSpace,	-1,		"digit-space",		0 },
	{ tcrHardSpace,		-1,		"hard-space",		0 },
	{ tcrMinus,		-1,		"minus",		0 },
	{ tcrAcuteAccent,	-1,		"acute-accent",		0 },
	{ tcrGraveAccent,	-1,		"grave-accent",		0 },
	{ tcrLeader,		-1,		"leader",		0 },
	{ tcrLeaderPad,		-1,		"leader-pad",		0 },
	{ tcrTab,		-1,		"tab",			0 },
	{ tcrBackspace,		-1,		"backspace",		0 },
	{ tcrEmDash,		-1,		"em-dash",		0 },
	{ tcrOptHyphen,		-1,		"opt-hyphen",		0 },

	{ 0,			-1,		(char *) NULL,		0 }
};


/*
	Fill in the hash column of both keyword tables.  The work is
	done on the first call only; later calls return immediately.
*/

static void LookupInit ()
{
static int	done = 0;
TCRKey	*kp;

	if (done)
		return;
	done = 1;

	/* control keywords */
	kp = tcrControlKey;
	while (kp->tcrKStr != (char *) NULL)
	{
		kp->tcrKHash = Hash (kp->tcrKStr);
		++kp;
	}

	/* special text keywords */
	kp = tcrSTextKey;
	while (kp->tcrKStr != (char *) NULL)
	{
		kp->tcrKHash = Hash (kp->tcrKStr);
		++kp;
	}
}


/*
	Determine major and minor number of a control or special text
	token and store them in tcrMajor/tcrMinor.

	s is the first word of the line including its one-character
	introducer; class is tcrControl or tcrSText and selects the
	table to search.  If the keyword is not found, the major number
	turns into tcr{C,ST}Unknown and the minor number into -1 ("no
	minor number"), so that no value from an earlier token is left
	behind.
*/

static void Lookup (s, class)
char	*s;
int	class;
{
TCRKey	*rp;
int	hash;

	++s;			/* skip over the leading introducer character */
	hash = Hash (s);
	tcrMinor = -1;		/* until a table entry says otherwise */
	if (class == tcrControl)
	{
		rp = tcrControlKey;
		tcrMajor = tcrCUnknown;
	}
	else if (class == tcrSText)
	{
		rp = tcrSTextKey;
		tcrMajor = tcrSTUnknown;
	}
	else
	{
		ETMPanic ("Lookup: class = %d", class);
		return;		/* never search with rp unset */
	}

	/* compare hashes first; strcmp() only runs on a hash match */
	for (/* empty */; rp->tcrKStr != (char *) NULL; rp++)
	{
		if (hash == rp->tcrKHash && strcmp (s, rp->tcrKStr) == 0)
		{
			tcrMajor = rp->tcrKMajor;
			tcrMinor = rp->tcrKMinor;
			break;
		}
	}
}


/*
	Hash a symbol name: the sum of its character values.  Names
	made of the same characters in a different order hash alike,
	which is why Lookup() confirms a match with strcmp().
*/

static int Hash (s)
char	*s;
{
int	sum;

	for (sum = 0; *s != '\0'; s++)
		sum += (int) *s;
	return (sum);
}
