/*
	What does it mean to do .nx inside a macro?

	input.c - input operations

	Comments (\"...) are deleted whenever input is read or reread.
	Embedded newlines (\(newline)) are also deleted whenever input
	is read, except within comments.

	FChIn(), SChIn(), MChIn(), LChIn() return EOF when input source is
	exhausted. FChIn() discards nulls and characters with high-bit set.

	ChIn0() always returns the next character from the input stream, EOF
	when there is no more input from any source.

	ChIn() collapses escChar+next char into one character as appropriate
	(depending on whether copy mode is on or not).

	Ugly hack: In copy mode, \\, \t, \a and \. are returned as \, tab,
	SOH and ., respectively.  If the character is pushed back, there's
	no way to determine from looking at the character itself that a \
	character should be pushed back, too.  (E.g., if you read a \\, you
	get a \; if you push it back, the first \ is lost, which loses a
	level of escaping - not good.)  The flag escStripped is used to keep
	track of whether the last character returned by ChIn() had an escape
	stripped off or not, and UnChIn() looks at that flag to be able to
	tell whether to put it back.  This would be a problem if more than
	one character were read and pushed back in copy mode, but in practice
	that doesn't happen.  ICK.
*/

# include	<stdio.h>

# include	"etm.h"
# include	"memmgr.h"

# include	"troffcvt.h"


/* capacity of pushBuf[], the pushback buffer filled by UnChIn() */
# define	maxPushChar	32
/* PushMacro() refuses to nest one macro more deeply than this */
# define	maxMacRecursion	10

/* input source type codes, stored in the sType member of a Source */
# define	fileSource		1
# define	stringSource		2
# define	macroSource		3
# define	anonStringSource	4


/*
	Information for file input sources: the stream being read, the
	character most recently returned by FChIn() (used to notice when
	a new line starts), and the current input line number.
*/

typedef	struct FileInfo	FileInfo;

struct FileInfo
{
	FILE	*filePtr;	/* stream being read */
	int	prevChar;	/* last char returned by FChIn(); starts as '\n' */
	Param	inputLine;	/* line number, copied to registers .c and c. */
};


/*
	Information for macro input sources.  Keep a pointer to the current
	position within the macro body, a pointer to the end position, and
	an array of the arguments for references to $1..$9.  These are made
	available when necessary through the MacroArgument() function.  Any
	arguments beyond those actually given on the macro invocation line
	will be empty, not null.

	Macro arguments are char.  They don't need to be UChar since they're
	parsed in copy mode.
*/

typedef	struct MacInfo	MacInfo;

struct MacInfo
{
	Macro	*macPtr;	/* macro being executed */
	long	curPos;		/* index of next character in macro body */
	long	endPos;		/* index one past last character of body */
	int	margc;		/* number of entries in use in margv[] */
	char	*margv[maxMacArgs];	/* argument copies; $1 is margv[0] */
};

/*
	Information for named-string input sources: the string being
	interpolated and the position of the next character to return.
*/

typedef	struct StrInfo	StrInfo;

struct StrInfo
{
	String	*strPtr;	/* string being read; its strInUse is reset on pop */
	UChar	*curSPtr;	/* next character SChIn() will return */
};

/*
	Information for anonymous (literal) string input sources.  The
	text is a private copy made by PushAnonString() and released by
	PopSource().
*/

typedef	struct AnonStrInfo	AnonStrInfo;

struct AnonStrInfo
{
	UChar	*anonStr;	/* start of the private copy (needed to free it) */
	UChar	*curAPtr;	/* next character LChIn() will return */
};


/*
	Input source information structure.

	Each input source specifies the function needed to return the next
	character from the source.  Input functions must return EOF at end
	of input, no matter what the source.  E.g., SChIn() returns EOF,
	not a null byte, when it encounters the null byte at the end of
	the string.

	Sources form a singly linked stack through sNext, with curSrc
	pointing at the top (the source currently being read).
*/

typedef struct Source	Source;

struct Source
{
	int	sType;			/* source type */
	int	(*sChIn) ();		/* character input function */
	/* per-type state; which member is valid is determined by sType */
	union
	{
		FileInfo	sFileInfo;
		StrInfo		sStrInfo;
		MacInfo		sMacInfo;
		AnonStrInfo	sAnonStrInfo;
	} sInfo;
	/* shorthand for reaching the union members, e.g. curSrc->sFile */
# define	sFile		sInfo.sFileInfo
# define	sString		sInfo.sStrInfo
# define	sMacro		sInfo.sMacInfo
# define	sAnonString	sInfo.sAnonStrInfo
	Source	*sNext;			/* source to resume when this one ends */
};


/* forward declarations for the file-local routines */
static void	PushSource ();
static void	PopSource ();
static char	*MacroArgument ();
static int	EncodeEscape ();

static int	FChIn ();	/* next character from file */
static int	SChIn ();	/* next character from string */
static int	MChIn ();	/* next character from macro */
static int	LChIn ();	/* next character from literal string */
static int	ChIn0 ();	/* next raw character from any source */

/* top of the input source stack; NULL when no source is active */
static Source	*curSrc = (Source *) NULL;

/* pushback stack: UnChIn() pushes, ChIn0() pops (last in, first out) */
static int	pushCount = 0;
static char	pushBuf[maxPushChar];

static int	inputLevel = 0;		/* number of sources on the stack */
static int	copyMode = 0;		/* non-zero while copy mode is on */
static int	escStripped = 0;	/* last ChIn() char lost its escape */


/*
	Turn copy mode on or off.  Turning it on triggers some
	horrendously complex shenanigans inside of ChIn(): the flag set
	here is what EncodeEscape() tests to decide how the character
	following an escape is interpreted.

	onoff is non-zero to turn copy mode on, zero to turn it off.
*/

void CopyMode (onoff)
int	onoff;
{
	copyMode = onoff;	/* non-zero to turn on */
}


/*
	Allocate a new input source structure, fill in the type, set
	up the input function pointer, and push it on the source stack
	so that it becomes the current source.  The caller must fill in
	the union information afterwards.

	type must be one of fileSource, stringSource, macroSource or
	anonStringSource; anything else is reported through ETMPanic().
	inputLevel is incremented for each source pushed.
*/

static void PushSource (type)
int	type;
{
Source	*sp = New (Source);

	switch (sp->sType = type)
	{
	case fileSource:	sp->sChIn = FChIn; break;
	case stringSource:	sp->sChIn = SChIn; break;
	case macroSource:	sp->sChIn = MChIn; break;
	case anonStringSource:	sp->sChIn = LChIn; break;
	default:		ETMPanic ("PushSource: unknown input source type");
	}
	sp->sNext = curSrc;	/* link to head of source list */
	curSrc = sp;
	++inputLevel;
}

/*
	Replace the file being read by the current source with another
	file; the current file is not saved.  Returns non-zero on success,
	zero on failure.  On failure the caller should cause an exit; no
	effort should be made to continue processing.

	Fails when name is NULL, when the current source is not a file,
	or when the file cannot be reopened.  Having no current source
	at all is a fatal error.  On success any pending pushback is
	dropped and line numbering (registers .c and c.) restarts at zero.

	THIS REALLY NEEDS TO UNWIND ALL ACTIVE INPUT SOURCES.
*/

int SwitchFile (name)
char	*name;
{
	if (Bug (bugInputStack))
	{
		if (name != (char *) NULL)
			ETMMsg ("switch file <%s>", name);
		else
			ETMMsg ("switch file <%s>", "(stdin)");
	}

	if (name == (char *) NULL)
		return (0);
	if (curSrc == (Source *) NULL)
		ETMPanic ("SwitchFile: no current source");
	if (curSrc->sType != fileSource)
	{
		ETMMsg ("cannot switch file: current input source not file");
		return (0);
	}

	/* pushback belongs to the old file, so it goes first */
	pushCount = 0;
	if (freopen (name, "r", curSrc->sFile.filePtr) != (FILE *) NULL)
	{
		/* behave as though a newline was just read: next char starts line 1 */
		curSrc->sFile.prevChar = '\n';
		curSrc->sFile.inputLine = 0;
		SetRegisterValue (".c", (Param) 0);
		SetRegisterValue ("c.", (Param) 0);
		return (1);
	}
	return (0);
}


/*
	Open a file and make it the current input source, saving the
	previous source on the stack.  Returns non-zero on success, zero
	if the file cannot be opened (in which case nothing is pushed).

	If name is NULL, switch to stdin (this fact is used in main() when
	there are no named input files, but should not be done otherwise).

	Line numbering (registers .c and c.) starts over at zero.
*/

int PushFile (name)
char	*name;
{
FILE	*fp = stdin;

	if (Bug (bugInputStack))
	{
		if (name != (char *) NULL)
			ETMMsg ("push file <%s>", name);
		else
			ETMMsg ("push file <%s>", "(stdin)");
	}

	if (name != (char *) NULL)
	{
		fp = fopen (name, "r");
		if (fp == (FILE *) NULL)
			return (0);
	}

	PushSource (fileSource);
	curSrc->sFile.filePtr = fp;
	/* behave as though a newline was just read: next char starts line 1 */
	curSrc->sFile.prevChar = '\n';
	curSrc->sFile.inputLine = 0;
	SetRegisterValue (".c", (Param) 0);
	SetRegisterValue ("c.", (Param) 0);
	return (1);
}


/*
	Make a named string the current input source, saving the previous
	source on the stack.  name is used only for messages; sp is the
	string itself.  Always returns 1.

	The string's in-use count is bumped on entry; a string that is
	already being read (a recursive reference) is a fatal error.
*/

int PushString (name, sp)
UChar	*name;
String	*sp;
{
	if (Bug (bugInputStack))
	{
		ETMMsg ("push string <%s> <%s>",
					UStrToStr (name), sp->strValue);
	}

	/* count must go from 0 to 1; anything else means recursion */
	if (++sp->strInUse != 1)
	{
		ETMPanic ("PushString: name <%s> referenced recursively",
							UStrToStr (name));
	}

	PushSource (stringSource);
	curSrc->sString.curSPtr = (UChar *) sp->strValue;
	curSrc->sString.strPtr = sp;
	return (1);
}


/*
	Make a macro the current input source, saving the previous source
	on the stack.  Note that this sets register ".$".

	s is the macro name, argv[0..argc-1] the invocation arguments
	($1 is argv[0]).  The arguments are copied, so the caller keeps
	ownership of argv; the copies are released by PopSource().

	Returns non-zero on success.  Returns zero, pushing nothing, if
	the macro is not defined or is already active maxMacRecursion
	levels deep.

	At most as many arguments as margv[] can hold (maxMacArgs) are
	kept; extra ones are reported and ignored rather than written
	past the end of the array.  A negative argc is treated as zero.
*/

int PushMacro (s, argc, argv)
UChar	*s;
int	argc;
char	**argv;
{
Macro	*mp;
int	i;
/* capacity of margv[]; sizeof does not evaluate curSrc, so this is safe */
int	maxArgs = (int) (sizeof (curSrc->sMacro.margv)
				/ sizeof (curSrc->sMacro.margv[0]));

	if (Bug (bugInputStack))
		ETMMsg ("push macro <%s>", UStrToStr (s));
	if ((mp = LookupMacro (s)) == (Macro *) NULL)
		return (0);
	if (mp->macInUse >= maxMacRecursion)
	{
		ETMMsg ("macro <%s> invoked %d levels deep, further recursion refused",
					UStrToStr (s), maxMacRecursion);
		return (0);
	}
	if (argc < 0)
		argc = 0;
	if (argc > maxArgs)
	{
		ETMMsg ("macro <%s> given %d arguments, only first %d used",
					UStrToStr (s), argc, maxArgs);
		argc = maxArgs;
	}
	PushSource (macroSource);
	++mp->macInUse;
	curSrc->sMacro.macPtr = mp;
	curSrc->sMacro.curPos = 0L;
	curSrc->sMacro.endPos = mp->macSiz;
	curSrc->sMacro.margc = argc;
	SetRegisterValue (".$", (Param) argc);
	if (Bug (bugInputStack))
		ETMMsg (".$ set to %ld", GetRegisterValue (".$"));
	for (i = 0; i < argc; i++)
		curSrc->sMacro.margv[i] = StrAlloc (argv[i]);
	return (1);
}


/*
	Return a pointer to one of the current macro's arguments.
	Indexing is skewed; n is in range 1..9, but arg 1 is in margv[0].

	The result is the empty string (never NULL) if there is no
	current source, if the current input source isn't a macro, or
	if n is outside 1..margc.  In particular n == 0 (from "\$0")
	yields the empty string instead of reading margv[-1].

	The returned pointer refers to storage owned by the input
	source; the caller must not free it.
*/

static char *MacroArgument (n)
int	n;
{
	if (curSrc == NULL || curSrc->sType != macroSource)
		return ("");
	if (n < 1 || n > curSrc->sMacro.margc)
		return ("");
	if (Bug (bugInputStack))
		ETMMsg ("macro arg %d is <%s>", n, curSrc->sMacro.margv[n-1]);
	return (curSrc->sMacro.margv[n-1]);
}


/*
	Make a literal string (not a string register) the current input
	source, saving the previous source on the stack.  Such strings
	come, e.g., from macro argument or number register references.

	The text of s is copied, so the caller's buffer may be reused
	as soon as this returns.  Always returns 1.
*/

int PushAnonString (s)
UChar	*s;
{
UChar	*copy;

	if (Bug (bugInputStack))
		ETMMsg ("push lstring <%s>", UStrToStr (s));

	PushSource (anonStringSource);
	copy = UStrAlloc (s);
	/* keep the start for freeing, and a cursor for reading */
	curSrc->sAnonString.anonStr = copy;
	curSrc->sAnonString.curAPtr = copy;
	return (1);
}


/*
	Terminate the current input source and resume the previous one,
	if any.  Nothing is returned; afterwards curSrc is NULL if there
	was no previous source.  Calling this with no current source is
	a fatal error.

	Resources held by the departing source are released first: a
	file is closed, a string's in-use flag is cleared, a macro's
	argument copies are freed, an anonymous string's copy is freed.

	If the departing source is a macro, check whether it needs to be
	destroyed (it will if it was redefined while it was executing,
	since removal has to be deferred in that case).

	Register .$ is then set to the argument count of the nearest
	macro remaining on the stack, or zero if there is none.  If the
	new current source is a file, registers .c and c. are restored
	to that file's line number.
*/

static void PopSource ()
{
Source	*dead, *scan;
Macro	*mp;
int	i;
Param	nargs = 0;

	if (Bug (bugInputStack))
		ETMMsg ("pop input source");
	if (curSrc == (Source *) NULL)
		ETMPanic ("PopSource: logic error");

	/* release whatever the departing source owns */
	if (curSrc->sType == fileSource)
		(void) fclose (curSrc->sFile.filePtr);
	else if (curSrc->sType == stringSource)
		curSrc->sString.strPtr->strInUse = 0;
	else if (curSrc->sType == macroSource)
	{
		mp = curSrc->sMacro.macPtr;
		i = 0;
		while (i < curSrc->sMacro.margc)
		{
			Free (curSrc->sMacro.margv[i]);
			++i;
		}
		/* deferred removal: destroy only once the last use is gone */
		--mp->macInUse;
		if (mp->macInUse == 0 && mp->macRemove)
			RemoveNameDef (mp->macParent);
	}
	else if (curSrc->sType == anonStringSource)
		UFree (curSrc->sAnonString.anonStr);
	else
		ETMPanic ("PopSource: unknown input source type");

	/* unlink and discard the stack entry itself */
	dead = curSrc;
	curSrc = dead->sNext;
	Free ((char *) dead);
	--inputLevel;

	/* .$ reflects the innermost macro still active, if any */
	for (scan = curSrc; scan != (Source *) NULL; scan = scan->sNext)
	{
		if (scan->sType == macroSource)
		{
			nargs = scan->sMacro.margc;
			break;
		}
	}
	SetRegisterValue (".$", nargs);

	if (curSrc == (Source *) NULL || curSrc->sType != fileSource)
		return;
	SetRegisterValue (".c", curSrc->sFile.inputLine);
	SetRegisterValue ("c.", curSrc->sFile.inputLine);
}


/*
	Return next character from file input.

	This routine also turns CR or CRLF into LF, so that text files
	created on a Macintosh or MS-DOS machine can be read properly
	without problem.  The pushback here is done using stdio mechanism
	instead of UnChIn(), since the latter's at too high a level of
	abstraction.
*/

static int FChIn ()
{
int	c, c2;

	for (;;)
	{
		if ((c = getc (curSrc->sFile.filePtr)) == cr)
		{
			/* have CR, look at next char, push back if not LF */
			if ((c2 = getc (curSrc->sFile.filePtr)) != linefeed)
				ungetc (c2, curSrc->sFile.filePtr);
			c = linefeed;
		}
		/* filter nulls and non-ascii junk characters for fun */
		if (c != '\0' && !((c & 0x80) && c != EOF))
			break;
	}
	if (curSrc->sFile.prevChar == '\n')
	{
		++curSrc->sFile.inputLine;
		SetRegisterValue (".c", curSrc->sFile.inputLine);
		SetRegisterValue ("c.", curSrc->sFile.inputLine);
	}
	curSrc->sFile.prevChar = c;	/* save for next call */
	return (c);
}


/*
	Return next character from the current named-string source, or
	EOF once the terminating null is reached.  The read position is
	not advanced past the null, so repeated calls keep returning EOF.
*/

static int SChIn ()
{
int	ch = curSrc->sString.curSPtr[0];

	if (ch == '\0')
		return (EOF);
	++(curSrc->sString.curSPtr);
	return (ch);
}


/*
	Return next character from the body of the current macro source,
	or EOF once the read position reaches the end position.
*/

static int MChIn ()
{
MacInfo	*mi = &curSrc->sMacro;
int	ch = EOF;

	if (mi->curPos < mi->endPos)
	{
		ch = mi->macPtr->macBuf[mi->curPos];
		++mi->curPos;
	}
	return (ch);
}


/*
	Return next character from the current anonymous (literal) string
	source, or EOF once the terminating null is reached.  The read
	position is not advanced past the null.
*/

static int LChIn ()
{
int	ch = curSrc->sAnonString.curAPtr[0];

	if (ch == '\0')
		return (EOF);
	++(curSrc->sAnonString.curAPtr);
	return (ch);
}


/*
	Return the next raw character of input, or EOF when no source is
	left.

	While there is a current source: if anything is in the pushback
	queue, the most recently pushed character is returned.  Otherwise
	the source's input function is called; when it reports EOF the
	source is popped and reading resumes with the previous one.

	Pushback is only consulted while a source is active.
*/

static int ChIn0 ()
{
int	ch = EOF;

	for (;;)
	{
		if (curSrc == (Source *) NULL)
			break;			/* nothing left to read */
		if (pushCount > 0)
		{
			ch = pushBuf[--pushCount];
			break;
		}
		ch = (*(curSrc->sChIn)) ();
		if (ch == EOF)
		{
			PopSource ();	/* resume reading previous source */
			continue;
		}
		if (Bug (bugChIn02))
			fputc (ch, stderr);
		break;
	}

	if (!Bug (bugChIn0))
		return (ch);

	/* trace what is being returned */
	if (ch == EOF)
		ETMMsg ("0 %d <EOF>", inputLevel);
	else if (ch == linefeed)
		ETMMsg ("0 %d <lf>", inputLevel);
	else if (Esc (ch))
		ETMMsg ("0 %d {%#x}", inputLevel, ch);
	else
		ETMMsg ("0 %d <%c>", inputLevel, ch);
	return (ch);
}


/*
	Process character following an escape.  If the sequence is one
	such that it causes a new input source to be pushed, return '\0'.
	Otherwise map the character to an escape or special char code if
	necessary and return the result.

	c is the character that followed the escape character.  '\0' is
	also returned when the sequence is malformed or otherwise yields
	nothing; the caller simply keeps reading in that case.

	Following are interpreted in copy mode (see sec. 7.2):

	\n	pushes input to string representing number register value
	\*	pushes input to string
	\$	pushes input to macro argument (if in macro)
	\\	convert to single '\'
	\t,\a	convert to tab, leader character
	\.	convert to plain '.'

	Following are interpreted when not in copy mode:

	\n	same as in copy mode.
	\*	same as in copy mode.
	\$	same as in copy mode.
	\(xx	convert to special character code
	\w'str'	calculate width of str and interpolate into input stream.
		everything else converted to escape code for the character

	Names obtained from ParseNameRef() are released with UFree() on
	every path once they are no longer needed.

	These actions perhaps are not especially obvious from the code below.
*/

static int EncodeEscape (c)
int	c;
{
Register	*rp;
String	*sp;
SpChar	*scp;
UChar	*p;

	/*
		The following are interpolated whether copy
		mode is on or not.  Process them first.
	*/

	switch (c)
	{
	case 'n':
		/*
			This is pretty ugly because the value of the .z
			register is the NAME of the current diversion, not
			a number.
		*/
		if ((rp = ParseRegisterRef ()) != (Register *) NULL)
		{
			if (UStrCmp (rp->regParent->ndName, ".z") == 0)
				p = (UChar *) CurrentDiversion ();
			else
				p = (UChar *) FormatRegister (rp);
		}
		else
			p = (UChar *) "0";
		/* PushAnonString() copies the text, so p is not retained */
		(void) PushAnonString (p);
		return ('\0');
	case '*':
		if ((p = ParseNameRef ()) != (UChar *) NULL)
		{
			/* an undefined string interpolates as nothing */
			if ((sp = LookupString (p)) != (String *) NULL)
				(void) PushString (p, sp);
			/* the name is done with whether or not it was found */
			UFree (p);
		}
		return ('\0');
	case '$':
		/*
			If reference is malformed (next char not a digit),
			next char is eaten and DROPPED.
		*/
		if (Digit (c = ChIn ())
			&& (p = (UChar *) MacroArgument (c - '0'))
							!= (UChar *) NULL)
			PushAnonString (p);
		return ('\0');
	}

	/*
		There are a few more to be interpreted if copy mode is on.
		If in copy mode and the character is NOT interpreted, push
		it back so it'll be returned on the next input call, and
		return the escape character that preceded it.
	*/

	if (copyMode)
	{
		/* tell UnChIn() an escape was stripped from what we return */
		escStripped = 1;
		if (c == escChar)	/* not constant - can't be in switch */
			return (c);
		switch (c)
		{
		case 't':
			return ('\t');
		case 'a':
			return (1);	/* ASCII SOH */
		case '.':
			return ('.');
		}
		/* it isn't interpreted */
		escStripped = 0;
		UnChIn (c);
		return (escChar);
	}

	/* not copy mode - return escape code for character unless \( or \w */

	if (c == '(')
	{
		/* put the '(' back so that ParseNameRef() sees the whole name */
		UnChIn (c);
		if ((p = ParseNameRef ()) == (UChar *) NULL)
			return ('\0');
		if (!PlainStr (p))	/* MUST be 2 plain characters */
		{
			ETMMsg ("warning: illegal special character <\\(%s>",
							UStrToStr (p));
			UFree (p);
			return ('\0');
		}
		if ((scp = LookupSpChar (p)) == (SpChar *) NULL)
		{
			/* define it on the fly, with an empty value */
			ETMMsg ("warning: special character <%s> not defined",
							UStrToStr (p));
			NewSpChar (p, "");
			scp = LookupSpChar (p);
		}
		UFree (p);
		return (scp->spCode);
	}
	else if (c == 'w')
	{
		ParseWidth ();
		return ('\0');
	}
	return (ToEsc (c));
}



/*
	Return next character from input stream, perhaps processing
	escape sequence or pushing input first.  Returns EOF when there
	is no more input or if AExit() has been called (more precisely:
	whenever allowInput is zero, which is maintained elsewhere).

	The escStripped flag is cleared on every call; EncodeEscape() may
	set it again if it strips an escape in copy mode.

	An escape character followed by a newline is discarded entirely.
	An escape character followed by '"' starts a comment: the rest of
	the line is discarded and a linefeed is returned.

	After more than 100 consecutive EOF returns the program panics,
	on the theory that some caller is looping on exhausted input.
*/

int ChIn ()
{
static int eofCount = 0;
int	c;

	if (!allowInput)
		return (EOF);

	escStripped = 0;
	for (;;)
	{
		if ((c = ChIn0 ()) == EOF)
			break;
		/*
			If next char is not escape, it's just a normal
			character.  (Likewise when doEscapes is zero, in
			which case the escape character itself is returned.)
		*/
		if (c != escChar || !doEscapes)
			break;
		if ((c = ChIn0 ()) == EOF)	/* malformed */
			break;
		if (c == linefeed)	/* embedded newline, discard */
			continue;
		if (c == '"')		/* comment, discard until newline */
		{
			while (!Eol (ChIn0 ()))
			{
				/* gobble rest of line */
			}
			/* the comment's line end is reported as a linefeed */
			c = linefeed;
			break;
		}
		/*
			Encode the character following the escape if
			necessary.  If it's an input source (\*, \$, \n),
			push input and continue trying to read.  If it's
			a \}, decrement ifLevel, discard and continue
			reading.
		*/
		if ((c = EncodeEscape (c)) != '\0')
		{
			if (c != ToEsc ('}'))
				break;
			--ifLevel;
		}
	}
	/*
		This bit of ugliness helps track down errors in input logic
		that results from some hunk of code running wild in a
		ChIn()/UnChIn() loop.
	*/
	if (c == EOF)
	{
		if (eofCount++ > 100)
			ETMPanic ("Program terminated, input logic error");
	}
	else
		eofCount = 0;	/* any real character resets the count */

	/* trace what is being returned */
	if (Bug (bugChIn))
	{
		if (c == EOF)
			ETMMsg ("%d <EOF>", inputLevel);
		else if (c == linefeed)
			ETMMsg ("%d <lf>", inputLevel);
		else if (Esc (c))
			ETMMsg ("%d {%#x}", inputLevel, c);
		else
			ETMMsg ("%d <%c>", inputLevel, c);
	}
	return (c);
}


/*
	Look at the next input character without consuming it: read it
	with ChIn() and immediately push it back with UnChIn().  Returns
	the character, or EOF (which UnChIn() discards) at end of input.
*/

int ChPeek ()
{
int	next = ChIn ();

	UnChIn (next);
	return (next);
}


/*
	Push back a character onto the current input source.

	EOF is allowed (so callers don't have to check whether they're
	pushing a "real" character), but discarded.

	Escape character codes (high bit set) are converted back into the two
	character equivalent; both characters are pushed back.  When they are
	reread, they'll be converted back into the escape code.  This is
	NECESSARY, because an escaped character might first be seen in
	non-copy mode, pushed, and then reread in copy mode.  If the escape
	code itself were pushed, the character wouldn't be interpreted in
	copy mode properly.

	Special character codes are likewise pushed back as the escape
	character, '(' and the character's name.  A plain character is
	pushed as itself, preceded by the escape character if escStripped
	says the last ChIn() stripped one off; the flag is then cleared.

	The pushback buffer holds maxPushChar characters; the number of
	slots actually needed is computed first, and exceeding the limit
	is a fatal error.  Characters are stored in reverse order because
	ChIn0() reads the buffer last-in, first-out.
*/

void UnChIn (c)
int	c;
{
int	need;
SpChar	*sp = NULL;
char	*p;

	if (c == EOF)
	{
		if (Bug (bugUnChIn))
			ETMMsg ("push %d <EOF> (discarded)", inputLevel);
		return;
	}

	/* work out how many buffer slots this pushback really takes */
	if (Special (c))
	{
		sp = LookupSpCharByCode (c);
		if (sp == NULL)
			ETMPanic ("UnChIn: woof (%d)", c);
		need = (int) strlen (sp->spName) + 2;	/* 2 for "\(" */
	}
	else if (Esc (c))
		need = 2;			/* escape char + plain char */
	else
		need = escStripped ? 2 : 1;	/* char, maybe its escape */

	if (pushCount + need > maxPushChar)
		ETMPanic ("UnChIn: character pushback limit exceeded");
	if (Special (c))
	{
		/* name goes in back to front, then '(' and the escape */
		p = sp->spName + need - 2;
		while (p > sp->spName)
			pushBuf[pushCount++] = *--p;
		pushBuf[pushCount++] = '(';
		pushBuf[pushCount++] = escChar;

	}
	else if (Esc (c))
	{
		pushBuf[pushCount++] = FromEsc (c);
		pushBuf[pushCount++] = escChar;
	}
	else
	{
		pushBuf[pushCount++] = c;
		if (escStripped)
		{
			/* restore the level of escaping ChIn() removed */
			pushBuf[pushCount++] = escChar;
			escStripped = 0;
		}
	}
	if (Bug (bugUnChIn))
	{
		if (c == linefeed)
			ETMMsg ("push %d <lf>", inputLevel);
		else if (Esc (c))
			ETMMsg ("push %d {%#x}", inputLevel, c);
		else
			ETMMsg ("push %d <%c>", inputLevel, c);
	}
}


/*
	Read a line from file f into buf (at most size-1 characters, as
	for fgets()), then strip a trailing lf, a trailing cr, or a
	trailing crlf pair.  Return zero if nothing could be read (EOF
	or error), non-zero otherwise.
*/

int GetLine (buf, size, f)
char	*buf;
int	size;
FILE	*f;
{
char	*end;

	if (fgets (buf, size, f) == (char *) NULL)
		return (0);

	/* end always points at the terminating null */
	end = buf + strlen (buf);
	if (end > buf && end[-1] == '\n')
		*--end = '\0';
	if (end > buf && end[-1] == '\r')
		*--end = '\0';
	return (1);
}
