/*
    $Header: /usr/local/src/et/work/xemp5.0/lib/parse/RCS/lex.c,v 5.1 93/03/14 16:50:39 etienne Exp Locker: etienne $
    $Date: 93/03/14 16:50:39 $
    $Author: etienne $
    $Id: lex.c,v 5.1 93/03/14 16:50:39 etienne Exp Locker: etienne $
    $Locker: etienne $
    $Log:	lex.c,v $
 * Revision 5.1  93/03/14  16:50:39  etienne
 * *** empty log message ***
 * 
 * Revision 5.0  93/02/06  09:23:02  greyhelm
 * Fixed backward compatabilty with Merc/KSU
 * Changed MOTD to show new version and authors
 * 
 * Revision 4.4  1993/02/06  04:40:53  greyhelm
 * Added RCS headers - Karl Hagen
 *

*/
/*
 * Lex.c
 */

#include <ctype.h>
#include "parser.h"
#include "symtab.h"
#include "token.h"
#include "Lpars.h"

static Strings strings;		/* Current set of input lines being scanned */
static char *cur_pos;		/* Current scan position in the current line;
				   NextToken() reports EOFILE when it is null */
static char *start_pos;		/* Start of the token being scanned, as set by
				   LexSkipSpaceAndComments() */
 
static int line_number;		/* Number of the current input line, starting
				   at 1; prefixed to every error message */
Strings parse_errors;		/* Error messages collected by ParseError() */

/* Starts a comment that runs to the end of the current input line. */
#define COMMENT_CHAR	';'

extern double atof();
extern char *GetNextString();

static bool ScanNumber();
static char *LexScanText();

/*
 * current_token is the token most recently returned by NextToken().
 * stacked_token is a pushed-back token: while its value is non-zero,
 * NextToken() returns it instead of scanning new input.
 */
TOKEN current_token, stacked_token;

/*
 * LexInitStrings
 *
 * Point the lexer at a new set of input lines.  Any error list left over
 * from an earlier run is freed and replaced by a fresh one, the string
 * list is initialised for reading, and the scanner is positioned on the
 * first line, which is counted as line 1.
 */
void LexInitStrings(strs)
Strings strs;
{
	extern Strings InitStrings();

	/* Throw away the messages of a previous parse, if there were any. */
	if (parse_errors != (Strings) 0) {
		FreeStrings(parse_errors);
	}
	parse_errors = InitStrings();

	/* Adopt the new input and fetch its first line. */
	InitStringList(strs);
	strings     = strs;
	line_number = 1;
	cur_pos     = GetNextString(strs);
}

/*
 * LexSkipSpaceAndComments
 *
 * Advance "cur_pos" past white space and past a COMMENT_CHAR comment
 * (which extends to the end of the line), fetching further input lines
 * as needed.  On return either "cur_pos" is null (no more input) or it
 * points at the first character of the next token, and "start_pos" has
 * been set to that same position.  "line_number" is incremented for
 * every line fetched.
 */
static void LexSkipSpaceAndComments()
{
	/* Loop instead of recursing, so that long runs of blank or
	   comment-only lines do not deepen the call stack. */
	while (cur_pos != (char *) 0) {
		/* The <ctype.h> classifiers are only defined for values
		   representable as unsigned char (and EOF). */
		while (isspace((unsigned char) *cur_pos))
			cur_pos++;

		if (*cur_pos == COMMENT_CHAR) {
			/* The rest of the line is comment text. */
			do
				cur_pos++;
			while (*cur_pos != '\0');
		}

		if (*cur_pos != '\0') {
			start_pos = cur_pos;
			return;
		}

		/* Line exhausted: move on to the next one. */
		cur_pos = GetNextString(strings);
		line_number++;
	}
}

/*
 * match
 *
 * Compare a symbol table name with a piece of input text.
 *
 *	list	NUL-terminated name from a symbol table
 *	line	input text; only its first "length" characters are examined
 *	length	number of characters to compare (the callers in this file
 *		always pass a non-negative value)
 *
 * Returns True when the first "length" characters of both agree and
 * "list" ends right there, i.e. the name is exactly "length" characters
 * long; returns False otherwise.
 */
static bool match(list, line, length)
char *list, *line;
int length;
{
	int i;

	for (i = 0; i < length; i++) {
		if (list[i] != line[i])
			return False;
	}

	/* Everything compared equal; the name must not go on any further. */
	if (list[i] == '\0')
		return True;

	return False;
}

/*
 * Longest piece of an unknown identifier that is echoed in an error
 * message.  ParseError() formats into a 100 byte buffer; this leaves
 * room for the "Line <n>: " prefix and the fixed message text.
 */
#define MAX_IDENT_IN_ERROR	60

/*
 * NextToken
 *
 * Deliver the next token.  A token waiting in "stacked_token" is moved to
 * "current_token" and returned first.  Otherwise the input is scanned for
 *
 *	- a word of letters and '_', looked up in "text_symtab";
 *	- a number accepted by ScanNumber()	(CONSTANT,  t_constant);
 *	- a character constant 'c'		(CHARACTER, t_character);
 *	- a string "..." on one line		(STRING,    t_text);
 *	- an operator from "special_chars_symtab".
 *
 * Lexical errors are reported through ParseError() and scanning resumes
 * behind the offending text.  Returns the token value that was stored in
 * "current_token", or EOFILE when the input is exhausted (in which case
 * "current_token" is left untouched).
 */
int NextToken ()
{
	Symtab list;
	char *text;
	char saved;
	int length;

	if (stacked_token.value != 0) {
		current_token = stacked_token;
		stacked_token.value = 0;
		return current_token.value;
	}

	/* Every pass scans one lexeme; "continue" retries after an error. */
	for (;;) {
		LexSkipSpaceAndComments();

		if (cur_pos == (char *) 0)
			return EOFILE;

		/* The <ctype.h> classifiers need an unsigned char value. */
		if (isalpha((unsigned char) *cur_pos) || *cur_pos == '_') {
			do {
				cur_pos++;
			} while (isalpha((unsigned char) *cur_pos) ||
				 *cur_pos == '_');

			length = (int) (cur_pos - start_pos);

			for (list = text_symtab; list->name != NULL; list++)
				if (match(list->name, start_pos, length) == True)
					break;

			if (list->name == (char *) 0) {
				/* Bound the echoed text so that a very long
				   word cannot overrun ParseError()'s buffer. */
				ParseError("Unknown identifier \"%.*s\".",
					   length > MAX_IDENT_IN_ERROR
						? MAX_IDENT_IN_ERROR : length,
					   start_pos);
				continue;
			}

			current_token.value  = list->token;
			current_token.symtab = list;
		}
		else if (isdigit((unsigned char) *cur_pos) &&
			 ScanNumber() == True) {
			/*
			 * Convert exactly the text ScanNumber() accepted.
			 * atof() on the unterminated line could read further
			 * than the scanner did (e.g. "1E5" or "12.e5"), so
			 * the lexeme is NUL-terminated for the conversion.
			 */
			saved = *cur_pos;
			*cur_pos = '\0';

			current_token.value      = CONSTANT;
			current_token.t_constant = atof(start_pos);

			*cur_pos = saved;
		}
		else if (*cur_pos == '\'') {
			cur_pos++;

			/* Check for the end of the line first: looking at
			   cur_pos[1] would otherwise read past the NUL. */
			if (*cur_pos == '\0' || *(cur_pos + 1) != '\'') {
				ParseError("Error in character constant.");
				continue;
			}

			current_token.value       = CHARACTER;
			current_token.t_character = *cur_pos++;

			cur_pos++;	/* skip the closing quote */
		}
		else if (*cur_pos == '"') {
			/* LexScanText() has already reported the error. */
			if ((text = LexScanText()) == NULL)
				continue;

			current_token.value  = STRING;
			current_token.t_text = text;
		}
		else {
			length = 0;
			for (list = special_chars_symtab; list->name != NULL;
			     list++) {
				/* match() takes an int, strlen() yields a
				   size_t: convert explicitly. */
				length = (int) strlen(list->name);
				if (match(list->name, cur_pos, length) == True)
					break;
			}

			if (list->name == (char *) 0) {
				ParseError("Unknown character '%c'.", *cur_pos);
				cur_pos++;
				continue;
			}

			cur_pos += length;

			current_token.value  = list->token;
			current_token.symtab = list;
		}

		return current_token.value;
	}
}

/*
 * Finite automaton for real or integer recognition.
 *
 *	state	input		next state
 *	-----	-----		----------
 *	  0	0-9		0
 *	  0	'.'		1
 *	  0	'e'		3
 *	  0	other		7	accept: integer
 *	  1	0-9		2
 *	  1	other		7	accept: integer, '.' not consumed
 *	  2	0-9		2
 *	  2	'e'		3
 *	  2	other		6	accept: real
 *	  3	0-9		5
 *	  3	'+' or '-'	4
 *	  3	other		8	reject
 *	  4	0-9		5
 *	  4	other		8	reject
 *	  5	0-9		5
 *	  5	other		6	accept: real
 *
 * The scan starts at "cur_pos".  When the automaton accepts, "cur_pos" is
 * moved to the first character behind the number and True is returned.
 * When it rejects, "cur_pos" is left alone and False is returned.
 */
static bool ScanNumber()
{
	int state, c;
	char *ptr;

	ptr   = cur_pos;
	state = 0;

	while (state < 6) {
		/* The <ctype.h> classifiers need an unsigned char value. */
		c = (unsigned char) *ptr;

		switch (state) {
		case 0:
			if (c == '.')
				state = 1;
			else if (c == 'e')
				state = 3;
			else if (!isdigit(c))
				state = 7;
			break;
		case 1:
			if (isdigit(c))
				state = 2;
			else {
				/* No digit after the '.': give the '.' back. */
				ptr--;
				state = 7;
			}
			break;
		case 2:
			if (c == 'e')
				state = 3;
			else if (!isdigit(c))
				state = 6;
			break;
		case 3:
			if (isdigit(c))
				state = 5;
			else if (c == '+' || c == '-')
				state = 4;
			else
				state = 8;
			break;
		case 4:
			state = isdigit(c) ? 5 : 8;
			break;
		case 5:
			if (!isdigit(c))
				state = 6;
			break;
		}
		ptr++;
	}

	/* The loop stepped over the character that ended the number. */
	ptr--;
	if (state == 6 || state == 7) {
		cur_pos = ptr;
		return True;
	}

	return False;
}

/*
 * LexScanText
 *
 * Scan a string constant; "cur_pos" must point at the opening '"'.
 * The closing '"' has to be on the same line.  If it is missing, an
 * error is reported and NULL is returned with "cur_pos" at the end of
 * the line.  Otherwise the text between the quotes is handed to Str()
 * and its result is returned, with "cur_pos" moved behind the closing
 * quote.
 */
static char *LexScanText()
{
	char *open_quote = cur_pos;
	char *result;

	cur_pos++;
	while (*cur_pos != '"') {
		if (*cur_pos == '\0') {
			ParseError("Missing '\"'.");
			return NULL;
		}
		cur_pos++;
	}

	/* Terminate the text in place for Str(), then restore the quote. */
	*cur_pos = '\0';
	result = Str(open_quote + 1);
	*cur_pos = '"';
	cur_pos++;

	return result;
}

#include <varargs.h>

/*
 * ParseError
 *
 * Record a parse error.  The arguments are a printf-style format string
 * followed by the values it refers to, passed the old <varargs.h> way.
 * The "parse_error" flag is set, and the formatted message, prefixed with
 * "Line <line_number>: ", is appended to "parse_errors".
 *
 * NOTE(review): msg_buf holds 100 bytes and neither sprintf() nor
 * vsprintf() bounds its output, so a long enough message overruns the
 * buffer; callers must keep their messages short.
 * NOTE(review): no return type is declared (implicitly int) and no value
 * is returned; callers in this file ignore the result.
 */
/*VARARGS0*/
ParseError(va_alist)
va_dcl
{
	char msg_buf[100];
	va_list arglist;
	char *fmt;

	parse_error = True;

	/* Every message starts with the current input line number. */
	sprintf (msg_buf, "Line %d: ", line_number);

	/* With <varargs.h> the format is fetched as the first argument. */
	va_start(arglist);
	fmt = va_arg(arglist, char *);

	/* Format the caller's text directly behind the prefix. */
	vsprintf(&msg_buf[strlen(msg_buf)], fmt, arglist);
	va_end(arglist);

	AddString(parse_errors, msg_buf);
}
