/*
 *  Hand-built, lexical analyzer for decoding formatted UNITS specifications.
 *
 *  Has macro-substitution capability.
 */

/*LINTLIBRARY*/

#include "udposix.h"
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include "uderrmsg.h"
#include "udunits.h"
#include "parser.h"	/* #include "units.h" before this one */
#include "scanner.h"

/*
 * Fetch the next character from the scan position `*ptr' and advance the
 * position past it.
 *
 * If the scan position is beyond `buf' and the character just before it is
 * the NUL terminator, the end of the input has already been consumed: 0 is
 * returned and the position is left where it is.  The terminating NUL
 * itself is returned (and stepped over) the first time it is reached.
 *
 * NB: `ptr' is evaluated more than once, so it must not have side effects.
 */
#undef	GETC
#define GETC(buf, ptr)	 (*(ptr) > (buf) && (*(ptr))[-1] == 0 \
			    ? 0 \
			    : *((*(ptr))++))

/*
 * Source-identification strings.  They are omitted when `lint' is defined.
 */
#ifndef lint
    static char rcsid[]	= "$Id: scanner.c,v 1.3 1991/04/02 17:09:45 steve Exp $";
    static char afsid[]	= "$__Header$";
#endif

static int	myungetc();		    /* a function rather than a macro,
					     * so that lint(1) does not complain
					     * about a #define'd "do {} while
					     * (0)" statement */


/*
 *  Return the next character in a name or 0 if no name-type characters left.
 *
 *  One character is always fetched from the buffer with GETC(); it is up
 *  to the caller to unget it when 0 is returned.
 *
 *  Name-type characters are letters, the underscore, and digits in one of
 *  these contexts:
 *	- a 'C' or 'F' is one of the next two characters (temperature
 *	  specification, e.g. "inch_Hg_32F");
 *	- the digit is the '1' or the '2' of "C12" (Carbon-12 specification,
 *	  e.g. "C12_faraday");
 *	- the digit is the '2' of "H2O" or "h2o" ("water" specification,
 *	  e.g. "foot_H2O").
 *
 *  buf		Start of the string being scanned.
 *  ptr		Address of the current scan position within `buf'.
 */
    static int
NextCharOfName(buf, ptr)
    const char	*buf;
    const char	**ptr;
{
    int		chr	= 0;
    int		c	= GETC(buf, ptr);
    int		uc	= (unsigned char)c;	/* safe argument for <ctype.h>
						 * (plain char may be negative)
						 */

    if (isalpha(uc) || c == '_') {
	chr	= c;
    } else if (isdigit(uc)) {
	const char	*next	= *ptr;		/* character after the digit */
	long		nprev	= (long)(next - buf) - 1;
						/* number of characters in
						 * front of the digit; guards
						 * every look-behind below */

	if ((next[0] != 0 &&
		(next[0] == 'C' || next[0] == 'F' ||
		 next[1] == 'C' || next[1] == 'F')) ||
	    (nprev >= 1 && strncmp(next-2, "C12", 3) == 0) ||
	    (nprev >= 2 && strncmp(next-3, "C12", 3) == 0)) {

	    /*
	     * Temperature specification (e.g. "inch_Hg_32F") or
	     * Carbon-12 specification (e.g. "C12_faraday").
	     */
	    chr = c;
	} else if (c == '2' && nprev >= 1 &&
		   (next[-2] == 'H' || next[-2] == 'h') &&
		   (next[ 0] == 'O' || next[ 0] == 'o')) {
	    /*
	     * "Water" specification (e.g. "foot_H2O").
	     */
	    chr = c;
	}
    }

    return chr;
}


/*
 * Decode a numeric value and indicate its type.
 *
 * The text at the scan position is converted with strtod().  It is accepted
 * as a number if it starts with a digit, or if it starts with a '.' and
 * strtod() consumed more than that single character.
 *
 * On success `*TokenIDp' is set to REAL (value stored in `yylval.rval') if
 * one of the characters ".DdEe" occurs inside the converted text; otherwise
 * it is set to INT and the value, with .5 added, is stored in
 * `yylval.ival'.
 *
 * ptr		Address of the current scan position.
 * TokenIDp	Where to store the token ID of a decoded number.
 *
 * This function returns:
 *	0	not a numeric token (buffer state is unchanged);
 *	1	numeric token decoded (buffer state is advanced).
 */
    static int
ScanValue(ptr, TokenIDp)
    const char	**ptr;
    int		*TokenIDp;
{
    int		status;
    const char	*start	= *ptr;		/* start of this token */
    char	*NextTokenp;		/* start of next token */
    double	rval	= strtod(start, &NextTokenp);
    int		first	= (unsigned char)*start;
					/* safe argument for <ctype.h>
					 * (plain char may be negative) */

    if (isdigit(first) || (first == '.' && NextTokenp > start+1)) {
	/*
	 * Token is truly a numeric value.  Indicate whether it's integral
	 * or not and advance the buffer.
	 */
	const char	*cp = strpbrk(start, ".DdEe");

	if (cp != NULL && cp < NextTokenp) {
	    *TokenIDp	= REAL;
	    yylval.rval	= rval;
	} else {
	    *TokenIDp	= INT;
	    yylval.ival = rval + .5;
	}
	*ptr	= NextTokenp;
	status	= 1;
    } else {
	/*
	 * Token is not a numeric value.
	 */
	status	= 0;
    }

    return status;
}


/*
 *  Skip any white space at the scan position, leaving the position on the
 *  first character that is not white space (or on the terminating NUL).
 */
    static void
SkipSpaces(buf, ptr)
    const char	*buf;
    const char	**ptr;
{
    while (isspace((unsigned char)GETC(buf, ptr)))
	continue;
    (void)myungetc(buf, ptr);
}


/*
 *  Lexical analyzer for the "units" grammar.
 *
 *  buf		Start of the string being scanned.
 *  ptr		Address of the current scan position within `buf'; it is
 *		advanced past the token that is returned.
 *
 *  Returns the ID of the next token:
 *	END		end of the string;
 *	SHIFT		'@' (following white space is skipped);
 *	DIVIDE		'/' or the word "per";
 *	MULTIPLY	'*', a '.' that doesn't start a number, a '-' that
 *			isn't followed by a digit, or white space that is
 *			followed by something other than the end of the
 *			string, '@', or the word "per";
 *	EXPONENT	'^' or "**";
 *	INT, REAL	a number (value stored by ScanValue());
 *	NAME		a name (text stored in `yylval.name');
 *	ERR		a name that doesn't fit in `yylval.name';
 *	otherwise	the character itself.
 */
    int
utLex(buf, ptr)
    const char	*buf;
    const char	**ptr;
{
    int		c;			/* current character */
    int		TokenID;		/* returned-token ID */
    static char me[]	= "utLex";	/* this function's name */

    /*
     * NB: characters are converted to `unsigned char' before being handed
     * to the <ctype.h> functions because plain `char' may be negative.
     */
    c = GETC(buf, ptr);

    if (isspace((unsigned char)c)) {
	while (isspace((unsigned char)(c = GETC(buf, ptr))))
	    continue;

	if (c == 0) {
	    TokenID	= END;
	} else if (c == '@') {
	    SkipSpaces(buf, ptr);
	    TokenID	= SHIFT;
	} else if (strncmp((*ptr)-1, "per", 3) == 0 && 
		   ((*ptr)[2] == 0 || isspace((unsigned char)(*ptr)[2]))) {
	    /*
	     * The word "per" standing by itself.  The 'p' has already been
	     * consumed; step over the "er" as well.
	     */
	    TokenID	= DIVIDE;
	    (*ptr)	+= 2;
	    SkipSpaces(buf, ptr);
	} else {
	    /*
	     * White space in front of anything else means multiplication.
	     * Leave the character for the next call.
	     */
	    (void)myungetc(buf, ptr);
	    TokenID	= MULTIPLY;
	}

    } else if (c == 0) {
	TokenID = END;

    } else if (c == '@') {
	SkipSpaces(buf, ptr);
	TokenID	= SHIFT;

    } else if (isdigit((unsigned char)c)) {
	(void)myungetc(buf, ptr);
	(void)ScanValue(ptr, &TokenID);

    } else if (c == '.') {
	(void)myungetc(buf, ptr);
	if (!ScanValue(ptr, &TokenID)) {
	    /*
	     * Not the start of a number: consume the '.' again.
	     */
	    (void)GETC(buf, ptr);
	    TokenID	= MULTIPLY;
	}

    } else if (isalpha((unsigned char)c)) {
	char		*cp	= yylval.name;
	int		toolong	= 0;	/* name overflowed the buffer? */

	do {
	    if (cp >= yylval.name + UT_NAMELEN - 1) {
		toolong	= 1;
		break;
	    }
	    *cp++   = c;
	} while ((c = NextCharOfName(buf, ptr)) != 0);
	(void)myungetc(buf, ptr);
	*cp = '\0';

	if (toolong) {
	    /*
	     * Report the error instead of handing the parser a silently
	     * truncated name.
	     */
	    udadvise("%s: Name \"%s\" is too long.", me, yylval.name);
	    TokenID	= ERR;
	} else {
	    TokenID	= strcmp(yylval.name, "per") == 0
			    ? DIVIDE
			    : NAME;
	}

    } else if (c == '*') {
	if (GETC(buf, ptr) == '*') {
	    TokenID	= EXPONENT;
	} else {
	    (void)myungetc(buf, ptr);
	    TokenID	= MULTIPLY;
	}

    } else if (c == '^') {
	TokenID = EXPONENT;

    } else if (c == '-' && !isdigit((unsigned char)**ptr)) {
	TokenID	= MULTIPLY;

    } else if (c == '/') {
	TokenID	= DIVIDE;

    } else {
	/*
	 * Individual character seen.
	 */
	TokenID = c;
    }

    return TokenID;
}


/*
 *  Push the most recently read character back: move the scan position one
 *  character towards the start of the buffer (but never in front of `buf')
 *  and return the character that is now at the scan position.
 */
    static int
myungetc(buf, ptr)
    const char	      *buf;
    const char	      **ptr;
{
    const char	*pos	= *ptr;

    if (pos > buf) {
	pos	-= 1;
	*ptr	= pos;
    }

    return *pos;
}
