/* NS32000 Assembler
 * Phase1.c
 * Read the input file.  Handles all parsing: instructions, operands,
 * labels, comments, pseudo-ops.
 */
#include <ctype.h>
#include <stdio.h>
#ifdef MSDOS
#  include "a_out.h"
#else
#  include "a.out.h"
#endif
#include "glob.h"

/* Macros for parsing.  The first five operate on the global look-ahead
 * character curc, which SCANC refills from the input stream infp.  The
 * last three are pure expressions of their argument, which they may
 * evaluate more than once.
 */
#define SCANC (curc = getc (infp))     /* scan a character into curc */
#define SCANNS {while (curc=='\t'||curc==' ') SCANC;} /* scan for nonspace */
#define SCANNL {while (curc!='\n'&&curc!=EOF) SCANC;} /* scan newline/EOF */
#define ISID1 (curc=='_'||curc=='.'||(isalpha(curc))) /* is id start char */
#define ISID (curc=='_'||curc=='.'||(isalnum(curc)))  /* is other id char */
/* Clear the bit in which 'a' and 'A' differ; only meaningful for letters. */
#define SHIFTUP(x) ((x)&~('a'-'A'))
/* Upper-case x if it is a letter, otherwise leave it unchanged. */
#define TOUPPER(x) (isalpha(x)?SHIFTUP(x):(x))
/* Digit value of x: 0-9 for decimal digits, 10 and up for letters (A or a
 * is 10), and 16 for any other character so that it fails the range check
 * of every base scannum() is called with in this file (2, 8, 10 and 16).
 */
#define TONUM(x) (isalpha(x)?SHIFTUP(x)+10-'A':(isdigit(x)?(x)-'0':16))

/* Drives parsing of input file.  Parses optional op and label.  Expects
 * op handler to parse operands, if any.
 */
phase1()
{
  opptr curop;
  hashptr equhash, asciihash;

  phase = 1;
  init_exp();
  SCANC;
  if (debug == 4) print_toks();

  equhash = find (".EQU", T_OP);       /* for .equ special case */
  asciihash = find (".ASCII", T_OP);   /* for .ascii special case */
  for (;;) {
    scan();
    ++lnnum;
    curlbl = NULL;                     /* catches .equ without label */
    if (curtok == tLBL) {              /* get label */
      if (curhash == NULL) {           /* insert in hash table */
        curhash = (hashptr) insert_lbl (curstr, T_UNDF);
        ((lblptr)curhash)->exp = NULL;
      }
      curlbl = (lblptr)curhash;        /* save ptr since scan clobbers it */
      scan();
      if (curtok == ':') {
        scan();                        /* get first colon */
        if (curtok == ':') {
          scan();                      /* get second colon */
          curlbl->typ &= ~T_STATIC;
        } else if (!(curlbl->typ & T_EXPORTDIR))
          curlbl->typ |= T_STATIC;     /* single :, static unless .EXPORT */
      } else {
        error ("#bad mneumonic or label without colon: %s", curlbl->id);
        curlbl = NULL;
        scannl();
      }
    }
    if (curtok == tOP) {               /* process inst or directive */
      if (curhash != equhash &&        /* special case for .EQU  -- type */
      curlbl != NULL)                  /*   might not be current segment */  
        proc_lbl(curlbl);
      curop = (opptr)curhash;          /* save ptr since scan clobbers it */
      if (curhash == asciihash)        /* must precede next scan! (ugly) */
        gcc = TRUE;
      scan();
      (*curop->f)(curop);              /* handler parses rest */
    } else if (curlbl != NULL) proc_lbl (curlbl);
    if (curtok == tEOF) break;
    if (curtok != '\n') {
      error ("#unexpected characters before end of line");
      scannl();
    }
  }
}

static int in_string;			/* for scanning long strings */

/* Lexical scanner.  Uses global variables.  Curc is a look ahead character.
 * Returns current token type in curtok and current token value (if any) in
 * curval (numbers), curstr (labels), or curlbl (hashed instructions or
 * labels).
 */
scan()
{
  char upstr [LNLEN], *p, *q;

  if (in_string) {			/* kludge for scanning long strings */
    scanstr();
    return;
  }
  SCANNS;                              /* scan non space */
  if (ISID1) {                         /* could be beginning of label, op */
    p = curstr;
    q = upstr;
    *p++ = curc;
    *q++ = TOUPPER (curc);
    SCANC;
    if (curc == '\'') {                /* check num with radix, e.g. H'123 */
      switch (*(upstr)) {
        case 'B':                      /* binary num: B'1101 */
          SCANC;
          scannum (2);
          return;
        case 'O':                      /* octal num: Q'7712 */
        case 'Q':
          SCANC;
          scannum (8);
          return;
        case 'D':                      /* decimal num: D'99123 */
          SCANC;
          scannum (10);
          return;
        case 'X':                      /* hex num: H'ffec */
        case 'H':
          SCANC;
          scannum (16);
          return;
	case 'F':			/* float: f'-123.456e+7 */
	  SCANC;
	  scanfloat();
	  return;
      }
    } 
    while (ISID &&                     /* not num, get rest of id */
    p - curstr < LNLEN-1) {  
      *p++ = curc;
      *q++ = TOUPPER (curc);
      SCANC;
    }
    *p = '\0';
    *q = '\0';
    if (*curstr != '_' &&              /* optimization - skip C labels */
    NULL != (curhash = find (upstr,    /* check for mneumonic, asm directive, */
    T_OP | T_AOP | T_REG))) {          /* register or arithmetic operator     */
      if (curhash->typ == T_AOP)
        curtok = ((opptr)curhash)->inst;
      else if (curhash->typ == T_REG) {
        curtok = tREG;
        curval = ((opptr)curhash)->inst;
      } else curtok = tOP;
      return;
    }
    curtok = tLBL;                     /* must be label */
    curhash = find (curstr, T_LBL);
    return;
  }                                    /* end of labels, inst, directives */
  if (isdigit (curc)) {                /* decimal numbers */
    scannum (10);
    return;
  }
  if (curc == ';' || curc == '\n') {   /* get comment, newline */
    curtok = '\n';
    SCANNL;
    SCANC;
    return;
  } 
  for (p = ":@,+-*/()[]"; *p; ++p)     /* literal characters */
    if (curc == *p) {
      curtok = curc;
      SCANC;
      return;
    }
  if (curc == '\'' || curc == '"') {   /* string */
    scanstr();
    return;
  }
  if (curc == EOF) {                   /* end of file */
    curtok = tEOF;
    return;
  }
  curtok = tBAD;                       /* call error elsewhere */
  SCANC;
}

/* Read an unsigned number in the given base starting at the look-ahead
 * character.  Sets curtok to tNUM and leaves the value in curval.  If the
 * first character is not a digit of that base an error is reported, curval
 * is zero and the character is left unread.
 */
scannum(base)
int base;
{
  register int d;

  curtok = tNUM;
  d = TONUM(curc);
  if (d >= base) {
    error ("#bad digit in number: %c", curc);
    curval = 0;
    return;
  }
  curval = d;
  SCANC;
  while ((d = TONUM(curc)) < base) {   /* stop at first non-digit */
    curval = base*curval + d;
    SCANC;
  }
}

/* Read a floating point constant.  Collects the longest run of sign,
 * digit, '.', 'e' and 'E' characters into curstr and converts it with
 * atof; the result goes to curfloat and curtok becomes tFLOAT.  An empty
 * run, or one that fills curstr completely, is reported as an error and
 * yields 0.0.
 */
scanfloat()
{
  static char accepted[] = "+-.eE0123456789";
  double atof();
  int n;

  curtok = tFLOAT;
  n = 0;
  while (n < LNLEN && index (accepted, curc)) {
    curstr[n++] = curc;
    SCANC;
  }
  if (n == 0 || n >= LNLEN) {
    error ("#floating point number syntax");
    curfloat = 0.0;
    return;
  }
  curstr[n] = '\0';
  curfloat = atof (curstr);
}
  
/* Scan a string -- current character is ' or ".  Put string in curstr and
 * its length in toklen; curtok becomes tSTR.
 *
 * The string is also treated as a number and the value placed in curval.
 * The first character lands in the least significant byte, so 'a' = 0x61
 * and '3a' = 0x6133.  Only as many leading characters as fit in curval
 * contribute; later characters are stored in curstr but leave curval
 * alone.
 *
 * Outside gcc mode a doubled quote character inside the string stands for
 * one quote character.  In gcc mode the first quote character always ends
 * the string and a backslash introduces an escape: a run of digits is
 * read as an octal value, any other character is taken literally.
 * NOTE(review): the digit run is not limited in length and also accepts
 * 8 and 9 -- confirm this matches what the compiler emits.
 *
 * Fixes for Gcc:
 *   Long strings (> LNLEN) result in tSTR being returned multiple times.
 *   in_string is set when a piece is cut off so that the next scan()
 *   comes straight back here to continue with the same quote character.
 */
scanstr()
{
  static int startc;			/* static for long string */
  int val;
  char *p;

  if (in_string)			/* long string kludge */
    in_string = 0;
  else {
    startc = curc;
    SCANC;
  }
  p = curstr;
  curval = 0;
  curtok = tSTR;
  for (;;) {
    if (curc == startc) {
      SCANC;
      if (gcc || curc != startc) break;
      val = curc;			/* doubled quote: literal quote */
      SCANC;
    } else if (gcc && curc == '\\') {
      SCANC;
      if (isdigit (curc)) {
	val = 0;
	do {
	  val *= 8;
	  val += curc - '0';
	  SCANC;
	} while (isdigit (curc));
      } else {
	val = curc;
	SCANC;
      }
    } else if (curc == EOF || curc == '\n') {
      error ("#string does not end with a quote");
      break;
    } else {
      val = curc;
      SCANC;
    }
    /* Widen before shifting and stop once curval is full: shifting an
     * int by its width or more is undefined, and on hosts with 16 bit
     * ints even the third character would have been lost.
     */
    if (p - curstr < (int) sizeof curval)
      curval |= (unsigned long) val << (p - curstr) * 8;
    *p++ = val;
    if (p - curstr > LNLEN - 3) {	/* long string kludge */
      in_string = 1;
      break;
    }
  }
  *p = '\0';
  toklen = p - curstr;
}

/* Token-level counterpart of SCANNL: unless the current token already is
 * a newline, discard the rest of the input line and scan the token that
 * ends it.
 */
scannl()
{
  if (curtok != '\n') {
    SCANNL;
    scan();
  }
}

/* Scan an expression from the input and compile it into tokenized form.
 * Recursize descent.  Keeps track of size of stack needed to evaluate
 * expression with sp and reports error if sp gets too big.  Error strategy:
 * report and, if necessary, return phoney but legal token string.
 * Length is in char's.
 */
compile_exp (expstr, len)
expptr expstr;
int *len;
{
  int sp;
  expptr curpos;

  sp = 0;
  curpos = expstr;
  exp0 (expstr, &curpos, &sp);
  if (curtok == ':') {                 /* allow optional size attribute */
    scan();                            /* treat as a no op */
    if (curtok != tLBL)
      error ("#expected size attribute");
    else {
      if (1 != strlen (curstr) || -1 == strindex ("BWDbwd", *curstr))
        error ("#bad size attribute in expression");
      scan();
    }
  } 
  if (curpos == NULL)                  /* ran out of room -- make phoney exp */
    build_zero_exp (expstr, len);
  else {
    *curpos++ = tEOE;                  /* terminate token string */
    *len = curpos - expstr;
  }
  if (debug == 3) print_exp (expstr);
}

/* Write the tokenized form of the constant zero into expstr: a tNUM
 * token, an aligned U32 holding 0, and the tEOE terminator.  The length in
 * chars is stored through len.  Used for error handling and defaults.
 */
build_zero_exp (expstr, len)
expptr expstr;
int *len;
{
  register expptr out;

  out = expstr;
  *out = tNUM;
  out = ALIGN (out + 1);               /* value starts on aligned address */
  *((U32 *)out) = 0;
  out += sizeof (U32);
  *out = tEOE;
  *len = out + 1 - expstr;
}

/* Compiles lowest precedence level of expressions.  Sets curpos to NULL if
 * out of room in expstr.
 * BNF: exp1 [{+|-|or|xor} exp1]*
 */
exp0 (expstr, curpos, sp)
expptr expstr, *curpos;
int *sp;
{
  int curop;

  exp1 (expstr, curpos, sp);
  while (curtok == '+' || curtok == '-' ||
  curtok == tOR || curtok == tXOR) {
    curop = curtok;
    scan();
    exp1 (expstr, curpos, sp);
    if (*curpos == NULL) continue;
    if (*curpos - expstr >=            /* check expression too long */
    MAXEXP) { 
      error ("#expression too long");
      *curpos = NULL;
    } else {
      *(*curpos)++ = curop;            /* insert operator token */
      --*sp;                           /* binary op removes num from stack */
    }
  }
}

/* Compiles middle precedence level of expressions.
 * BNF: exp2 [{*|/|and|mod|shl|shr} exp2]*
 */
exp1 (expstr, curpos, sp)
expptr expstr, *curpos;
int *sp;
{
  int curop;

  exp2 (expstr, curpos, sp);
  while (curtok == '*' || curtok == '/' || curtok == tAND ||
  curtok == tMOD || curtok == tSHL || curtok == tSHR) {
    curop = curtok;
    scan();
    exp2 (expstr, curpos, sp);
    if (*curpos == NULL) continue;
    if (*curpos - expstr >=            /* check expression too long */
    MAXEXP) { 
      error ("#expression too long");
      *curpos = NULL;
    } else {
      *(*curpos)++ = curop;            /* insert operator token */
      --*sp;                           /* binary op removes num from stack */
    }
  }
}

/* Compiles highest precedence level of expressions.
 * BNF: {+|-|not|com}* {tNUM | tSTR | tLBL | ( exp0 ) | * | reglist |
 *      config}
 */
exp2 (expstr, curpos, sp)
expptr expstr, *curpos;
int *sp;
{
  int tok;
  U32 val;

  if (curtok == '(') {                 /* ( <expression> ) */
    scan();
    exp0(expstr, curpos, sp);
    if (curtok != ')') error ("#expected )");
    else scan();
    return;
  }
  if (curtok == '-' ||                 /* unary ops */
  curtok == tNOT || curtok == tCOM ||
  curtok == '+') {
    if (curtok == '-') tok = tUMINUS;
    else tok = curtok;
    scan();
    exp2 (expstr, curpos, sp);
    if (tok == '+' || *curpos == NULL) return;
    if (*curpos - expstr >=            /* check expression too long */
    MAXEXP) { 
      error ("#expression too long");
      *curpos = NULL;
    } else *(*curpos)++ = tok;         /* insert operator token */
    return;
  }
  switch (curtok) {
    case tNUM:                         /* constants */
    case tSTR:
      val = curval;
      tok = tNUM;
      scan();
      break;
    case tLBL:
      if (curhash == NULL) {           /* insert in hash table */
        curhash = (hashptr)insert_lbl (curstr, T_UNDF);
        ((lblptr)curhash)->exp = NULL;
      }
      if ((curhash->typ & T_LBL) == T_IMM) {
        val = ((lblptr)curhash)->val;  /* immediate label */
        tok = tNUM;
      } else {
        val = (U32)curhash;            /* other label */
        tok = tLBL;
      }
      scan();
      break;
    case '[':
      scan();
      tok = tNUM;
      val = get_bracket();
      break;
    case '*':
      scan();
      val = (U32) myalloc (sizeof (struct lblnode));
      ((lblptr)val)->typ = T_UNDF | T_STATIC;
      ((lblptr)val)->id = NULL;
      ((lblptr)val)->exp = NULL;
      proc_lbl ((lblptr)val);          /* create dummy label */
      tok = tLBL;
      break;
    default:
      error ("#expected expression factor");
      *curpos = NULL;
      break;
  }
  if (*curpos == NULL) return;
  if (ALIGN (*curpos+1) +              /* check expression too long */
  sizeof (U32) - expstr > MAXEXP) {
    error ("#expression too long");
    *curpos = NULL;
  } else if (++*sp > MAXSTK) {         /* requires too big a stack? */
    error ("#expression too complex");
    *curpos = NULL;
  } else {                             /* insert token */
    *(*curpos)++ = tok;                /* token type */
    *curpos = ALIGN (*curpos);
    *((U32 *)(*curpos)) = val;         /* value or pointer */
    *curpos += sizeof (U32);
  }
}

/* Called when already found [.  Get config list or reg list and return
 * value.  This is a departure from legal NS32000 assembler but makes
 * the compiler much more efficient (at least for reg list).  The compiler
 * can generate "SAVE REG_LIST" in a function prologue and generate
 * "REG_LIST: .EQU [R0, R1, R7]", for example, in the function epilogue.
 * National's Genix compiler generates horrible code instead.
 *
 * A register list sets bit n of the result for register Rn.  A config list
 * consists of the single letters I, F, M and C (either case), which set
 * bits 0, 1, 2 and 3.  An empty list yields 0.  The closing ] is consumed;
 * if it is missing an error is reported.
 */
U32
get_bracket()
{
  U32 val;
  int i;

  val = 0;
  if (curtok == tREG) {                /* reg list */
    for (;;) {
      /* Only shift for a register that really is R0-R7: for any other
       * register the shift count would be out of range or negative.
       */
      if (curval < rR0 || curval > rR0 + 7)
        error ("#register in register list not R0-R7");
      else
        val |= 1 << (curval - rR0);
      scan();
      if (curtok == ',') {
        scan();
        if (curtok != tREG) {
          error ("#bad register list syntax");
          break;
        }
      } else break;
    }
  } else if (curtok == tLBL) {         /* config list */
    for (;;) {
      if (curtok != tLBL || 1 != strlen (curstr) ||
      -1 == (i = strindex ("IFMC", TOUPPER (*curstr)))) {
        error ("#bad configuration list syntax");
        break;
      }
      val |= 1 << i;
      scan();
      if (curtok != ',') break;
      else scan();
    }
  }
  if (curtok == ']') scan();
  else error ("#reg list or configuration list syntax -- missing ]");
  return val;
}
  
/* For debugging scanner.  Scans the input to end of file and prints one
 * line per token.  Resets lnnum to 1 and counts lines as newline tokens
 * are seen.  Characters of a string that are not printable ASCII are shown
 * as octal escapes.
 */
print_toks()
{
  char *p;
  unsigned int c;

  lnnum = 1;
  for (;;) {
    scan();
    switch (curtok) {
      case tFLOAT:
	printf ("Float constant: %0.15g\n", curfloat);
	break;
      case tNUM:
        printf ("Numeric constant: 0x%lx\n", curval);
        break;
      case tSTR:
        printf ("String: ");
	for (p = curstr; p < curstr + toklen; ++p) {
	  /* Look at the byte as an unsigned value; where char is signed
	   * a byte of 0x80 or more would otherwise be sign extended and
	   * printed as a huge octal number.
	   */
	  c = *p & 0xff;
	  if (c >= 0x20 && c <= 0x7e) putchar (c);
	  else if (c == 0) printf ("\\0");
	  else printf ("\\0%o", c);
	}
	printf (" 0x%lx\n", curval);
        break;
      case tLBL:
        printf ("Label: %s\n", curstr);
        break;
      case tOP:
        printf ("Op: %s\n", curhash->id);
        break;
      case tREG:
        printf ("Register: %s 0x%lx\n", curhash->id, curval);
        break;
      case tEOF:
        puts ("End of file");
        return;
      case tBAD:
        puts ("Bad token");
        break;
      case '\n':
        puts ("End of line");
        ++lnnum;
        break;
      default:
        /* anything left is a literal character or an arithmetic op */
        if (curtok > 0x20)
          printf ("Literal: %c\n", curtok);
        else printf ("Arithmetic op: %s %d\n", curstr, curtok);
        break;
    }
  }
}

/* Parse two general operands separated by a comma into *p1 and *p2.  If
 * the comma is missing an error is reported and *p2 is given both mode
 * fields zero, i.e. register R0 mode.
 */
parse_gengen (p1, p2)
genptr p1, p2;
{
  parse_gen (p1);
  if (curtok == ',') {
    scan();
    parse_gen (p2);
    return;
  }
  error ("#expected second general operand");
  p2->type1 = p2->type2 = 0;           /* return R0 mode */
}

/* Parse general operand, return expression(s) and addressing mode(s).
 * Require Fx if flt is TRUE, Rx if flt is false.  All other fields of *p
 * are for output only.
 */
parse_gen (p)
register genptr p;
{
  p->type1 = p->type2 = 0;             /* init these in case of error */
  if (curtok == tREG) {
    if (p->flt && curval >= rF0 &&     /* Fx */
    curval <= rF0 + 7)
      p->type1 = curval - rF0;
    else if (!p->flt &&                /* Rx */
    curval >= rR0 && curval <= rR0 + 7)
      p->type1 = curval - rR0;
    else if (curval == rTOS)           /* TOS */
      p->type1 = mTOS;
    else error ("#general operand syntax -- bad register");
    scan();
  } else if (curtok == tEXT)           /* EXT(disp)+disp */
    getext (p);
  else if (curtok == '@') {            /* @disp (absolute) */
    scan();
    compile_exp (p->exp1, &p->len1);
    p->type1 = mABS;    
  } else if (curtok == tFLOAT && p->flt) {
    p->fltimm = curfloat;
    p->type1 = mIMM;
    scan();
  } else {                             /* expression... */
    compile_exp (p->exp1, &p->len1);
    if (curtok != '(') {               /* immediate */
      p->type1 = mIMM;
      if (p->flt) error ("#expected floating point constant");
    } else {                           /* disp(... */
      scan();
      if (curtok == tREG)              /* register indirect */
        single_ind (p);
      else                             /* double register indirect */
        double_ind (p);
      scan();
      if (curtok == ')') scan();
      else error ("#general operand syntax -- missing )");
    }                                  /* end of disp(... */
  }                                    /* end of disp... */
  if (curtok == '[')                   /* get [Rx:b|w|d|q] */
    getinx (p);
  if (debug == 6) printf ("gen op: line=%d type1=0x%x type2=0x%x\n",
    lnnum, p->type1, p->type2);
}

/* Get external addressing mode: EXT(disp1)[+disp2]
 */
getext (p)
register genptr p;
{
  char *err_msg;

  err_msg = "#bad external addressing mode syntax";
  scan();
  if (curtok != '(') {                 /* get ( */
    error (err_msg);
    return;
  }
  scan();
  compile_exp (p->exp1, &p->len1);     /* get disp1 */
  if (curtok != ')') {                 /* get ) */
    error (err_msg);
    return;
  }
  scan();
  if (curtok == '+' || curtok == '-')  /* check for disp2 */
    compile_exp (p->exp2, &p->len2);   /* get disp2 */
  else                                 /* get default zero expression */
    build_zero_exp (p->exp2, &p->len2);
  p->type1 = mEXT;
}

/* Get single indirect addressing mode.  Called when "disp (" has been
 * seen and the current token is tREG.  Sets p->type1 from the register in
 * curval and leaves the closing ) for the caller.  Any register outside
 * R0-R7 and the range rFP..rPC is reported as an error.
 */
single_ind (p)
register genptr p;
{
  if (curval >= rR0 && curval <= rR0 + 7) {
    p->type1 = mRXIND + curval - rR0;  /* disp(Rx) */
    return;
  }
  if (curval < rFP || curval > rPC) {
    error ("#general operand syntax -- bad register");
    return;
  }
  p->type1 = mFPIND + (0x3 & curval);  /* disp(fp|sp|sb|pc) */
}

/* Get double indirect addressing mode: disp2(disp1(fp|sp|sb)).  Called
 * when "disp (" has been seen and the current token is not a register.
 * The expression compiled so far turns out to be disp2, so it is copied
 * from p->exp1 to p->exp2 before disp1 is compiled into p->exp1.  On
 * return the current token is the inner ), which has been checked but not
 * consumed.
 */
double_ind (p)
register genptr p;
{
  register int i;

  for (i = 0; i < p->len1; ++i)        /* what we have is really disp2 */
    p->exp2[i] = p->exp1[i];
  p->len2 = p->len1;

  compile_exp (p->exp1, &p->len1);     /* now get disp1 */
  if (curtok != '(') {
    error ("#general operand syntax -- expected (");
    return;
  }
  scan();
  if (!(curtok == tREG && curval >= rFP && curval <= rSB)) {
    error ("#general operand syntax -- bad register");
    return;
  }
  p->type1 = mFPDBLIND + (0x3 & curval);
  scan();
  if (curtok != ')')
    error ("#general operand syntax -- missing )");
}

/* Parse index syntax: [Rx:b|w|d|q].  Called with the [ token current.
 * Sets p->inxreg to the number of the index register and p->type2 to the
 * scaled index addressing mode for the given size.  Scaled index on an
 * immediate operand is reported as an error, but parsing continues.
 */
getinx (p)
register genptr p;
{
  int scale;

  if (p->type1 == mIMM)
    error ("#scaled index not allowed with immediate mode");
  scan();
  if (!(curtok == tREG && curval >= rR0 && curval <= rR0 + 7)) {
    error ("#scaled index syntax -- expected general register");
    return;
  }
  p->inxreg = (curval - rR0) & 0x7;    /* register number, 0-7 */
  scan();
  if (curtok != ':') {
    error ("#scaled index syntax -- missing :");
    return;
  }
  scan();
  scale = -1;
  if (curtok == tLBL && strlen (curstr) == 1)
    scale = strindex ("BWDQ", SHIFTUP (*curstr));
  if (scale == -1) {
    error ("#scaled index syntax -- expected B, W, D, or Q");
    return;
  }
  p->type2 = scale + mBYTINX;
  scan();
  if (curtok == ']')
    scan();
  else
    error ("#scaled index syntax -- missing ]");
}

/* Return the position of the first occurrence of c in the string str,
 * counting from zero, or -1 if c does not occur.  The terminating null is
 * never matched.
 */
int
strindex (str, c)
char *str, c;
{
  register int pos;

  for (pos = 0; str[pos] != '\0'; ++pos)
    if (str[pos] == c)
      return pos;
  return -1;
}
