/* NS32000 Assembler
 * Expr.c
 * Evaluates expressions, prints expression token strings for debugging.
 */
#include <stdio.h>
#ifdef MSDOS
#  include "a_out.h"
#else
#  include "a.out.h"
#endif
#include "glob.h"

/* Print a tokenized, RPN expression on stdout (debugging aid).
 *
 * expstr points at the first token.  Tokens are printed in stored order
 * until tEOE is reached, at which point a newline is written.  If more
 * than MAXEXP bytes have been walked without finding tEOE, a diagnostic
 * is printed and the routine gives up.
 *
 * Operand tokens (tNUM, tLBL) are followed by an aligned payload; the
 * payload is located with ALIGN and skipped by its own size.
 */
print_exp (expstr)
expptr expstr;
{
  expptr p;
  char *id;
  char *name;                          /* text for simple one-byte operators */

  printf ("exp: ");
  for (p = expstr;;) {
    if (p - expstr > MAXEXP) {
      printf ("print_exp: expression seems too long\n");
      return;
    }
    name = NULL;
    switch (*p) {
      case '+':
      case '-':
      case '*':
      case '/':
        printf ("%c ", *p++);
        break;
      case tAND:    name = "AND ";    break;
      case tCOM:    name = "COM ";    break;
      case tMOD:    name = "MOD ";    break;
      case tUMINUS: name = "UMINUS "; break;
      case tNOT:    name = "NOT ";    break;
      case tOR:     name = "OR ";     break;
      case tSHL:    name = "SHL ";    break;
      case tSHR:    name = "SHR ";    break;
      case tXOR:    name = "XOR ";    break;
      case tNUM:
        p = ALIGN (p + 1);
        /* cast so the argument matches %lx whatever U32 really is */
        printf ("NUM(0x%lx) ", (unsigned long) *(U32 *)p);
        p += sizeof (U32);
        break;
      case tLBL:
        p = ALIGN (p + 1);
        id = (*(lblptr *)p)->id;
        printf ("LBL(%s) ", id == NULL? "*": id);
        /* payload is a label pointer, so skip a pointer, not a U32 */
        p += sizeof (lblptr);
        break;
      case tEOE:
        printf ("\n");
        return;
      default:
        printf ("bad_token(0x%x) ", *p++);
        break;
    }
    if (name != NULL) {                /* named operator: print, step over */
      printf ("%s", name);
      ++p;
    }
  }
}

/* One slot of the expression evaluation stack.  Besides the numeric
 * value, each slot carries the bookkeeping needed to work out the type
 * of the final result.
 */
struct stkelt {
  int undf;                  /* nonzero if an undefined symbol is referenced */
  int tcnt;                  /* net count of text segment references */
  int dcnt;                  /* net count of data segment references */
  int bcnt;                  /* net count of bss segment references */
  U32 val;                   /* numeric value */
};

/*    Evaluate tokenized, RPN expressions.  
 *
 *    "Resolved" in the assembler means we know everything we will ever
 *    know about an expression or symbol.  For example, after phase 1, if
 *    a label has not been defined, we know that it is T_UNDF and, so, it
 *    is resolved.  Text, data and bss labels do not get resolved until the
 *    end of phase 2 for the following reasons.  Text is the worst off --
 *    displacements will be minimized so the label offsets will change.
 *    Before the end of phase 2, data and bss labels are close to correct --
 *    their values are the offsets in their respective segments.  They
 *    just need to have the length of the preceding segments added to
 *    their offsets so that their values are module offsets.  This cannot
 *    happen until the end of phase 2, when the length of the text segment
 *    is determined.
 *
 *    There are three modes of expression evaluation, depending on the
 *    phase of the assembly:
 *
 * 1) When scanning input.  If we can resolve an expression to an immediate,
 *    we can store just the equation value.  This saves space -- no expression
 *    is stored -- and saves time -- no backpatching is needed.  Many
 *    expressions will be unresolved since we have not seen all the input.
 * 2) When minimizing the size of displacements in text segment.  Still cannot
 *    resolve all expressions since we are in the process of changing text
 *    symbols.  However, we can determine the type of all symbols.  Symbols
 *    of type T_TEXT, and displacements between symbols of type T_TEXT,
 *    will only have their values get smaller when they are resolved.
 *    When unresolved symbols are referenced, try to resolve them recursively
 *    before continuing evaluation of current expression.
 * 3) When emitting code.  Now it should be possible to resolve all symbols.
 *    When unresolved symbols are referenced, must resolve them recursively.
 *
 *    Expressions may be:
 *
 * 1) Unresolved.  Return with T_RESOLVED not set.
 * 2) T_UNDF.  Expression references a symbol which the source file does
 *    not define (symbol never appears to the left of a colon).  In this
 *    case the expression's value consists of a pointer to the undefined
 *    symbol (undf) and an offset (val).  Set T_RESOLVED on return.
 * 3) T_IMM.  Immediate.  Val returns the value.  Set T_RESOLVED on return.
 * 4) T_TEXT, T_DATA, T_BSS.  Expression's value boils down to the start of
 *    some segment plus some offset.  Val returns the offset.  Determining
 *    if this is the case requires keeping count of references to the various
 *    segments, e.g. text_sym1 - text_sym2 is immediate, but text_sym1 - 
 *    text_sym2 + bss_sym is T_BSS, and text_sym1 + bss_sym is illegal.
 *    Set T_RESOLVED on return.
 *
 *    Only called with exp == NULL or with expressions which are legal (leave
 *    one item on the stack) and can be evaluated using a stack of size MAXSTK.
 *    Exp == NULL only in phase 2 or 3.  This happens if evaluating an
 *    T_UNDF or T_TEXT symbol on recursive evaluations.
 *
 *    Note that .equ symbols never do get resolved unless 1) they can be
 *    resolved during phase1 or 2) they are referenced in an expression which
 *    is evaluated and recursively resolves the .equ symbol.
 */
eval_exp (exp, typ, val, undf)
expptr exp;
U16 *typ;
lblptr *undf;
U32 *val;
{
  expptr p;
  lblptr lp;
  struct stkelt stk [MAXSTK], *sp;
  register int tok;

  *undf = NULL;                        /* catches multiple undefined syms */
  if (exp == NULL) {                   /* must be phase 2 or 3, sym: or undf */
    if ((*typ & T_LBL) == T_UNDF ||    /* all but text known if phase 2 */
        phase == 3)                    /* even text known if phase 3 */
      *typ |= T_RESOLVED;
    return;
  }

  *typ |= T_RESOLVED;                  /* assume we can resolve it */
  sp = stk - 1;                        /* init stack pointer (empty stack) */
  for (p = exp; tEOE != (tok = *p++);) {
    switch (tok) {
      case tNUM:                       /* push an immediate operand */
        ++sp;
        sp->tcnt = sp->dcnt = sp->bcnt = sp->undf = 0;
        p = ALIGN (p);
        sp->val = *(U32 *)p;
        p += sizeof (U32);
        break;
      case tLBL:                       /* push a label operand */
        p = ALIGN (p);
        lp = *(lblptr *)p;             /* get pointer to label */
        /* The payload is a label pointer: step over a pointer, not a
         * U32, so the token stream stays in sync where the sizes differ.
         */
        p += sizeof (lblptr);
        eval_lbl_tok (lp, ++sp, typ, undf);
        break;
      case '+':
      case '-':                        /* pop two, push one */
        --sp;
        eval_plus_minus (sp, tok, typ);
        break;
      case tCOM:
      case tUMINUS:
      case tNOT:                       /* operate on top of stack in place */
        eval_unary_op (sp, tok, typ);
        break;
      default:                         /* every other token is a binary op */
        --sp;
        eval_binary_op (sp, tok, typ);
        break;
    }
  }
  *val = sp->val;                      /* single remaining element is result */
  eval_typ (sp, typ);
}

/* Applies + and - to the top two elements on the stack.  Puts the result
 * and its type info in *sp.  + and - have more subtle effect on type
 * than operators handled by eval_unary_op.
 */
eval_plus_minus (sp, tok, typ)
int tok;
U16 *typ;
register struct stkelt *sp;
{
  register struct stkelt *rhs;

  rhs = sp + 1;
  if (sp->undf && rhs->undf)
    if (phase == 1) *typ &= ~T_RESOLVED;
    else error ("#binary operator not compatible with operand types");
  if (tok == '+') {
    sp->undf += rhs->undf;
    sp->tcnt += rhs->tcnt;
    sp->dcnt += rhs->dcnt;
    sp->bcnt += rhs->bcnt;
    sp->val += rhs->val;
  } else {                             /* tok == '-' */
    sp->undf += rhs->undf;
    sp->tcnt -= rhs->tcnt;
    sp->dcnt -= rhs->dcnt;
    sp->bcnt -= rhs->bcnt;
    sp->val += ~rhs->val + 1;
  }
}

/* Applies a binary operator to the top two elements on the stack, writing
 * the result to *sp.
 */
eval_binary_op (sp, tok, typ)
int tok;
U16 *typ;
register struct stkelt *sp;
{
  register struct stkelt *rhs;

  rhs = sp + 1;
  if (sp->undf || sp->tcnt ||          /* check types -- must be immediate */
  sp->dcnt || sp->bcnt || rhs->undf || rhs->tcnt || rhs->dcnt || rhs->bcnt)
    if (phase == 1) *typ &= ~T_RESOLVED;
    else error ("#binary operator not compatible with operand types");
  switch (tok) {
    case '*':
      sp->val *= (sp+1)->val;
      break;
    case '/':
      sp->val /= (sp+1)->val;
      break;
    case tAND:
      sp->val &= (sp+1)->val;
      break;
    case tOR:
      sp->val |= (sp+1)->val;
      break;
    case tXOR:
      sp->val ^= (sp+1)->val;
      break;
    case tSHL:
      sp->val <<= (sp+1)->val;
      break;
    case tSHR:
      sp->val >>= (sp+1)->val;
      break;
    case tMOD:
      sp->val %= (sp+1)->val;
      break;
  }
}

/* Applies a unary operator to an element on the stack.
 */
eval_unary_op (sp, tok, typ)
int tok;
U16 *typ;
struct stkelt *sp;
{
  if (sp->undf || tok != tUMINUS && (sp->tcnt || sp->dcnt || sp->bcnt))
    if (phase == 1) *typ &= ~T_RESOLVED;
    else error ("#unary operator not compatible with operand type");
  if (tok == tCOM) sp->val = ~sp->val;
  else if (tok == tNOT) sp->val ^= 1;
  else {                                /* tUMINUS */
    sp->val = ~sp->val + 1;
    sp->tcnt = -sp->tcnt;
    sp->dcnt = -sp->dcnt;
    sp->bcnt = -sp->bcnt;
  }
}

/* Called when a label token is found while evaluating a tokenized expression.
 * If the label is unresolved, tries to resolve it if phase > 1.  Places
 * value and type info on evaluation stack.
 */
eval_lbl_tok (lp, sp, typ, undf)
struct stkelt *sp;
lblptr lp, *undf;
U16 *typ;
{
  if (!(lp->typ & T_RESOLVED)) {       /* check if symbol resolved */
    if (phase > 1) {
      if (lp->typ & T_CYCLE)           /* check for cycles */
        error ("#expression has cyclic reference");
      else eval_lbl (lp);              /* recursively evaluate symbol */
    }
    if (!(lp->typ & T_RESOLVED)) *typ &= ~T_RESOLVED;
  }
  sp->val = lp->val;
  sp->undf = sp->tcnt = sp->dcnt = sp->bcnt = 0;
  switch (lp->typ & T_LBL) {
    case T_UNDF:
      if (*undf != NULL)
        if (phase > 1)
          error ("#multiple undefined symbols in expression");
        else *typ &= ~T_RESOLVED;
      sp->val = 0;
      *undf = lp;
      ++sp->undf;
      break;
    case T_TEXT:
      ++sp->tcnt;
      break;
    case T_DATA:
      ++sp->dcnt;
      break;
    case T_BSS:          
      ++sp->bcnt;
      break;
    default:
      break;
  }
}

/* Reduces the reference counts gathered in *sp to a single expression
 * type and stores it in the T_LBL field of *typ; the other bits of *typ
 * are preserved.
 *
 * The type is legal only if the absolute values of the four counts sum
 * to at most one.  An illegal combination is reported when phase > 1.
 * The type is then chosen by the first nonzero count, in the order
 * undefined, text, data, bss; with all counts zero the type is T_IMM.
 */
#define ABS(x) ((x) < 0? -(x): (x))
eval_typ (sp, typ)
register U16 *typ;
register struct stkelt *sp;
{
  int nrefs;                           /* total references of any kind */
  int kind;                            /* resulting type bits */

  nrefs = ABS (sp->undf) + ABS (sp->tcnt) + ABS (sp->dcnt) + ABS (sp->bcnt);

  *typ &= ~T_LBL;                      /* clear type but save flags */
  if (phase > 1 && nrefs > 1)          /* type legal? */
    error ("#expression has bad type");

  if (sp->undf)
    kind = T_UNDF;
  else if (sp->tcnt)
    kind = T_TEXT;
  else if (sp->dcnt)
    kind = T_DATA;
  else if (sp->bcnt)
    kind = T_BSS;
  else
    kind = T_IMM;
  *typ |= kind;                        /* set new type */

  if (debug == 5) {
    printf ("eval_typ: undf=%d tcnt=%d dcnt=%d bcnt=%d ret=0x%x\n",
      sp->undf, sp->tcnt, sp->dcnt, sp->bcnt, *typ);
  }
}

/* Passed a lblptr, tries to resolve its type and value.
 */
eval_lbl (lp)
lblptr lp;
{
  int lnnum_save;
  lblptr undf;

  lp->typ |= T_CYCLE;                  /* catches cycles */
  lnnum_save = lnnum;                  /* stack current line number */
  lnnum = lp->lnnum;                   /* for error reporting */
  eval_exp (lp->exp, &lp->typ, &lp->val, &undf);
  if ((lp->typ & (T_LBL | T_RESOLVED)) == (T_UNDF | T_RESOLVED) &&
  undf != NULL)
    error ("#label equated to external symbol");
  lnnum = lnnum_save;                  /* restore line number */
  lp->typ &= ~T_CYCLE;                 /* reset cycle catcher */
}

/* Expressions are byte arrays in which pointers and longs are embedded.
 * Those operands must be properly aligned without wasting more space
 * than necessary, and the scheme must be portable.  This routine sets
 * the globals exp_align and exp_align_mask so that an operand may be
 * placed at ((char *)exp + exp_align) & exp_align_mask.
 *
 * The required alignment is measured as the offset the compiler gives a
 * pointer member that follows a single byte in a struct; it is assumed
 * to be a power of two.  On return exp_align holds that alignment minus
 * one, and exp_align_mask holds the complement of (alignment - 1).
 */
init_exp()
{
  struct {
    U8 tok;
    expptr ptr;
  } probe;

  exp_align = (char *)(&probe.ptr) - (char *)(&probe.tok);
  exp_align_mask = ~(exp_align - 1);   /* clears the low-order bits */
  exp_align -= 1;                      /* amount added before masking */
}
