static char RCSid[] = "$Id: sym.c,v 1.12 1992/08/17 13:36:56 waite Exp $";
/* Copyright, 1989, The Regents of the University of Colorado */

/* 								June 29, 1986
 *     Removed the symbol table parameter from all sym module routines.
 * All sym routines now use the table pointed at by the cur_symtbl internal
 * variable instead.
 *
 * NOTE: This module modified from code by
 *	 W.M. Waite
 *	 Department of Electrical and Computer Engineering
 *	 University of Colorado, Boulder
 *
 * Routines:
 *	1) prtsym - Print a symbol table entry.
 *	2) dmpsym - Dump the symbol table.
 *	3) getsymb - Obtain the symbol corresponding to an identifier
 *		     or literal.
 *	4) idnt_exists - Check whether a given identifier has been previously
 *			 entered into a symbol table.
 *	5) getid - Obtain the identifier corresponding to a symbol.
 *	6) num_assoc_sym - Return count of associated symbol groups in
 *		current symbol table.
 *	7) init_sym - Get a new symbol table.
 *      8) set_cur_symtbl - Sets the symbol table that all other routines
 *		in the sym module use.
 *	9) get_symbol_queue - Places selected classes of SYMBOLS into
 *		a queue.
 *	10) delete_symtbl - Free the storage used by the current symbol table.
 *	11) rename_identifier - Change all the symbol pointer nodes
 *			       pointing at an identifier to point at
 *			       a different identifier.
 *	12) first_symbol_ptr - Return a pointer to a symbol_ptr_node
 *			      pointing at a supplied SYMBOL.
*/



#include <stdio.h>
#include <stdlib.h>
#include <curses.h>
#include "cagt_config.h"
#include "support.h"
#include "queue.h"


/*    cagt might require that more than a single symbol table be available
 * simultaneously. This means that the symbol table cannot be directly
 * declared within the module as is usually done. Thus, the init_sym
 * and set_cur_symtbl routines were added, and the static table is
 * no longer used.
*/
/* The following variable constitutes the internal state of this module */
/*private struct chainelt *hash[HTSIZE] = NULL;*/    /* Symbol table access */
/* cur_symtbl is the hash table that every routine in this module operates
 * on: it is indexed by bin number (0..HTSIZE-1) and each bin heads a chain
 * of struct chainelt. It is replaced by set_cur_symtbl and reset to 0 by
 * delete_symtbl.
*/
private SYMTBL cur_symtbl;			    /* Current symbol table */







public int prtsym(d, s)
   FILE *d;
   SYMBOL s;
/*
 * Print a symbol table entry
 *
 * On entry:
 *	d is the stream to write to.
 *	s is a valid SYMBOL; string[s->str] addresses its text and s->l is
 *	   the length of that text.
 *
 * On exit:
 *	s has been added to the current line of d. An identifier
 *	   (s->is_IDNT) is written as-is. Anything else is written as a
 *	   literal: enclosed in single quotes, with every single quote
 *	   inside the text doubled.
 *	prtsym returns s->l for an identifier and s->l + 2 for a literal.
 *	   NOTE(review): quotes doubled inside a literal are not counted in
 *	   the returned value - confirm whether callers rely on that.
*/
   {
   char *text = string[s->str];
   int i;

   /*
    * The text is written straight from the string table. Copying it into
    * a fixed BUFSIZ buffer first would overflow for s->l >= BUFSIZ.
    * Output stops at s->l characters or at a NUL, whichever comes first.
   */
   if (s->is_IDNT)
      {
      for (i = 0; i < s->l && text[i] != '\0'; i++) putc(text[i], d);
      }
   else
      {
      putc('\'', d);
      for (i = 0; i < s->l && text[i] != '\0'; i++)
         {
         putc(text[i], d);
         if (text[i] == '\'') putc('\'', d);	/* Double embedded quotes */
         }
      putc('\'', d);
      }
   return(s->l + (s->is_IDNT ? 0 : 2));
   }







#ifdef UNUSED
/* File-scope scratch buffer, compiled only when UNUSED is defined. The
 * routines visible in this file that use a buffer named c declare their
 * own local one, so nothing here appears to reference this definition.
*/
private char c[BUFSIZ];
#endif



#ifdef UNUSED
public void dmpsym(d)
   FILE *d;
/*
 * Dump the symbol table
 *
 * On entry:
 *	A current symbol table exists.
 *	d is the stream to write to.
 *
 * On exit:
 *	The entire current symbol table has been written to d: for every
 *	non-empty bin, one line per chain element giving its class tag, its
 *	text, its token type and (when it has one) its SYMBOL as printed by
 *	prtsym. Runs of empty bins are summarised on a single line.
 *
 * Notes:
 *	Chain elements whose dat field is null are tagged "(NOSYM)" instead
 *	of being dereferenced; getsymb stores a null dat for token types
 *	other than IDNT and LITT.
 *	NOTE(review): a single empty bin is reported as "   Bin %d" without
 *	the word "empty", unlike a run of empty bins - confirm that this is
 *	the intended wording.
*/
   {
   int i, j;
   struct chainelt *p;

   (void) fprintf(d,"\n Symbol table contents-\n");
   j = 0;				/* First bin not yet reported */
   for (i = 0; i < HTSIZE; i++)
      {
      p = cur_symtbl[i];
      if (!p) continue;

      /* Report the empty bins j..i-1 that precede this one */
      if (j == i-1)
         (void) fprintf(d,"   Bin %d\n",j);
      else if (j < i)
         (void) fprintf(d,"   Bins %d-%d empty\n",j,i-1);
      j = i+1;

      (void) fprintf(d,"   Bin %d\n",i);
      for ( ; p; p = p->nxt)
         {
         if (p->dat == (SYMBOL) 0)
            (void) fprintf(d,"(NOSYM) ");
         else if (p->dat->is_IDNT)
            (void) fprintf(d,"(IDNT-%s) ",
                           ((p->dat->non_term) ? "NT":"T"));
         else
            (void) fprintf(d,"(LITT) ");

         /* A precision bounds the text, so no fixed-size copy is needed */
         (void) fprintf(d, "\t->%.*s<-\tToktyp: %d  ",
                        (int) p->len, p->ptr, p->typ);
         if (p->dat != (SYMBOL) 0)
            (void) prtsym(d,p->dat);
         (void) putc('\n',d);
         }
      }

   /* Report the empty bins left over after the last non-empty one */
   if (j == HTSIZE-1)
      (void) fprintf(d,"   Bin %d\n",j);
   else if (j < HTSIZE-1)
      (void) fprintf(d,"   Bins %d-%d empty\n", j, HTSIZE-1);
   (void) putc('\n',d);
   }
#endif







public void getsymb(c, l, t, s, nt)
   char *c;
   int l, *t;
   SYMBOL *s;
   char *nt;
/*
 * Obtain the symbol corresponding to an identifier or literal
 *
 * On Entry:
 *	A current symbol table exists.
 *	cur_symtbl points at the symbol hash table to be used.
 *      c points to the identifier or literal
 *      l=length of the identifier or literal
 *      t is a variable containing terminal code IDNT or LITT
 *	if t is IDNT then nt is TRUE if c points at a terminal in the EBNF,
 *	   FALSE if a non-term
 * On Exit:
 *	If the identifier or literal has appeared previously then:
 *         t remains unchanged
 *          s has been set to the symbol set on its first appearance
 *	    if t s IDNT, nt has been set to the logical OR if nt and it's
 *	       previous value, the symbol table has been updated with this new
 *	       nt value.
 *	Otherwise
 *         t remains unchanged
 *         s has been set to a new symbol
 *	   nt is unchanged, the new symbol table entry contains it's value.
*/
   {
   register struct chainelt *ent;
   register int test;

   test = 1;
   switch (l) {
	case 0 : ent = (struct chainelt *) &cur_symtbl[0]; break;
	case 1 : ent = (struct chainelt *) &cur_symtbl[*c]; break;
	default :
            {
            register char *cr = c;
            register int key = 0;
            register int lr = l;

            do key += *cr++; while (--lr);
            ent = (struct chainelt *) &cur_symtbl[key % HTSIZE];
            }
	}

   do {
      if (ent->nxt == (struct chainelt *) 0) break;
      ent = ent->nxt;
      if ((*t == ent->typ) && ((test = l - ent->len) == 0))
	 if (l == 0)
		test = 0;
	     else
		{
        	register char *cr = c;
        	register char *p = ent->ptr;
        	register int lr = l;

        	do ; while (*cr++ == *p++ && --lr);
        	test = cr[-1] - p[-1];
        	}
      } while (test > 0);


   if (test != 0)						/* No match */
	 {
         char save;
         int stringindex;
         struct chainelt *temp;

	 /* Last character in c is not put in string table */
         save = c[l];
	 c[l] = ' ';
         stringindex = stostr(c,l+1);
         c[l] = save;

	 /* Get a new hash table element */
	 GET_MEMORY(temp,struct chainelt *,1,struct chainelt,"getsymb","1")
         temp->nxt = ent->nxt;
	 ent->nxt = temp;
         if (test < 0)				  /* Put new elt before ent */
	       {
               ASNSTRPTR(temp->ptr, ent->ptr);
               temp->len = ent->len;
               temp->typ = ent->typ;
	       temp->dat = ent->dat;
               }
	    else
	       ent = temp;
         ASNSTRPTR(ent->ptr, string[stringindex]);
	 ent->len = l;
         ent->typ = *t;
         if ( (*t == IDNT) || (*t == LITT) )
	       {
               mkidentifier(stringindex, &ent->dat, l, *t, *nt);
               }
	    else
	       {
               ent->dat = (SYMBOL) 0;
               }
         }

   if (*t == IDNT) *nt = ent->dat->non_term = ent->dat->non_term || *nt;
   *s = ent->dat;
   }







public int idnt_exists(c, l)
   char *c;
   int l;
/*
 * Determine if the symbol corresponding to an identifier exists.
 *
 * On Entry:
 *	A current symbol table exists.
 *	cur_symtbl points at the hash table to be used.
 *	c points to the identifier (not literal).
 *	l = length of identifier.
 *
 * On Exit:
 *	If the current table holds an entry of type IDNT with the same
 *	   length and characters as c, idnt_exists returns TRUE (1),
 *	   otherwise FALSE (0). The table is not modified.
 *
 * Notes:
 *	The bin is the sum of the l characters of c, reduced modulo HTSIZE
 *	and forced non-negative, so characters with a negative value (signed
 *	char) or a value >= HTSIZE cannot index outside the table. For every
 *	input that previously produced a valid bin the bin is unchanged.
 *	The search stops early at the first IDNT entry that compares greater
 *	than c (by length, then character by character).
*/
   {
   struct chainelt *ent;
   int test = 1;		/* >0 keep looking, 0 match, <0 passed it */
   int key = 0;
   int i;

   for (i = 0; i < l; i++) key += c[i];
   key %= HTSIZE;
   if (key < 0) key += HTSIZE;

   /*
    * test keeps its previous (positive) value when an entry is not an
    * IDNT, so such entries are simply stepped over.
   */
   for (ent = cur_symtbl[key]; ent != (struct chainelt *) 0; ent = ent->nxt)
      {
      if ((IDNT == ent->typ) && ((test = l - ent->len) == 0))
         {
         char *p = ent->ptr;

         for (i = 0; i < l; i++)
            if ((test = c[i] - p[i]) != 0) break;
         }
      if (test <= 0) break;
      }

   return(test == 0);				    /* (test == 0) => match */
   }







#ifdef UNUSED
public char *getid(c, l, s)
   char *c;
   int l;
   SYMBOL s;
/*
 * Obtain the identifier corresponding to a symbol
 *
 * On Entry:
 *	c addresses the destination buffer and l is its limit in characters.
 *	s is a valid SYMBOL.
 *
 * On Exit:
 *	get_str has been called with c, l, the string index s->str and a
 *	   blank; its result is returned unchanged. The intended contract is
 *	   that the identifier for s, which is not longer than l characters,
 *	   has been stored at c and the result points to the first unused
 *	   character position.
*/
   {
   char *first_unused;

   first_unused = get_str(c, l, s->str, ' ');
   return(first_unused);
   }
#endif







public int num_assoc_sym()
/*
 * Count the in-use symbols that belong to a non-trivial association circle.
 *
 * On entry:
 *	A current symbol table exists.
 *
 * On exit:
 *	A queue of every symbol class (literals, terminals, non-terminals)
 *	restricted to used, associated symbols has been requested from
 *	get_symbol_queue, counted with count_queue and deleted again.
 *	The count is returned.
 *
 * Notes:
 *	Building a queue only to count it is not the cheapest approach, but
 *	it is the simplest. If it proves too slow, the scan in
 *	get_symbol_queue can be copied and specialised here.
*/
{
   QUEUE_PTR assoc_syms = get_symbol_queue(TRUE, TRUE, TRUE, TRUE, TRUE);
   int count = count_queue(assoc_syms);

   delete_queue(assoc_syms);
   return count;
}








public SYMTBL init_sym()
/*
 * Create a new, empty symbol table
 *
 * On Exit:
 *	Storage for HTSIZE chain heads has been obtained through GET_MEMORY
 *	   and every head has been set to the null pointer.
 *	init_sym returns a pointer to that hash table for use in symbol
 *	   table access. The current symbol table is not changed.
*/
   {
   SYMTBL table;
   int bin;

   GET_MEMORY(table, SYMTBL, HTSIZE, struct chainelt *, "init_sym", "1")
   bin = 0;
   while (bin < HTSIZE)
      {
      table[bin] = (struct chainelt *) 0;		/* Empty chain */
      bin++;
      }
   return(table);
   }







public SYMTBL set_cur_symtbl(table)
   SYMTBL table;
/*
 * Select the symbol table used by the sym module
 *
 * On Entry:
 *	table points at a valid symbol table
 *
 * On Exit
 *	cur_symtbl has been set to table, so table is the current symbol
 *		table for the sym module.
 *	set_cur_symtbl returns the value cur_symtbl held before the call.
*/
   {
   SYMTBL previous;

   previous = cur_symtbl;
   cur_symtbl = table;
   return previous;
   }







public QUEUE_PTR get_symbol_queue(litt, term, non_term, used_only, assoc)
   int	litt;
   int	term;
   int	non_term;
   int	used_only;
   int	assoc;
/*
 * Get a symbol queue
 *
 * On entry:
 *	A current symbol table exists.
 *	(litt == TRUE) if LITTs should be included
 *	(term == TRUE) if terminal IDNT's should be included
 *	(non_term == TRUE) if non-terminal IDNT's should be included
 *	(used_only == TRUE) if only symbols with a back_chain should be
 *	    included.
 *	(assoc == TRUE) if only symbols with a back_chain and a non-trivial
 *	    assoc_circle should be included. This implies used_only.
 *
 * On exit:
 *	A queue pointer to a queue containing the desired items is returned.
 *	The queue comes from init_queue and holds the SYMBOLs themselves
 *	(cast to void *), in bin order and chain order.
 *	Chain elements without a SYMBOL (null dat) are skipped, and an empty
 *	queue is returned when there is no current table.
*/
   {
   int i;
   struct chainelt *p;
   QUEUE_PTR queue;
   SYMBOL cur;

   queue = init_queue();

   if (cur_symtbl == (SYMTBL) 0) return queue;	/* Nothing to scan */

   if (assoc) used_only = TRUE;

   for (i = 0; i < HTSIZE; i++)
      {
      for (p = cur_symtbl[i]; p; p = p->nxt)
         {			/* Check the attributes against conditions */
         cur = p->dat;
         if (cur == (SYMBOL) 0) continue;	/* Entry has no SYMBOL */

         if (cur->is_IDNT)
            {
            if (cur->non_term)
               { if (!non_term) continue; }	/* Reject non-term */
            else
               { if (!term) continue; }		/* Reject term */
            }
         else
            {
            if (!litt) continue;		/* Reject literal */
            }

         if (used_only && !(cur->back_chain)) continue; /* Reject unused */

         if (assoc && (cur->assoc == cur)) continue;	/* Reject non-assoc */

         enqueue(queue, ((void *) cur) );	/* Meets the criteria */
         }
      }

   return queue;
   }







public void delete_symtbl()
/*
 * Get a symbol queue
 *
 * On entry:
 *	A current symbol table exists.
 *
 * On exit:
 *	The storage used by the symbol table has been returned to the
 *	run time system. The result of attempts to access the symbol table
 *	is undefined.
*/
{
    register int i;			/* Current table index */
    register struct chainelt *celt;	/* Current chain_elt */
    register struct chainelt *tmp_celt;	/* Used to delete chain_elt */
    register SYMBOL sym;			/* Current *SYMBOL */
    SYMBOL_BACK_PTR back;		/* Current SYMBOL_BACK_PTR */
    SYMBOL_BACK_PTR tmp_back;		/* Used to delete SYMBOL_BACK_PTR */

    /*
     * Abstract:
     *
     * for (every chainelt in the table)
     *     {
     *	  find the symbol the chain_elt points at.
     *	  for (every SYMBOL_BACK node the *SYMBOL points at)
     *	      {
     *	      delete the associated SYMBOL_PTR_NODE.
     *	      delete the SYMBOL_BACK node.
     *	      }
     *    delete the *SYMBOL.
     *	  delete the chainelt.
     *	  }
     * delete the symbol table.
    */	       


    for (i = 0; i < HTSIZE; i++)
	{
	celt = cur_symtbl[i];
	while (celt)
	    {
	    sym = celt->dat;
	    back = sym->back_chain;
	    while (back)
		{
		FREE_MEMORY(back->back, "Symbol pointer node", 1);
		tmp_back = back;
		back = back->next;
		FREE_MEMORY(tmp_back, "Symbol back node", 1);
		}
	    FREE_MEMORY(sym, "SYMBOL node", 1);
	    tmp_celt = celt;
	    celt = celt->nxt;
	    FREE_MEMORY(tmp_celt, "Chainelt node", 1);
	    }
	}
    FREE_MEMORY(cur_symtbl, "Symbol table", 1);
    cur_symtbl = (SYMTBL) 0;	/* Zero out the table */
}







/*    In order to be able to rename identifiers in the EBNF grammar quickly,
 * individual identifier nodes in the grammar do not point directly at
 * the SYMBOL for that identifier, but rather to a symbol_ptr_node that points
 * indirectly at the SYMBOL in question. When building the grammar structure,
 * the following routines are used to generate the required
 *  symbol_ptr_node(s) and maintain the relationship between the two.
*/



public void rename_identifier(old_sym,new_sym)
   SYMBOL old_sym,
	  new_sym;
/*
 * Redirect every use of one SYMBOL to another
 *
 * On Entry:
 *	old_sym and new_sym are both valid SYMBOLs.
 *	old_sym and new_sym must both exist in the same symbol table,
 *	   otherwise the action of this routine is undefined.
 *
 * On Exit:
 *	if (old_sym != new_sym)
 *		If new_sym had no back_chain it has first been unlinked from
 *		   whatever association circle it was in.
 *		All the symbol_ptr_nodes pointing at old_sym have been changed
 *		   to point at new_sym. The back_chain list from old_sym has
 *		   been transferred to the tail of the back_chain list on
 *		   new_sym, and old_sym's back_chain is empty.
 *		old_sym and new_sym are members of the same association
 *		   circle.
 *	otherwise
 *		No changes are made.
*/
   {
   SYMBOL_BACK_PTR moved;	/* Head of the chain taken from old_sym */
   SYMBOL_BACK_PTR node;	/* Cursor over back_chain nodes */
   SYMBOL walker;		/* Cursor over an association circle */

   if (old_sym == new_sym) return;

   /* NOTE: UNDO_POINT - The actions taken by this routine must be
    *	    remembered by any future UNDO system that might be added
    *	    to CAGT. In particular, a pointer to the old symbol must
    *	    kept, and the pointer changes must be remembered.
   */

   /*
    * An unused new_sym is taken out of its present association circle:
    * its predecessor is made to skip it and it becomes a circle of one.
    * This renaming forces it into a new context.
   */
   if (!new_sym->back_chain && (new_sym->assoc != new_sym))
	{
	for (walker = new_sym; walker->assoc != new_sym; walker = walker->assoc)
	    ;
	walker->assoc = new_sym->assoc;
	new_sym->assoc = new_sym;
	}

   /* Hand old_sym's back_chain over to the tail of new_sym's back_chain */
   moved = old_sym->back_chain;
   if (new_sym->back_chain)
	{
	for (node = new_sym->back_chain; node->next; node = node->next)
	    ;
	node->next = moved;
	}
   else
	{
	new_sym->back_chain = moved;
	}
   old_sym->back_chain = (SYMBOL_BACK_PTR) 0;

   /* Retarget the symbol_ptr_nodes that came across with the chain */
   for (node = moved; node; node = node->next)
	node->back->symbol_ptr = new_sym;

   /*
    * Join the two association circles unless new_sym is met while walking
    * old_sym's circle. The last member of that circle is not compared
    * with new_sym; if it is new_sym, the two assignments below put
    * new_sym->assoc back to old_sym, leaving the circle as it was.
   */
   for (walker = old_sym; walker->assoc != old_sym; walker = walker->assoc)
	if (walker == new_sym)
	    return;		/* They're already associated - we're done */
   walker->assoc = new_sym;	/* End of old's circle points at new */

   for (walker = new_sym; walker->assoc != new_sym; walker = walker->assoc)
	;
   walker->assoc = old_sym;	/* End of new's circle points at old */
   }







public SYMBOL_PTR_NODE_PTR first_symbol_ptr(sym)
   SYMBOL sym;
/*
 * Return the symbol_ptr_node through which sym is referenced
 *
 * On Entry:
 *	sym is a valid SYMBOL.
 *
 * On Exit:
 *	if sym has any elements in its back_chain, first_symbol_ptr returns
 *	   a pointer to the symbol_ptr_node pointed at by that first
 *	   element.
 *	otherwise, a new symbol_ptr_node is set to point at sym and is
 *	   added to sym's back_chain as its only element. Then, a pointer
 *	   to this new node is returned.
 *
 * Notes:
 *	The last GET_MEMORY argument is passed as a string ("1", "2") to
 *	agree with the other GET_MEMORY calls in this module.
 *	NOTE(review): this assumes GET_MEMORY treats that argument as a
 *	string tag - confirm against the macro definition.
*/
   {
   /* NOTE: UNDO_POINT - The actions taken by this routine must be
    *	    remembered by any future UNDO system that might be added
    *	    to CAGT. In particular, any new symbol_ptr_node must be
    *	    remembered.
   */

   if (sym->back_chain == (SYMBOL_BACK_PTR) 0) /*No existing back_chain?*/
	{
	SYMBOL_PTR_NODE_PTR new_symptr;

	/* Get new symbol_ptr_node */
	GET_MEMORY(new_symptr, SYMBOL_PTR_NODE_PTR, 1, SYMBOL_PTR_NODE,
		   "first_symbol_ptr", "1")
	new_symptr->symbol_ptr = sym;

	/* New back chain element */
	GET_MEMORY(sym->back_chain, SYMBOL_BACK_PTR, 1, SYMBOL_BACK,
		   "first_symbol_ptr", "2")
	sym->back_chain->next = (SYMBOL_BACK_PTR) 0;
	sym->back_chain->back = new_symptr;
	}

   return(sym->back_chain->back);
   }
