/* RCS revision identification string for this source file. */
static char RCSid[] = "$Id: gram_rev.c,v 1.13 1992/07/28 16:26:34 waite Exp $";
/* Copyright, 1989, The Regents of the University of Colorado */

/* This module contains routines to build a parsing grammar given an abstract
 * grammar containing connection points, the original concrete grammar, and
 * a relationship file.
 *
 * Routines:
 *	1) reorder_dec_to_abs - Uses the order of the original abstract
 *			        grammar to reorder the decorated abstract
 *				grammar, thus preparing for the generation
 *				of the parsing grammar.
 *	2) abstract_to_parsing - Given a concrete EBNF grammar structure
 *				 and a corresponding abstract structure
 *				 decorated with connection points, convert
 *				 the concrete grammar to a decorated parsing
 *				 grammar.
 *	3) get_expansion_nodes - Searches the abstract grammar for the
 *				 expansion rule that corresponds to a given
 *				 abstract/concrete node pair and returns a
 *				 copy of that rule's expansion nodes.
 */




#include <stdio.h>
#include <stdlib.h>
#include "cagt_config.h"
#include "cagt_usr_err.h"
#include "support.h"
#include "queue.h"
#include "gram.h"







public int reorder_dec_to_abs(abs,dec)
   LEFT_NODE_PTR abs;
   LEFT_NODE_PTR dec;
/*
 * On Entry:
 *	abs points at the left side chain of the abstract grammar.
 *	dec points at the left side chain of the decorated abstract grammar.
 *	Both grammars share the same symbol table allowing the direct
 *	   comparison of their identifiers and literals.
 *
 * On Exit:
 *      abs points at the left side chain of the decorated abstract
 *	   grammar. This version of the decorated grammar is in
 *	   the same order as the original abstract grammar, which is
 *	   deleted as a side effect of the process.
 *	The grammar structure pointed at by dec has had it's storage returned
 *	   to the operating system, and the value of dec is invalid.
 *	reorder_dec_to_abs returns the number of decorated rules that
 *	   were not matched with abstract rules (0 implys a perfect
 *	   match).
*/
   {
   LEFT_NODE_PTR abs_cur;		      /* Remembers beginning of abs */
   LEFT_NODE_PTR temp;		   /* Used to count and to delete dec rules */
   LEFT_NODE_PTR dec_copy;		 /* Used to compare dec rule to abs */
   int abs_num = 0;		     /* Number of rules in abstract grammar */
   int dec_rule = 0;			     /* Keeps track of current rule */
   int errors = 0;				    /* # of unmatched rules */
   char match;					   /* TRUE if rules matched */
   int *matched, *ismatched;		  /* Flags indicating matched rules */


   /* Count the rules in the abstract grammar */
   temp = abs;
   while (temp)
	{
	abs_num++;
	temp = temp->next_rule;
	}

   GET_MEMORY(matched, int *, abs_num, int, "reorder_dec_to_abs", 2);
   ismatched = matched + abs_num;
   do *(--ismatched) = 0; while (ismatched > matched);

   while (dec)
	{
	dec_rule++;
 	dec_copy = dup_left_side(dec);
 	dec_copy->right_side = dup_right_side(dec->right_side);
	remove_connections(dec_copy->right_side);
	trim_nesting(dec_copy->right_side);
	abs_cur = abs; ismatched = matched;
	match = FALSE;
	while(abs_cur && !match)
	    {
	    if (same_rule(dec_copy,abs_cur))
		  {
		  match = TRUE; (*ismatched)++;
		  delete_right_side(abs_cur->right_side);
		  abs_cur->right_side = dec->right_side;
		  }
	       else {
	          abs_cur = abs_cur->next_rule;
		  ismatched++;
		  }
	    }
	temp = dec;
	dec = dec->next_rule;
	if (!match)
	    {
	    errors++;
	    cagt_msg(0 , MSG_RET, (cagt_msg_text, msg_arr[-(CAGT_DECNOMAT)], dec_rule))
	    output_left_node(stderr,temp);
	    output_right_side(stderr,temp->right_side,FALSE);
	    putc('\n',stderr);
	    delete_right_side(temp->right_side);
	    }
	delete_right_side(dec_copy->right_side);
	FREE_MEMORY(temp, "reorder_dec_to_abs", 1)
	FREE_MEMORY(dec_copy, "reorder_dec_to_abs", 1)
	}

   abs_cur = abs; ismatched = matched;
   while (abs_cur) {
        SYMBOL_PTR_NODE_PTR nt_symb;

        if (*ismatched != 1 &&
           !(is_chainrule(abs_cur->right_side, FALSE, &nt_symb, FALSE) &&
	     abs_cur->text->symbol_ptr == nt_symb->symbol_ptr)) {
		cagt_msg(0, MSG_RET, (cagt_msg_text, msg_arr[-(CAGT_ABSBADMAT)], *ismatched))
		output_left_node(stderr,abs_cur);
		output_right_side(stderr,abs_cur->right_side,FALSE);
		putc('\n',stderr);
	}
	abs_cur = abs_cur->next_rule; ismatched++;
   }
   FREE_MEMORY(matched, "reorder_dec_to_abs", 2);

   return(errors);
   }







public void get_expansion_nodes(abs_grammar,abs_node,con_node,head,tail)
   LEFT_NODE_PTR abs_grammar;
   RIGHT_NODE_PTR abs_node;
   RIGHT_NODE_PTR con_node;
   RIGHT_NODE_PTR *head;
   RIGHT_NODE_PTR *tail;
/*
 * Purpose:
 *	An abstract rule may contain an identifier node (the "expansion
 *	node") that stands for another abstract rule (the "expansion
 *	rule") created when similar rules were combined. For example
 *
 *		xExpr ::= xExpr '+' xExpr.
 *		xExpr ::= xExpr '-' xExpr.
 *
 *	are combined into
 *
 *		xExpr ::= xExpr xOp xExpr.
 *		xOp ::= '+'.
 *		xOp ::= '-'.
 *
 *	Connection points may be added to the expansion rules between the
 *	forward and reverse phases of CAGT, so during the reverse phase
 *	the expansion node has to be replaced by the body of the proper
 *	expansion rule. Given the abstract rule
 *
 *		xExpr ::= xExpr xOp xExpr.
 *
 *	the concrete rule
 *
 *		xExpr ::= xExpr '+' xExpr.
 *
 *	and the expansion rule
 *
 *		xOp ::= '+' &'connection_point'.
 *
 *	the desired result is
 *
 *		xExpr ::= xExpr '+' &'connection_point' xExpr.
 *
 *	This routine looks for the expansion rule and hands back a copy
 *	of its expansion nodes. A rule qualifies when all of these hold:
 *
 *		- Its left side has the same code and the same symbol
 *		  as abs_node.
 *		- Its relationship chain is empty (it was an added rule,
 *		  not one from the original concrete grammar).
 *		- The node following the first right side node has the
 *		  same code and symbol as con_node.
 *		- Only connection point nodes (AMPT) lie between that
 *		  node and the DOTT node.
 *
 *	The first qualifying rule in chain order is used.
 *
 * On Entry:
 *	abs_grammar points at the top rule in the abstract grammar
 *	   structure.
 *	abs_node points at the expansion node in an abstract rule.
 *	con_node points at the node in the concrete grammar that
 *	   corresponds to abs_node.
 *
 * On Exit:
 *	If an expansion rule is found, *head points at the first node of
 *	   a copy of its expansion nodes and *tail at the last node of
 *	   that copy; the copy is terminated by a null next pointer
 *	   (its DOTT has been deleted).
 *	If no expansion rule is found, *head is NULL and *tail is left
 *	   untouched.
*/
   {
   LEFT_NODE_PTR rule;			     /* Candidate expansion rule */
   RIGHT_NODE_PTR lead;		  /* Node after the first right side node */
   RIGHT_NODE_PTR scan;			/* Walks the nodes following lead */
   RIGHT_NODE_PTR last;			     /* Becomes the tail of the copy */


   *head = (RIGHT_NODE_PTR)0;		    /* Result if nothing qualifies */

   for (rule = abs_grammar; rule; rule = rule->next_rule)
	{
	/* Same code, same identifier, and no relationship chain? */
	if (rule->code != abs_node->code ||
	    rule->text->symbol_ptr != abs_node->x.text->symbol_ptr ||
	    rule->rel_ptr)
		continue;

	/* Leading right side node agrees with con_node in code and text? */
	lead = rule->right_side->next;
	if (lead->code != con_node->code ||
	    lead->x.text->symbol_ptr != con_node->x.text->symbol_ptr)
		continue;

	/* Everything between lead and the DOTT must be an AMPT */
	for (scan = lead->next; scan->code != DOTT; scan = scan->next)
		if (scan->code != AMPT) break;
	if (scan->code != DOTT) continue;

	/* Qualifies: copy from lead onward, then clip the DOTT off the copy */
	last = dup_right_side(lead);
	*head = last;
	while (last->next->code != DOTT) last = last->next;
	delete_right_side(last->next);
	last->next = (RIGHT_NODE_PTR)0;
	*tail = last;
	return;
	}
   }







static RIGHT_NODE_PTR *move_con_node_to_abs(con_queue,con_node,abs_node)
   QUEUE_PTR con_queue;
   RIGHT_NODE_PTR *con_node;
   RIGHT_NODE_PTR *abs_node;
/*
 * Purpose:
 *	Unlink the node *con_node from the concrete rule and splice it
 *	into the abstract copy immediately in front of *abs_node.
 *
 * On Entry:
 *	con_node is the address of the pointer to the concrete node to move.
 *	abs_node is the address of the pointer to the current abstract node.
 *
 * On Exit:
 *	*con_node points at the node that used to follow the moved node,
 *	   so the caller's con_node is still correct.
 *	The moved node stands where *abs_node was, and its next field
 *	   points at the former *abs_node.
 *	One entry has been dequeued from con_queue and discarded
 *	   (presumably the address of the moved node's next field, which
 *	   no longer belongs to the concrete rule).
 *	The address of the moved node's next field is returned; it is the
 *	   caller's new abs_node.
*/
   {
   RIGHT_NODE_PTR moved = *con_node;
   RIGHT_NODE_PTR displaced = *abs_node;

   *abs_node = moved;
   *con_node = moved->next;
   moved->next = displaced;
   (void) (RIGHT_NODE_PTR *) dequeue(con_queue);

   return &(moved->next);
   }



public void abstract_to_parsing(con_indexed,abs)
   struct indexed_access_block *con_indexed;
   LEFT_NODE_PTR abs;
/*
 *							April 25, 1988
 * Modified to handle moving connection nodes from the concrete
 * grammar to the parsing grammar in addition to the normal moving
 * from abstract to parsing. This is done so that lexers generated by
 * the new GLA can communicate with PGS parsers via action nodes.
 *
 *							September 21, 1986
 * Rewritten to handle PGS modification nodes ("@" and "$")
 * occurring in the concrete grammar. The existence of such
 * nodes means that the simple algorithm used previously
 * (and described in my Master's thesis) can't be used because
 * that algorithm was based on the assumption that the concrete
 * and abstract grammar (.abs) rules were of exactly the
 * same form (PGS mods appear in the concrete grammar, but
 * not in the abstract and they need to appear in the parsing
 * grammar). This routine implements a more general approach to the
 * problem that can handle this (and possible other future)
 * additions to the concrete grammar.
 *
 * On Entry:
 *	con_indexed points at an indexed_access_block that allows constant
 *	   time access to the concrete grammar.
 *	abs points at the left side chain of the abstract grammar.
 *	Both grammars share the same symbol table allowing the direct
 *	   comparison of their identifiers and literals.
 *	The concrete and abstract grammars are related by a previous
 *	   cagt run in the forward direction.
 *	The abstract grammar has the relationship chains from that
 *	   earlier run attached.
 *
 * On Exit:
 *	The concrete grammar has been transformed into a parsing grammar
 *	   as follows:
 *
 *		For each rule moving down the abstract grammar:
 *		   {
 *		   For each rule in the concrete grammar indexed
 *		      by the rel_chain from the above abstract rule:
 *		         {
 *		         Make a copy of the abstract rule right side.
 *			 Do a depth first left-to-right traversal
 *			    of the concrete and abstract copy rules
 *			    saving the addresses of the pointers
 *			    to each node encountered in a concrete
 *			    node queue (con_queue) and an abstract
 *			    node queue (abs_queue).
 *			 Repeat until none of the steps below applies:
 *			     {
 *			      *
 *			      * NOTE: The order of operations in
 *			      * this loop is important.
 *			      *
 *			     a) If the top nodes in both queues are
 *			        textual (LITT or IDNT) then
 *			        {
 *			        If they are not the same type (i.e. Not
 *				   both LITT or both IDNT) then the node in
 *				   the abstract grammar represents a
 *				   reference to another rule in the abstract
 *				   grammar that must be found and inserted
 *				   into the copy of the abstract rule in
 *				   place of the current node.
 *		                Transfer the textual information from the
 *			          concrete node to the abstract node that
 *				  is paired with it (the first node of the
 *				  expansion when one was inserted).
 *				Advance in both rules.
 *				}
 *			     aa) If the top concrete node is an AMPT, move
 *				the node to the abstract copy rule.
 *			     b) If the top nodes in the two queues are
 *			        the same type, non-textual and not DOTT,
 *				advance in both rules.
 *			     c) If the top node of the abs_queue is
 *			        an AMPT (connection), or a LPNT
 *			        or RPNT that doesn't match any
 *				node in the concrete rule (which is
 *				assured by the previous step) advance
 *				in the abstract copy only.
 *			     d) If the top node of the con_queue is
 *				a PGS modification node (ATT or DOLT),
 *				move the node to the abstract copy rule.
 *			     }
 *			 If the current concrete node or the current
 *			    abstract node is not the DOTT, the rules
 *			    are printed and an error is signalled with
 *			    MSG_EXIT.
 *			 Delete the concrete right side.
 *			 Put the abstract copy in place of the
 *			    concrete rule.
 *			 }
 *		   }
 *
*/
   {
   QUEUE_PTR con_queue,			 /* Addresses of concrete node ptrs */
	     abs_queue;			 /* Addresses of abstract node ptrs */
   LEFT_NODE_PTR con_rule;	      /* Points at con. rule being modified */
   REL_CHAIN_ELT_PTR rel_ptr;		    /* Points at Relationship chain */
   LEFT_NODE_PTR abs_top = abs;			      /* Remembers top rule */
   int abs_rule_num = 0;			   /* Counts abstract rules */
   RIGHT_NODE_PTR x_head;	      /* 1st node in copy of expansion rule */
   RIGHT_NODE_PTR x_tail;	     /* Last node in copy of expansion rule */
   RIGHT_NODE_PTR abs_copy;	      /* Points at copy of current abs rule */
   RIGHT_NODE_PTR *con_node,		    /* Addr of ptr to current con node */
		  *abs_node;		    /* Addr of ptr to current abs node */
   RIGHT_NODE_PTR temp;			    /* Abs node replaced by expansion */
   char expand_done;		       /* TRUE if an expansion was linked in */


   con_queue = init_queue();
   abs_queue = init_queue();

   while (abs)				 /* Pass through all abstract rules */
      {
      abs_rule_num++;
      rel_ptr = abs->rel_ptr;
      while (rel_ptr)	       /* Given abs rule has concrete relationships */
	  {
          con_rule = rule_by_index(con_indexed,rel_ptr->rule_num);
	  abs_copy = dup_right_side(abs->right_side);
          traversal_ptr_addr(con_queue,con_rule->right_side);
          traversal_ptr_addr(abs_queue,abs_copy);

	  con_node = (RIGHT_NODE_PTR *) dequeue(con_queue);
	  abs_node = (RIGHT_NODE_PTR *) dequeue(abs_queue);
	  for(;;)
	     {
	     /*
	      * NOTE: This loop is set up like a typical production
	      * system loop. Although such a loop is a reasonable
	      * solution, the ordering of the productions is important
	      * and care should be exercised if new productions
	      * are added, or old ones modified.
	     */

	     /* Production a - See if two textual nodes are up */
	     if ((((*abs_node)->code == IDNT) || ((*abs_node)->code == LITT))
					       &&
		 (((*con_node)->code == IDNT) || ((*con_node)->code == LITT)))
		 {
		 expand_done = FALSE;
		 if ( (*abs_node)->code != (*con_node)->code )
		     {		     /* Expand abs node with other abs rule */
		     get_expansion_nodes(abs_top,*abs_node,*con_node,
				         &x_head,&x_tail);
		     if (x_head)				/* Found it */
			{
			/* Link in the expansion copy in abs_node place */
			temp = *abs_node;
			*abs_node = x_head;	 /* Point at expansion copy */
			x_tail->next = temp->next;	       /*Finish Link*/
			/* Kill old abs_node */
			temp->next = (RIGHT_NODE_PTR)0;
			delete_right_side(temp);
			/* The next abs queue entry is now void */
			(void) (RIGHT_NODE_PTR *) dequeue(abs_queue);
			expand_done = TRUE;
			}
		     }

		 /*
		  * Transfer the textual information from con to abs.
		  * This has to happen while *abs_node is still the node
		  * paired with *con_node (the head of the expansion copy
		  * when one was linked in). Advancing abs_node first
		  * would overwrite the node that follows the expansion.
		 */
		 (*abs_node)->code = (*con_node)->code;
                 (*abs_node)->x.text = (*con_node)->x.text;

		 /* Next nodes */
		 con_node = (RIGHT_NODE_PTR *) dequeue(con_queue);
		 if (expand_done)
			/* Voided queue entry is replaced by the address */
			/* of the expansion tail's next field		 */
			abs_node = &(x_tail->next);
		    else
			abs_node = (RIGHT_NODE_PTR *) dequeue(abs_queue);

		 continue;
		 }

	     /* Production aa - See if con token is a connection node */
	     /* NOTE: This production added 25 April 1988 */
	     if ((*con_node)->code == AMPT)
		 {				     /* Move it to abs rule */
		 abs_node = move_con_node_to_abs(con_queue,con_node,abs_node);
		 continue;
		 }

	     /* Production b - See if both nodes are the same (non-text)*/
	     if  ( ((*abs_node)->code == (*con_node)->code ) &&
		   ((*con_node)->code != DOTT) &&
		   ((*con_node)->code != IDNT) &&
		   ((*con_node)->code != LITT) )
		 {
		 con_node = (RIGHT_NODE_PTR *) dequeue(con_queue);
		 abs_node = (RIGHT_NODE_PTR *) dequeue(abs_queue);
		 continue;
		 }

	     /* Production c - See if abs token is AMPT, LPNT, or RPNT */
	     if  ( ((*abs_node)->code == AMPT) ||
		   ((*abs_node)->code == LPNT) ||
		   ((*abs_node)->code == RPNT) )
		 {
		 abs_node = (RIGHT_NODE_PTR *) dequeue(abs_queue);
		 continue;
		 }

	     /* Production d - See if con token is PGS modification */
	     if (((*con_node)->code == ATT) || ((*con_node)->code == DOLT))
		 {				     /* Move it to abs rule */
		 abs_node = move_con_node_to_abs(con_queue,con_node,abs_node);
		 continue;
		 }

	     break;				  /* If no productions fire */
	     }


	/* Unless both rules (con and abs_copy) are finished => error */
	if ( ((*con_node)->code != DOTT) ||
	     ((*abs_node)->code != DOTT) )
		{
		fprintf(stderr,"ABS-%d) ", abs_rule_num);
		output_left_node(stderr,abs);
		output_right_side(stderr,abs->right_side,FALSE);
		putc('\n',stderr);
		fprintf(stderr,"CON-%d) ", rel_ptr->rule_num);
		output_left_node(stderr,con_rule);
		output_right_side(stderr,con_rule->right_side,FALSE);
		putc('\n',stderr);
		cagt_msg(0, MSG_EXIT, (cagt_msg_text, msg_arr[-(CAGT_ABSPARMIS)], abs_rule_num,
			 rel_ptr->rule_num))
		}

	 /* Attach the abs rule copy in place of the concrete rule */
	 delete_right_side(con_rule->right_side);
	 con_rule->right_side = abs_copy;


         rel_ptr = rel_ptr->next;
	 }

      abs = abs->next_rule;
      }

   delete_queue(con_queue);
   delete_queue(abs_queue);
   }







/*							September 19, 1986
 * This copy is kept because it embodies a neat algorithm that might
 * come in handy. It is the algorithm described in my Master's thesis.
 * It is unable to handle the PGS modifications in the concrete grammar,
 * and is thus replaced by the more general approach in the new
 * version above.
*/
#ifdef OBSOLETE
public void abstract_to_parsing(con_indexed,abs)
   struct indexed_access_block *con_indexed;
   LEFT_NODE_PTR abs;
/*
 * NOTE: This definition is only compiled when OBSOLETE is defined; it is
 * the earlier, textual-nodes-only version of the routine.
 *
 * On Entry:
 *	con_indexed points at an indexed_access_block that allows constant
 *	   time access to the concrete grammar.
 *	abs points at the left side chain of the abstract grammar.
 *	Both grammars share the same symbol table allowing the direct
 *	   comparison of their identifiers and literals.
 *	The concrete and abstract grammars are related by a previous
 *	   cagt run in the forward direction.
 *	The abstract grammar has the relationship chains from that
 *	   earlier run attached.
 *
 * On Exit:
 *	The concrete grammar has been transformed into a parsing grammar
 *	   as follows:
 *
 *		For each rule moving down the abstract grammar:
 *		   {
 *		   For each rule in the concrete grammar indexed
 *		      by the rel_chain from the above abstract rule:
 *		         {
 *		         Make a copy of the abstract rule right side.
 *			 For each textual node in the 2 rules (LITT or IDNT)
 *			    (from RIGHT to LEFT):
 *			     {
 *			     Consider the nodes pairwise. If they are
 *			        not the same type (i.e. Not both LITT or
 *			        IDNT) then the node in the abstract grammar
 *			        represents a reference to another rule in the
 *			        abstract grammar that must be found and
 *			        inserted into the copy of the abstract rule
 *			        in place of the current node.
 *		             Transfer the textual nodes from the
 *			       concrete rule right side to the equivalent
 *			       nodes in the abstract copy.
 *			     }
 *			 If textual nodes remain in only one of the
 *			    two rules, signal an error (MSG_EXIT).
 *			 Delete the concrete right side.
 *			 Put the abstract copy in place of the
 *			    concrete rule.
 *			 }
 *		   }
 *
*/
   {
   QUEUE_PTR con_queue,			      /* Used to hold textual nodes */
	     abs_queue;
   LEFT_NODE_PTR con_rule;	      /* Points at con. rule being modified */
   REL_CHAIN_ELT_PTR rel_ptr;		    /* Points at Relationship chain */
   LEFT_NODE_PTR abs_top = abs;			      /* Remembers top rule */
   RIGHT_NODE_PTR x_head;	      /* 1st node in copy of expansion rule */
   RIGHT_NODE_PTR x_tail;	     /* Last node in copy of expansion rule */
   RIGHT_NODE_PTR abs_copy;	      /* Points at copy of current abs rule */
   RIGHT_NODE_PTR *con_node,		   /* Current concrete textual node */
		  *abs_node;		   /* Current abstract textual node */
   RIGHT_NODE_PTR old_abs_node;	      /* Abs node replaced by an expansion */
   static short toklist[NUM_TOKENS];	/* Is initialized to zero
					   by compiler */

   /* Indicate the textual nodes */
   toklist[IDNT] = TRUE;
   toklist[LITT] = TRUE;


   con_queue = init_queue();
   abs_queue = init_queue();


   while (abs)				 /* Pass through all abstract rules */
      {
      rel_ptr = abs->rel_ptr;
      while (rel_ptr)	       /* Given abs rule has concrete relationships */
	  {
          con_rule = rule_by_index(con_indexed,rel_ptr->rule_num);
	  abs_copy = dup_right_side(abs->right_side);
	  /* Queue the textual nodes of both rules, then reverse each */
	  /* queue so the nodes are paired from right to left	       */
	  find_token_ptr_addr(toklist,con_queue,con_rule->right_side);
	  reverse_queue(con_queue);
          find_token_ptr_addr(toklist,abs_queue,abs_copy);
	  reverse_queue(abs_queue);

	  while( !empty(con_queue) && !empty(abs_queue) )     /* Nodes left */
	     {
	     con_node = (RIGHT_NODE_PTR *) dequeue(con_queue);
	     abs_node = (RIGHT_NODE_PTR *) dequeue(abs_queue);
	     if ((*abs_node)->code != (*con_node)->code)
		 {		     /* Expand abs node with other abs rule */
		 get_expansion_nodes(abs_top,*abs_node,*con_node,
				     &x_head,&x_tail);
		 if (x_head)					/* Found it */
			{
			/* Link in the expansion copy in abs_node place */
			old_abs_node = *abs_node;
			*abs_node = x_head;	 /* Point at expansion copy */
			x_tail->next = old_abs_node->next; /*Finish Link*/
			/* Kill old abs_node */
			old_abs_node->next = (RIGHT_NODE_PTR)0;
			delete_right_side(old_abs_node);
			};
		 }
	     /* *abs_node is the expansion head here if one was linked in */
             (*abs_node)->code = (*con_node)->code;  /* Xfer textual info */
             (*abs_node)->x.text = (*con_node)->x.text;
	     }
	 /* Leftover textual nodes in either rule mean they did not pair up */
	 /* NOTE(review): the live version passes two values (abstract and  */
	 /* concrete rule numbers) with this message; only one is supplied  */
	 /* here - confirm against the message text if this is revived.	    */
	 if ( !empty(con_queue) || !empty(abs_queue) )
	     cagt_msg(0, MSG_EXIT, (cagt_msg_text, msg_arr[-(CAGT_ABSPARMIS)], 0))
	 /* Attach the abs rule copy in place of the concrete rule */
	 delete_right_side(con_rule->right_side);
	 con_rule->right_side = abs_copy;

         rel_ptr = rel_ptr->next;
	 }

      abs = abs->next_rule;
      }

   delete_queue(con_queue);
   delete_queue(abs_queue);
   }
#endif
