#!/usr/local/bin/gawk -f
#**************************************************************************************
#
# hierarchy:
#		This program takes an input file which defines a DAG
#	and expands it into a tree.  It is useful when you have a tree of
# 	generic types.  See the sample input file "file_types.hierarchy"
#	for a sample dataset, and sample outputs "file_types.includes.h",
#	"file_types.operation.h" and "file_types.type.h".  The input has 
#	three distinct types of input lines:
#
#		root 	TYPE
#		set	<TAG>	TYPE1	TYPE2	...
#		children	TYPE|<TAG>	TYPE|<TAG>	TYPE|<TAG>	...
#
#	each word is tab-separated.  "root" defines the root type of the 
#	tree.  "set" defines an alias for a list of types.  "children"
#	defines the child types (or sets of types) for a given type or 
#	set of types.
#
#**************************************************************************************
# command line switches:
#	-voutput_type=type	-- print the specialized type definition (iPcress only)
#	-voutput_type=general	-- general output type.  has sub-options
#		-vpreamble=???	-- print this ??? string before the type
#		-vpostamble=???	-- print this ??? string after the type
#		-vprint_first=1	-- if you want to include the root type
#
#
#**************************************************************************************

# print_type_node: emit one specialized type definition for a queued path.
#
#   queue  - path table: queue[node] is the number of elements in the path,
#            queue[node, k] is the k-th element (1-based)
#   node   - index of the path to print
#   out    - unused; kept so the parameter list stays as it was
#   i      - local loop counter
#
# Output has the form
#       <elem1><elem2>..._type = 
#               isa_<elem1> | isa_<elem2> | ...,
# i.e. the path elements run together as the name, followed by the "isa_"
# names of every element on the path joined with " | ".
function print_type_node (queue, node, out,    i) {
	# Name: all path elements concatenated, no separator.
	i = 0
	while (++i <= queue[node])
		printf "%s", queue[node, i]
	printf "_type = \n\t"

	# Value: one isa_ term per path element; the separator goes in front
	# of every term except the first.
	for (i = 1; i <= queue[node]; i++)
		printf "%s%s", (i > 1 ? " | " : ""), "isa_" queue[node, i]
	printf ",\n"
}

# print_general_node: emit one line of "general" output for a queued path.
#
#   queue  - path table: queue[node] is the number of elements in the path,
#            queue[node, k] is the k-th element (1-based)
#   node   - index of the path to print
#   i      - local loop counter
#
# Reads the globals preamble, postamble and print_first (normally supplied
# with -v on the command line).  The line printed is
#       preamble <elem1><elem2>... "." <last elem> postamble
# where the "." appears only in front of the final element, and only when
# the path has more than one element.  A one-element path is printed only
# when print_first is 1.
function print_general_node (queue, node,     i) {
	# Nothing to do for a one-element path unless it was asked for.
	if (queue[node] <= 1 && print_first != 1)
		return

	printf "%s", preamble
	i = 1
	while (i <= queue[node]) {
		printf "%s%s", ((i > 1 && i == queue[node]) ? "." : ""), queue[node, i]
		i++
	}
	printf "%s\n", postamble
}

# Cursors for the work queue that the END rule walks.  The END rule stores
# the root path in slot 1, so processing starts there (tail) and the first
# free slot for a newly discovered path (head) is 2.
BEGIN {
	tail = 1
	head = 2
}

# "set" line: record the member types listed under the tag in $2.
# set_names[tag] holds the number of members and set_names[tag, k] holds
# the k-th member (1-based), taken from fields 3..NF in order.
$1 == "set" {
	set_names[$2] = NF - 2
	for (member = 1; member + 2 <= NF; member++)
		set_names[$2, member] = $(member + 2)
}

# "root" line: remember the type in $2 as the root of the expanded tree.
# If the input has several "root" lines, the last one takes effect.
$1 == "root" {
	root_set = $2
}

# record_children: store the child list of one parent type, taken from
# fields 3..NF of the current input record.
#
#   parent - the type whose children are being defined
#   f, m, n - locals: field index, set-member index, running child count
#
# A field matching /<.+>/ is treated as a set tag and is replaced by the
# members recorded for that tag in set_names; any other field is a single
# child type.  Results go into the global children array:
#   children[parent]     number of children
#   children[parent, k]  k-th child (1-based)
# A later definition for the same parent overrides the earlier count.
#
# Sets are looked up at the time the "children" line is read, so a tag that
# has not been defined on an earlier line contributes no children; that
# case is reported on stderr instead of being dropped silently.
function record_children(parent,    f, m, n) {
	n = 0
	for (f = 3; f <= NF; f++) {
		if ($f ~ /<.+>/) {
			if (!($f in set_names)) {
				printf "%s:%d: warning: set %s is not defined (yet); ignored in children of %s\n", FILENAME, FNR, $f, parent > "/dev/stderr"
				continue
			}
			for (m = 1; m <= set_names[$f]; m++)
				children[parent, ++n] = set_names[$f, m]
		} else
			children[parent, ++n] = $f
	}
	children[parent] = n
}

# "children" line: $2 names the parent, fields 3..NF name its children.
# When $2 is a set tag, every member of that set receives the same child
# list; otherwise $2 itself is the parent.
$1 == "children" {
	if ($2 ~ /<.+>/) {
		if (!($2 in set_names))
			printf "%s:%d: warning: parent set %s is not defined (yet); line ignored\n", FILENAME, FNR, $2 > "/dev/stderr"
		else
			for (s_el = 1; s_el <= set_names[$2]; s_el++)
				record_children(set_names[$2, s_el])
	} else
		record_children($2)
}

# END: expand the DAG into a tree, breadth first, printing every path.
#
# The queue holds root-to-node paths: queue[n] is the length of path n and
# queue[n, k] its k-th element.  tail is the path being processed and head
# the next free slot; each processed path is printed (according to
# output_type) and then extended by every child of its last element.
#
# The input is supposed to be acyclic.  If a child already occurs on the
# path being extended, following it would repeat forever, so such a child
# is reported on stderr and not expanded.
END {
	# Seed the queue with the one-element path holding the root type.
	queue[1] = 1
	queue[1,1] = root_set

	while (head > tail)
	  {
	    if (output_type == "type")
	      print_type_node(queue,tail);
	    if (output_type == "general")
	      print_general_node(queue,tail);

	    depth = queue[tail]
	    leaf = queue[tail,depth]

	    for (i = 1 ; i <= children[leaf] ; i ++)
	      {
		kid = children[leaf,i]

		# Cycle guard: compare as strings, the same way the
		# names are used as array subscripts.
		looped = 0
		for (j = 1 ; j <= depth ; j ++)
		  if ((queue[tail,j] "") == (kid ""))
		    {
		      looped = 1
		      break
		    }
		if (looped)
		  {
		    printf "warning: cycle in hierarchy: %s is its own descendant; not expanded again\n", kid > "/dev/stderr"
		    continue
		  }

		# Append a new path: a copy of the current one plus the child.
		for (j = 1 ; j <= depth ; j ++)
		  queue[head,j] = queue[tail,j]
		queue[head] = depth + 1
		queue[head,depth + 1] = kid
		head ++
	      }
	    tail ++
	  }
}


