/* AE program profiling system.
   Code to find loops in a control-flow graph.
   Copyright (C) 1989, 1990 by James R. Larus (larus@cs.wisc.edu)

   AE and AEC are free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 1, or (at your option) any
   later version.

   AE and AEC are distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GNU CC; see the file COPYING.  If not, write to James R.
   Larus, Computer Sciences Department, University of Wisconsin--Madison,
   1210 West Dayton Street, Madison, WI 53706, USA or to the Free
   Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */


/* $Header: /var/home/larus/AE/AE/RCS/ae-loop.c,v 2.2 90/02/14 09:19:47 larus Exp Locker: larus $ */


#include "config.h"
#include "rtl.h"
#include "basic-block.h"
#include "ae.h"


/* Find the arcs in the control flow graph that entry, exit, and
   iterate loops.  We find loops by computing the control-flow
   dominators, then finding edges whose destination dominates their
   source (loop back edges in reducible flow graphs).  From the back
   edges and the loop header, we can compute the entire loop body and
   record the appropriate set of edges. */


int block_succ_p ();
void compress_semid ();
int condjump_p ();
int dominate_p ();
void dfs_number ();
void dfs_number_sub ();
int eval_semid ();
void find_dominators ();
void find_loops ();
int find_loop_back_edge ();
void find_natural_loop ();
int head_to_block_number ();
rtx jump_target ();
void link_semid ();
void mark_insn ();
int multiway_jump_p ();
void record_exit_edge ();
void record_exit_edge1 ();
int simplejump_p ();


void bzero ();
void free ();
char *malloc ();


/* Information on instructions in function. */

extern aux_insn_info *insn_info;


/* Index of a basic block */

typedef unsigned short bb_index;



/* Used to assign unique numbers to loops within a function. */

static int loop_no = 0;


/* Represent a set of basic blocks as a bit vector  */

#define SET_BBSET_BIT(BBS, BN)					\
  BBS [(BN) / REGSET_ELT_BITS] |= 1 << ((BN) % REGSET_ELT_BITS)


#define GET_BBSET_BIT(BBS, BN)					\
  BBS [(BN) / REGSET_ELT_BITS] & (1 << ((BN) % REGSET_ELT_BITS))


#define FOREACH_SET_BIT(BBS, LIMIT, N, BODY)			\
  {								\
    register int _i_, _j_, _c_;					\
    for (_i_ = 0, _c_ = LIMIT;					\
	 _i_ < (LIMIT + REGSET_ELT_BITS - 1) / REGSET_ELT_BITS;	\
	 _i_ ++, _c_ -= REGSET_ELT_BITS)			\
      {								\
	register int _l_ = (_c_ < REGSET_ELT_BITS ? _c_ : REGSET_ELT_BITS);\
	register int _w_ = BBS [_i_];				\
	if (_w_ != 0)						\
	  for (_j_ = 0; _j_ < _l_; _j_ ++)			\
	    {							\
	      int N = (_i_ * REGSET_ELT_BITS) + _j_;		\
	      if (_w_ & (1 << _j_))				\
		BODY;						\
	    }}}


/* Entry N contains the number of the block that is the immediate dominator
   of basic block N. */

static bb_index *dom;


/* Entry N is bit vector where bit M is 1 if block M is predecessor
   of block N in control flow graph */

static regset *pred;


/* Find the loops in the function that begins with INSNS.  Record the
   entry, exit, and backedge arcs in INSN_INFO. */

void
analyze_loops (insns)
     rtx insns;
{
  loop_no = 0;
  find_dominators ();
  find_loops ();
  free (dom);
  free (pred);
}


/* Below is a direct implementation of the fast dominator algorithm
   from: T. Lengauer and R. E. Tarjan, "A Fast Algorithm for Finding
   Dominators in a Flowgraph," TOPLAS, v. 1, n. 1, July 1979, pp 121-141.
   See this paper for detailed comments. */


/* Return non-zero if block X dominates block Y and 0 otherwise. */

static int
dominate_p (x, y)
     register int x, y;
{
  if (x == y)
    return 1;
  else if (dom [y] == 0)
    return 0;
  else if (dom [y] == x)
    return 1;
  else
    return dominate_p (x, dom [y]);
}


/* Entry N is parent of block N in depth-first spanning tree. */

static bb_index *parent;


/* Entry N is number of semidominator of block N */

static bb_index *semi;


/* Entry N is bit vector of blocks whose semidominator is N */

static regset *bucket;


/* Entry N is the number of the block whose DF number is N. */

static bb_index *vertex;


/* Used in LINK/EVAL data structure */
static bb_index *ancestor;
static bb_index *label;
static bb_index *size;
static bb_index *child;


static bb_index n;

/* Find the dominators in the control graph using the Lengauer/Tarjan
   algorithm.  Leave each element of the array DOM with the immediate
   dominator of each block and each element of the array PRED with a bit
   vector of the control-flow predecessors of a block. */

static void
find_dominators ()
{
  register regset tem;
  register int n_blocks = n_basic_blocks + 1; /* Number blocks from 1 */
  int bbset_size = (n_blocks + REGSET_ELT_BITS - 1) / REGSET_ELT_BITS;
  int bbset_bytes = bbset_size * sizeof (int);
  register int i, v;

  if (n_basic_blocks > (1 << (sizeof (bb_index) * 8)))
    error ("Too many basic blocks for find_dominator");

  /* This algorithm wants to see block numbers from 1..n, so that it can
     use array element 0 for its purposes.  */
  dom = (bb_index *) malloc (n_blocks * sizeof (bb_index));
  bzero (dom, n_blocks * sizeof (bb_index));

  pred = (regset *) malloc (n_basic_blocks * sizeof (regset));
  tem = (regset) malloc (n_basic_blocks * bbset_bytes);
  bzero (tem, n_basic_blocks * bbset_bytes);
  init_regset_vector (pred, tem, n_basic_blocks, bbset_bytes);

  parent = (bb_index *) malloc (n_blocks * sizeof (bb_index));
  bzero (parent, n_blocks * sizeof (bb_index));

  semi = (bb_index *) malloc (n_blocks * sizeof (bb_index));
  bzero (semi, n_blocks * sizeof (bb_index));

  bucket = (regset *) malloc (n_blocks * sizeof (regset));
  tem = (regset) malloc (n_blocks * bbset_bytes);
  bzero (tem, n_blocks * bbset_bytes);
  init_regset_vector (bucket, tem, n_blocks, bbset_bytes);

  vertex = (bb_index *) malloc (n_blocks * sizeof (bb_index));
  bzero (vertex, n_blocks * sizeof (bb_index));

  ancestor = (bb_index *) malloc (n_blocks * sizeof (bb_index));
  bzero (ancestor, n_blocks * sizeof (bb_index));

  label = (bb_index *) malloc (n_blocks * sizeof (bb_index));
  for (i = 0; i < n_blocks; i ++) label [i] = i;

  size = (bb_index *) malloc (n_blocks * sizeof (bb_index));
  for (i = 0; i < n_blocks; i ++) size [i] = 1;

  size [0] = label [0] = semi [0] = 0;

  child = (bb_index *) malloc (n_blocks * sizeof (bb_index));
  bzero (child, n_blocks * sizeof (bb_index));

  if (n_basic_blocks == 0) return;

  /* Step 1 */
  n = 0;
  dfs_number (0);

  for (i = n_blocks - 1; i != 1; i --)
    {
      register w = vertex [i];

      /* Step 2 */
      for (v = 1; v < n_blocks; v ++)
	{
	  if (w > 0 && GET_BBSET_BIT (pred [w - 1], v - 1))
	    {
	      register int u = eval_semid (v);

	      if (semi [u] < semi [w])
		semi [w] = semi [u];
	    }
	}
      SET_BBSET_BIT (bucket [vertex [semi [w]]], w);
      link_semid (parent [w], w);

      /* Step 3 */
      for (v = 1; v < n_blocks; v ++)
	{
	  int pw = parent [w];

	  if (GET_BBSET_BIT (bucket [pw], v))
	    {
	      int u;

	      bucket [pw] [v / REGSET_ELT_BITS]
		&= ~(1 << (v % REGSET_ELT_BITS));
	      u = eval_semid (v);
	      if (semi [u] < semi[v])
		dom [v] = u;
	      else
		dom [v] = pw;
	    }
	}
    }

  /* Step 4 */
  for (i = 2; i < n_blocks; i ++)
    {
      register int w = vertex [i];

      if (dom [w] != vertex [semi [w]])
	dom [w] = dom [dom [w]];
    }

  /* Convert back to real block numbers */
  for (i = 0; i < n_basic_blocks; i ++)
    if (dom [i + 1] == 0)
      dom [i] = dom [i + 1];
    else
      dom [i] = dom [i + 1] - 1;
  dom [0] = 0;

  free (parent);
  free (semi);
  free (bucket);
  free (ancestor);
  free (label);
  free (size);
  free (child);
}


/* Assign depth-first numbers to blocks and set up some data structures. */

static void
dfs_number (v_r)
     register int v_r;		/* Real block number */
{
  register rtx x = basic_block_end [v_r];

  semi [v_r + 1] = (n += 1);
  vertex [n] = v_r + 1;
  if (GET_CODE (x) == JUMP_INSN && simplejump_p (x))
    dfs_number_sub (v_r, head_to_block_number (jump_target (x)));
  else if (GET_CODE (x) == JUMP_INSN && condjump_p (x))
    {
      dfs_number_sub (v_r, head_to_block_number (jump_target (x)));
      dfs_number_sub (v_r, v_r + 1);
    }
  else if (multiway_jump_p (x))
    {
      rtx body =  PATTERN (x);
      register int vlen, i;

      vlen = XVECLEN (body, 0);
      for (i = 0; i < vlen; i++)
	dfs_number_sub (v_r,
			head_to_block_number (XEXP (XVECEXP (body, 0, i), 0)));
    }
  else
    dfs_number_sub (v_r, v_r + 1);
}


static void
dfs_number_sub (v_r, w_r)
     register int v_r, w_r;		/* Real block numbers */
{
  if (w_r == n_basic_blocks) return;
  if (semi [w_r + 1] == 0)
    {
      parent [w_r + 1] = v_r + 1;
      dfs_number (w_r);
    }
  SET_BBSET_BIT (pred [w_r], v_r);
}


/* The "fast" link/eval routines from the paper. */

static int
eval_semid (v)
     register int v;
{
  if (ancestor [v] == 0)
    return v;
  else
    {
      compress_semid (v);
      return label[v];
    }
#ifdef FAST
  if (ancestor [v] == 0)
    return label [v];
  else
    {
      compress_semid (v);
      if (semi [label [ancestor [v]]] > semi [label [v]])
	return label [v];
      else
	return label [ancestor [v]];
    }
#endif
}


static void
compress_semid (v)
     register int v;
{
  if (ancestor [ancestor [v]] != 0)
    {
      compress_semid (ancestor [v]);
      if (semi [label [ancestor [v]]] < semi [label [v]])
	label [v] = label [ancestor [v]];
      ancestor [v] = ancestor [ancestor [v]];
    }
}


static void
link_semid (v, w)
     register int v, w;
{
  register int s = w;

  ancestor [w] = v;

#ifdef FAST
  while (semi [label [w]] < semi [label [child [s]]])
    {
      if (size [s] + size [child [child [s]]] > 2 * size [child [s]])
	{
	  parent [child [s]] = s;
	  child [s] = child [child [s]];
	}
      else
	{
	  size [child [s]] = size [s];
	  s = parent [s] = child [s];
	}
    }

  label [s] = label [w];
  size [v] = size [v] + size [w];
  if (size [v] < 2 * size [w])
    {
      int t = s;
      s = child [v];
     child [v] = t;
    }
  while (s != 0)
    {
      parent [s] = v;
      s = child [s];
    }
#endif
}


/* Find all loops in the function and record which edges in the
   control flow graph enter, exit, and iterate them. */

static void
  find_loops ()
{
  register int i, h;
  bb_index *stack;

  stack = (bb_index *) malloc (n_basic_blocks * sizeof (bb_index));
  for (i = 0; i < n_basic_blocks; i ++)
    {
      if ((h = find_loop_back_edge (i)) != -1)
	/* Check if we've already found another backedge of this loop */
	if (!insn_marked_p (basic_block_head[h], LOOP_HEAD_FLAG))
	  {
	    mark_insn (basic_block_head [h], LOOP_HEAD_FLAG);
	    find_natural_loop (stack, i, h);
	  }
    }
  free (stack);
}


/* Return the number of block that is the target of a loop back edge
   from block I (i.e., the number of the loop header's block). If none
   exists, return -1; */

static int
find_loop_back_edge (i)
     register int i;
{
  register rtx x = basic_block_end [i];
  register int t;

  if (GET_CODE (x) == JUMP_INSN && simplejump_p (x))
    t = head_to_block_number (jump_target (x));
  else if (GET_CODE (x) == JUMP_INSN && condjump_p (x))
    {
      t = head_to_block_number (jump_target (x));
      if (dominate_p (t, i))
	return t;
      t = i + 1;		/* Check fall-thru */
    }
  else if (multiway_jump_p (x))
    {
      rtx body =  PATTERN (x);
      register int vlen, j;

      vlen = XVECLEN (body, 0);
      for (j = 0; j < vlen; j ++)
	{
	  t = head_to_block_number (XEXP (XVECEXP (body, 0, j), 0));
	  if (dominate_p (t, i))
	    return t;
	}
      return -1;
    }
  else
    t = i + 1;			/* Check fall-thru */

  /* Fall-thru check */
  if (t < n_basic_blocks && dominate_p (t, i))
    return t;
  else
    return -1;
}


/* Find the natural loop in the control flow graph (see, Aho, Sethi,
   Ullman, p 604) corresponding to the backedge from block N to H.  For
   each loop, record relevant edges. */

static void
find_natural_loop (stack, n, h)
bb_index *stack;
register int n, h;
{
  register int stack_top = 0;
  static int mark = 0;
  register int i;

  /* Record loop backedge */
  insn_info [INSN_UID (basic_block_end [n])].back_edge
    = cons (cons (h, loop_no), insn_info [n]. back_edge);

  mark ++;
  insn_info [INSN_UID (basic_block_head [h])].mark = mark;
  insn_info [INSN_UID (basic_block_head [n])].mark = mark;
  if (h != n)
    stack [stack_top ++] = n;

  while (stack_top != 0)
    {
      int m = stack [-- stack_top];

      FOREACH_SET_BIT (pred [m], n_basic_blocks, i,
		       {
			 if (insn_info [INSN_UID (basic_block_head [i])].mark
			     != mark)
			   {
			     insn_info [INSN_UID (basic_block_head [i])].mark
			       = mark;
			     stack [stack_top ++] = i;
			   }});

      /* Record loop backedge */
      if (block_succ_p (m, h))
	insn_info [INSN_UID (basic_block_end [m])].back_edge
	  = cons (cons (h, loop_no), insn_info [m]. back_edge);
    }

  for (i = 0; i < n_basic_blocks; i ++)
    {
      /* Find edges entering the loop (at its head) */
      if (GET_BBSET_BIT (pred [h], i)
	  && insn_info [INSN_UID (basic_block_head [i])].mark != mark)
	insn_info [INSN_UID (basic_block_end [i])].entry_edge
	  = cons (cons (h, loop_no),
		  insn_info [INSN_UID (basic_block_end [i])].entry_edge);

      /* Find edges exiting the loop (from any block in the loop) */
      if (insn_info [INSN_UID (basic_block_head [i])].mark == mark)
	{
	  register rtx x = basic_block_end [i];

	  if (GET_CODE (x) == JUMP_INSN && simplejump_p (x))
	    record_exit_edge (x, jump_target (x), loop_no, mark);
	  else if (GET_CODE (x) == JUMP_INSN && condjump_p (x))
	    {
	      record_exit_edge (x, jump_target (x), loop_no, mark);
	      record_exit_edge1 (x, i + 1, loop_no, mark);
	    }
	  else if (multiway_jump_p (x))
	    {
	      rtx body =  PATTERN (x);
	      register int vlen, i;

	      vlen = XVECLEN (body, 0);
	      for (i = 0; i < vlen; i++)
		record_exit_edge (x, XEXP (XVECEXP (body, 0, i), 0), loop_no,
				  mark);
	    }
	  else
	    record_exit_edge1 (x, i + 1, loop_no, mark);
	}
    }

  loop_no += 1;
}

/* Return non-zero if block N is a successor of block M.  Return 0
   otherwise. */

static int
block_succ_p (m, n)
     register int m, n;
{
  register rtx x = basic_block_end [m];
  register rtx y = basic_block_head [n];

  if (GET_CODE (x) == JUMP_INSN && simplejump_p (x))
    return (y == jump_target (x));
  else if (GET_CODE (x) == JUMP_INSN && condjump_p (x))
    {
      if (y == jump_target (x)) return 1;
      return m + 1 == n;	/* Fall-thru? */
    }
  else if (multiway_jump_p (x))
    {
      rtx body =  PATTERN (x);
      register int vlen, i;

      vlen = XVECLEN (body, 0);
      for (i = 0; i < vlen; i++)
	if (y == (XEXP (XVECEXP (body, 0, i), 0)))
	  return 1;

      return 0;
    }
  else
    return m + 1 == n;
}


/* Record that edge from block X to block Y is an exit edge of loop LOOP_NO,
   assumming that Y is not marked with MARK. */

static void
record_exit_edge (x, y, loop_no, mark)
     rtx x, y;
     int loop_no;
     int mark;
{
  if (y != NULL && insn_info [INSN_UID (y)].mark != mark)
    /* Target of edge is not in loop */
    insn_info [INSN_UID (x)].exit_edge
      = cons (cons (head_to_block_number (y), loop_no),
	      insn_info [INSN_UID (x)].exit_edge);
}


/* Record the same information, except that block Y is now the block
   with BLOCK_NO. */

static void
record_exit_edge1 (x, block_no, loop_no, mark)
     rtx x;
     int block_no;
     int loop_no;
     int mark;
{
  if (block_no == n_basic_blocks /* Implicit last block */
      || insn_info [INSN_UID (basic_block_head [block_no])].mark != mark)
    /* Target of edge is not in loop */
    insn_info [INSN_UID (x)].exit_edge
      = cons (cons (block_no, loop_no),
	      insn_info [INSN_UID (x)].exit_edge);
}
