/******************************** -*- C -*- ****************************
 *
 *	Functions for byte code optimization & analysis
 *
 *
 ***********************************************************************/

/***********************************************************************
 *
 * Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
 * Written by Paolo Bonzini.
 *
 * This file is part of GNU Smalltalk.
 *
 * GNU Smalltalk is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the Free
 * Software Foundation; either version 2, or (at your option) any later
 * version.
 *
 * GNU Smalltalk is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * GNU Smalltalk; see the file COPYING.	 If not, write to the Free Software
 * Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 ***********************************************************************/


#include "gst.h"
#include "gstpriv.h"

#include <obstack.h>

#if STDC_HEADERS
#include <string.h>		/* for memcpy */
#include <stdlib.h>
#endif /* STDC_HEADERS */
#include <stdio.h>

/* Define this to disable the peephole bytecode optimizer.  It works
   well and improves performance a bit, so there's no reason to do that
   unless you're debugging the compiler. */
/* #define NO_OPTIMIZE */

/* The JIT compiler prefers optimized bytecodes, because they are
   more regular. */
#ifdef USE_JIT_TRANSLATION
#undef NO_OPTIMIZE
#endif

/* Define this to disable type inference for SmallIntegers, which will be
   rewritten soon. */
#define NO_TYPE_INFERENCE

/* This specifies which bytecodes are pushes.  The table is indexed by
   the bytecode value; an entry is 1 if that bytecode is classified as
   a push and 0 otherwise.  The comment at the end of each row is the
   value of the first bytecode described by that row. */
const int _gst_is_push_table[256] = {
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/* 0 */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/* 16 */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/* 32 */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/* 48 */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/* 64 */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/* 80 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 96 */
  1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,	/* 112 */
  1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,	/* 128 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 144 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 160 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 176 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 192 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 208 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 224 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0	/* 240 */
};

/* This specifies which bytecodes are message sends.  The table is
   indexed by the bytecode value; an entry is 1 if that bytecode is
   classified as a send and 0 otherwise.  The comment at the end of
   each row is the value of the first bytecode described by that row. */
const int _gst_is_send_table[256] = {
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 0 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 16 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 32 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 48 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 64 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 80 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 96 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 112 */
  0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 128 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 144 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* 160 */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/* 176 */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/* 192 */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/* 208 */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,	/* 224 */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1	/* 240 */
};

/* This specifies the stack balance of bytecodes, i.e. the net change
   in the number of stack slots caused by executing the bytecode whose
   value is the index.  The value 255 is a marker, not a balance:
   _gst_compute_stack_positions derives the balance of the extended
   stack operations and extended sends from their operand bytes before
   it looks here, and reports any bytecode for which it still reads
   255 as invalid.  The comment at the end of each row is the value of
   the first bytecode described by that row. */
static const int stack_balance_table[256] = {
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,		  /* 0 */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,		  /* 16 */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,		  /* 32 */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,		  /* 48 */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,		  /* 64 */
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,		  /* 80 */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 96 */
  1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 255, 0,		  /* 112 */
  1, 0, -1, 255, 255, 255, 255, -1, 1, 1, 255, 0, 0, 0, 0, 0,	  /* 128 */
  0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,	  /* 144 */
  0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1,	  /* 160 */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 176 */
  -1, -2, 0, 0, -1, 0, -1, 0, -1, 0, -1, -1, 0, -1, 0, 0,	  /* 192 */
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,		  /* 208 */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 224 */
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2  /* 240 */
};

/* The offsets for 2-byte jump bytecodes.  The final offset
   is jump_offsets[FIRST_BYTE & 15] + SECOND_BYTE, and it is relative
   to the address of the first byte of the jump (the users in this
   file add it to a pointer to the jump bytecode itself).  The first
   row is indexed by the unconditional 2-byte jumps, four of which go
   backwards; the second row is presumably shared by the two families
   of conditional 2-byte jumps, which only go forwards. */
static const int jump_offsets[16] = {
  -1022, -766, -510, -254, 2, 258, 514, 770,
  2, 258, 514, 770, 2, 258, 514, 770
};


/* This structure and the following one are used by the bytecode
   peephole optimizer.
   
   This one, in particular, defines where basic blocks start in the
   non- optimized bytecodes. byte is nothing more than an offset in
   those bytecodes; id is used to pair jump bytecodes with their
   destinations: in the initial scan, when we encounter a jump
   bytecode we fill two block_boundaries -- one has positive id and
   represents the destination of the jump, one has negative id (but
   the same absolute value) and represents the jump bytecode
   itself. */
/* One entry is recorded for each end of a jump found by
   _gst_optimize_bytecodes. */
typedef struct block_boundary
{
  short byte;			/* offset from the start of the
				   unoptimized bytecodes */
  short id;			/* > 0: destination of jump number ID;
				   < 0: the jump bytecode of jump
				   number -ID */
}
block_boundary;

/* This structure defines how to fix the jumps after the optimized
   basic blocks are put together.  Everything is done after the
   peephole pass because this allows us to handle forward jumps and
   backward jumps in the same way.

   When single blocks are optimized, the sorted block_boundaries are
   examined one at a time.  As we process blocks, we fill an array of
   jump structures with offsets in the optimized bytecode.  We fill a
   single field at a time -- the id's sign in the block_boundary says
   which field is to be filled, the absolute value gives which jump
   structure is to be filled.  In the end, block_boundaries whose id's
   absolute value is the same are all paired. */
/* Both fields are offsets into the optimized bytecodes; they are
   filled in by _gst_optimize_bytecodes while the basic blocks are
   emitted and consumed by its final fix-up pass. */
typedef struct jump
{
  int from;			/* where the jump bytecode lies */
  int dest;			/* where the jump bytecode lands */
}
jump;


/* This structure contains the minimum and maximum value that a
   variable can hold.  If both values are outside the possible range
   for SmallIntegers, it means that the variable does not hold a
   SmallInteger.  If only one value is in the SmallInteger range, it
   means either `< max' or `> min' (respectively if max or min is
   in-range). */

typedef struct constraint
{
  long min, max;		/* bounds of the value */
  struct constraint *source;	/* constraint this one was copied from
				   (see constraint_copy), or NULL */
}
constraint;

/* The constraint of a value only known to be a SmallInteger: its
   bounds span the whole SmallInteger range. */
static constraint small_int_constraint =
  { MIN_ST_INT, MAX_ST_INT, NULL };
/* The constraint of a value that is not a SmallInteger: both bounds
   lie just outside the SmallInteger range. */
static constraint non_int_constraint =
  { MIN_ST_INT - 1, MAX_ST_INT + 1, NULL };


/* This structure is used by the JIT compiler's bytecode analyzer to
   store information about basic blocks and the control-flow graph.
   I'm not sure of these structs' fate once the pass is
   rewritten... */
typedef struct basic_block basic_block;
typedef struct graph_edge graph_edge;

/* A node of the control-flow graph. */
struct basic_block
{
  gst_uchar *bp;		/* ptr to first bytecode */
  int size;			/* length of the block, in bytes */
  int offset;			/* from start of bytecodes */
  graph_edge *forwardEdges;	/* in edges from before */
  graph_edge *backwardEdges;	/* in edges from after */
  int numForwardEdges;		/* # of in edges from before */
  int numBackwardEdges;		/* # of in edges from after */
  int timesVisited;		/* flag for analyze_basic_block */
  int active;			/* flag for graph searches */
  basic_block *next, *satisfiedNext;	/* the out edges */
  constraint *constraints;	/* NOTE(review): presumably one
				   constraint per context slot, as the
				   allocation in _gst_analyze_bytecodes
				   suggests -- confirm */
};

/* An element of a singly-linked list of incoming edges. */
struct graph_edge
{
  basic_block *block;		/* NOTE(review): presumably the block
				   the edge comes from -- confirm
				   against add_edge */
  graph_edge *next;		/* next edge in the list */
};

/* This structure maps basic_block structure to where they start in
   memory.  It is strictly needed for forward references only, but
   actually we use it for backward references too. */
typedef struct block_pointer
{
  gst_uchar *bp;		/* address of the block's first
				   bytecode */
  basic_block *block;		/* the structure describing that
				   block */
}
block_pointer;


/* Scan the bytecodes between FROM and TO. As they are
   scanned, they are overwritten with an optimized version; in
   the end, _gst_compile_bytecodes() is used to append them to the
   stream of optimized bytecodes.  The return value indicates
   whether at least a bytecode was generated.  */
static mst_Boolean optimize_basic_block (gst_uchar * from,
					 gst_uchar * to);


/* This compares two block_boundary structures according to their
   bytecode position. */
static int compare_blocks (const PTR a, const PTR b) FN_PURE;

/* This answers how the dirtyness of BLOCKCLOSUREOOP affects
   the block that encloses it.  */
static inline int check_inner_block (OOP blockClosureOOP);

/* This fills a table that says to which bytecodes a jump lands.
   Starting from BP, and for a total of SIZE bytes, bytecodes are
   analyzed and on output DEST[i] is non-zero if and
   only if BP[i] is the destination of a jump. It is positive
   for a forward jump and negative for a backward jump.  The number
   of jumps is returned.  */
static int make_destination_table (gst_uchar * bp,
				   int size,
				   char *dest);

/* This changes the NEXT and SATISFIEDNEXT pointers in BASICBLOCKS from
   pointers to bytecodes to pointers to another basic_block structure.
   Also fills in the OFFSET field.  BLOCKHEADINGS is an array of
   NUMHEADINGS entries that pair the address of the starting bytecodes
   with the corresponding basic_block structure, and is sorted by the
   address of starting bytecode. */
static void connect_basic_blocks (basic_block * basicBlocks,
				  block_pointer * blockHeadings,
				  int numHeadings);

/* This does the real work for analyze_basic_block */
static void propagate_constraints (basic_block * basicBlock,
				   char *int_tab);

/* in flux... */
static void analyze_basic_block (basic_block * basicBlock,
				 basic_block * enterFrom,
				 gst_uchar condition,
				 int topOfStack);

/* Walk the CFG down, depth-first, from LOOPBLOCK, which initially
 * should be the same as ENTRYPOINT. */

static void unconstrain_written_variables (basic_block * loopBlock,
					   basic_block * entryPoint);

/* Add an edge between FROM and TO to the CFG */
static void add_edge (basic_block * from,
		      basic_block * to);

/* Visit the edges of the CFG that go backward from BASICBLOCK.  Pass
   each of them to FUNC, with EXTRA as the second parameter. */
static void visit_backward_edges (basic_block * basicBlock,
				  void (*func) (basic_block *, basic_block *),
				  basic_block * extra);

/* Visit the edges of the CFG that jump forward from BASICBLOCK.  Pass
   each of them to FUNC, with EXTRA as the second parameter. */
static void visit_forward_edges (basic_block * basicBlock,
				 void (*func) (basic_block *, basic_block *),
				 basic_block * extra);

/* Fill the BASICBLOCK structure with information on the basic block
   whose start is pointed to by BP.  All the fields but `next' and
   `offset' are filled.  Information from make_destination_table
   is passed into DEST.

   In addition it returns whether execution falls into the immediately
   following block at the end of this one. */

static mst_Boolean next_basic_block (basic_block * basicBlock,
				     gst_uchar * bp,
				     char *dest);

/* A helper function for bsearch-ing into an array of block_pointers. */
static int find_block_heading (const PTR needle, const PTR haystack) FN_PURE;

/* Answer X with all bits after the MSB set to 1. */
static inline long mask (long int x) FN_PURE;

/* Set the constraint object RESULT to be equal to OOP (or to be marked
   as a non-integer if OOP is not a SmallInteger */
static inline int constraint_set (OOP oop,
				  constraint * result);

/* Mark N constraint objects starting at A as unknown. */
static inline int constraint_reset (constraint * a,
				    int n);

/* Compute the range of A+B into RESULT. */
static inline int constraint_sum (constraint * a,
				  constraint * b,
				  constraint * result);

/* Compute the range of A-B into RESULT. */
static inline int constraint_subtract (constraint * a,
				       constraint * b,
				       constraint * result);

/* Compute the range of A*B into RESULT. */
static inline int constraint_multiply (constraint * a,
				       constraint * b,
				       constraint * result);

/* Compute the range of A\\B into RESULT. */
static inline int constraint_remainder (constraint * a,
					constraint * b,
					constraint * result);

/* Compute the range of A//B into RESULT. */
static inline int constraint_division (constraint * a,
				       constraint * b,
				       constraint * result);

/* Compute the range of A bitAnd: B into RESULT. */
static inline int constraint_bit_and (constraint * a,
				      constraint * b,
				      constraint * result);

/* Compute the range of A bitOr: B into RESULT. */
static inline int constraint_bit_or (constraint * a,
				     constraint * b,
				     constraint * result);

/* Compute the range of A bitShift: B into RESULT. */
static inline int constraint_bit_shift (constraint * a,
					constraint * b,
					constraint * result);

/* Copy the range of A into RESULT and mark A as its source. */
static inline int constraint_copy (constraint * a,
				   constraint * result);

/* Restrict the range of A and B to what is guaranteed by A < B. */
static inline void constraint_less (constraint * a,
				    constraint * b);

/* Restrict the range of A and B to what is guaranteed by A <= B. */
static inline void constraint_less_equal (constraint * a,
					  constraint * b);

/* Restrict the range of A and B to what is guaranteed by A > B. */
static inline void constraint_greater (constraint * a,
				       constraint * b);

/* Restrict the range of A and B to what is guaranteed by A >= B. */
static inline void constraint_greater_equal (constraint * a,
					     constraint * b);

/* Restrict the range of A and B to what is guaranteed by A = B. */
static inline void constraint_equal (constraint * a,
				     constraint * b);

/* Copy a block of N constraints starting at A to B, and restrict
   the ranges of the top two according to CONDITION (a boolean
   bytecode).  If a[k].source == a[j], set b[k].source to b[j].  */
static void constraint_copy_block (constraint * a,
				   constraint * b,
				   int condition,
				   int n);

/* Restrict the top two constraints of A according to CONDITION (a boolean
   bytecode) without modifying A.  Then merge the N ranges starting by
   A with the N ranges starting at B.  If there can be multiple sources,
   for b[k] (i.e. if a[k].source == a[j] but b[k].source != b[j]) set
   b[k].source to NULL.  */
static void constraint_merge_block (constraint * a,
				    constraint * b,
				    int condition,
				    int n);

/* Answer whether BYTECODES is one of the trivial method bodies that
   can be executed without a full activation:
     0                      none of the cases below;
     1                      ^self;
     (index << 8) | 2       ^instanceVariable, INDEX being the variable;
     3                      ^literal; when the literal is implicit in
                            the bytecode (true, false, nil, -1, 0, 1, 2)
                            it is first registered with
                            _gst_add_forced_object.
   A NULL argument answers 0.  */
int
_gst_is_simple_return (bytecodes bytecodes)
{
  gst_uchar *code;
  long length;

  if (bytecodes == NULL)
    return (0);

  length = _gst_bytecode_length (bytecodes);
  code = bytecodes->base;

  switch (length)
    {
    case 1:
      /* A lone `return special object' bytecode.  */
      if (code[0] == (RETURN_INDEXED | RECEIVER_INDEX))
	return (1);

      if (code[0] == (RETURN_INDEXED | TRUE_INDEX))
	_gst_add_forced_object (_gst_true_oop);
      else if (code[0] == (RETURN_INDEXED | FALSE_INDEX))
	_gst_add_forced_object (_gst_false_oop);
      else if (code[0] == (RETURN_INDEXED | NIL_INDEX))
	_gst_add_forced_object (_gst_nil_oop);
      else
	return (0);

      return (3);

    case 2:
      /* Every two-byte candidate is a push followed by a return of
         the top of the stack.  */
      if (code[1] != RETURN_CONTEXT_STACK_TOP)
	return (0);

      /* ^INSTANCE_VARIABLE, short form */
      if ((code[0] & ~15) == PUSH_RECEIVER_VARIABLE)
	return (((code[0] & 0x0F) << 8) | 2);

      /* ^firstLiteral */
      if (code[0] == PUSH_LIT_CONSTANT)
	return (3);

      /* ^-1, ^0, ^1, ^2 */
      if (code[0] == (PUSH_SPECIAL | LIT_MINUS_ONE_INDEX))
	_gst_add_forced_object (FROM_INT (-1));
      else if (code[0] == (PUSH_SPECIAL | LIT_ZERO_INDEX))
	_gst_add_forced_object (FROM_INT (0));
      else if (code[0] == (PUSH_SPECIAL | LIT_ONE_INDEX))
	_gst_add_forced_object (FROM_INT (1));
      else if (code[0] == (PUSH_SPECIAL | LIT_TWO_INDEX))
	_gst_add_forced_object (FROM_INT (2));
      else
	return (0);

      return (3);

    case 3:
      /* ^INSTANCE_VARIABLE, two-byte push form */
      if (code[0] == PUSH_INDEXED
	  && (code[1] & LOCATION_MASK) == RECEIVER_LOCATION
	  && code[2] == RETURN_CONTEXT_STACK_TOP)
	return (((code[1] & ~LOCATION_MASK) << 8) | 2);

      return (0);

    default:
      return (0);
    }
}

/* Scan the bytecodes in BC, which make up the body of a block, and
   answer how "clean" the block is.  LITERALS is the literal frame
   used to look at the blocks that BC pushes in turn.  The answer is
     0       nothing outside the block itself is referenced;
     1       the receiver is referenced (self, an instance variable,
	     a return of self...);
     1 + N   an outer-temporary bytecode whose third byte is N was
	     found (the maximum over all of them is answered);
     31      full block: there is a return from the method, a
	     reference to thisContext that is not part of a
	     #blockCopy:, an inner full block, or the value above
	     would exceed 31.
   The status only ever grows while scanning; 31 is answered as soon
   as it is found.  */
int
_gst_check_kind_of_block (bytecodes bc,
			  OOP * literals)
{
  int status, newStatus;
  gst_uchar *bp, *end;
  OOP blockClosureOOP;

  status = 0;			/* clean block */
  for (bp = bc->base, end = bc->ptr; bp != end;
       bp += BYTECODE_SIZE (*bp))
    {
      switch (*bp)
	{
	case 134:
	  /* The operation is encoded in the two high bits of the
	     second byte (the low six bits are part of the index), so
	     those are the bits to look at.  Storing into an Array
	     does not touch the receiver.  */
	  if ((bp[1] & ~63) == POP_STORE_INTO_ARRAY)
	    break;

	  /* operation on instance variables - fall through */

	case 0:
	case 1:
	case 2:
	case 3:		/* push instance variable */
	case 4:
	case 5:
	case 6:
	case 7:
	case 8:
	case 9:
	case 10:
	case 11:
	case 12:
	case 13:
	case 14:
	case 15:
	case 96:
	case 97:
	case 98:
	case 99:		/* pop into instance var */
	case 100:
	case 101:
	case 102:
	case 103:
	case 112:
	case 120:
	case 140:		/* push self/return/set top */
	  if (status == 0)
	    status = 1;

	  break;

	case 32:
	case 33:
	case 34:
	case 35:		/* push literal constant */
	case 36:
	case 37:
	case 38:
	case 39:
	case 40:
	case 41:
	case 42:
	case 43:
	case 44:
	case 45:
	case 46:
	case 47:
	case 48:
	case 49:
	case 50:
	case 51:
	case 52:
	case 53:
	case 54:
	case 55:
	case 56:
	case 57:
	case 58:
	case 59:
	case 60:
	case 61:
	case 62:
	case 63:
	  /* The literal could be an inner block, whose cleanness
	     affects ours.  */
	  newStatus = check_inner_block (literals[*bp & 31]);
	  if (newStatus > status)
	    {
	      if (newStatus == 31)
		{
		  return (31);
		}
	      status = newStatus;
	    }
	  break;

	case 124:		/* return from method */
	  return (31);

	case 126:		/* big literal operations */
	  if ((bp[1] & LOCATION_MASK) != PUSH_LITERAL)
	    continue;

	  /* The index is made of the low six bits of the second byte
	     (high part) and of the whole third byte (low part), so
	     the high part must be moved past all eight bits of the
	     low part.  */
	  blockClosureOOP =
	    literals[((bp[1] & ~LOCATION_MASK) << 8) | bp[2]];
	  newStatus = check_inner_block (blockClosureOOP);
	  if (newStatus > status)
	    {
	      if (newStatus == 31)
		return (31);

	      status = newStatus;
	    }
	  break;

	case 137:		/* push this context */
	  /* This is a one-byte bytecode, so bp[1] is the bytecode
	     that follows it; only a #blockCopy: is harmless.  */
	  if (bp[1] != BLOCK_COPY_COLON_SPECIAL)
	    return (31);

	  break;

	case 128:
	case 129:
	case 130:
	case 142:		/* 2-byte stack ops */
	  if ((bp[1] & LOCATION_MASK) == RECEIVER_LOCATION
	      && status == 0)
	    status = 1;

	  else if ((bp[1] & LOCATION_MASK) == LIT_CONST_LOCATION)
	    {
	      newStatus =
		check_inner_block (literals[bp[1] & ~LOCATION_MASK]);
	      if (newStatus > status)
		{
		  if (newStatus == 31)
		    return (31);

		  status = newStatus;
		}
	    }
	  break;

	case 138:		/* outer temp operation */
	  if (status < (1 + bp[2]))
	    {
	      status = 1 + bp[2];
	      if (status > 31)	/* ouch! how deep!! */
		return (31);
	    }
	  break;
	}
    }
  return (status);
}

/* Answer the status that a block gets for pushing BLOCKCLOSUREOOP,
   on the same scale used by _gst_check_kind_of_block.  Anything that
   is not a BlockClosure has no effect and yields 0.  */
int
check_inner_block (OOP blockClosureOOP)
{
  gst_block_closure closure;
  gst_compiled_block compiledBlock;
  int cleanness;

  if (!IS_CLASS (blockClosureOOP, _gst_block_closure_class))
    return (0);

  closure = (gst_block_closure) OOP_TO_OBJ (blockClosureOOP);
  compiledBlock = (gst_compiled_block) OOP_TO_OBJ (closure->block);
  cleanness = compiledBlock->header.clean;

  /* A full block (31), a clean block (0) and a block that accesses
     the receiver (1) impose the very same status on the block that
     encloses them.  */
  if (cleanness == 31 || cleanness == 0 || cleanness == 1)
    return (cleanness);

  /* Otherwise the inner block accesses temporaries in its Xth outer
     context; seen from the enclosing block, that context is one
     level closer.  When the result is 1 the temporaries are those of
     the enclosing block itself, which for safety is not considered
     clean either.  */
  return (cleanness - 1);
}


/* qsort callback: order two block_boundary structures by increasing
   offset in the bytecodes.  Answers a negative, zero or positive
   value if A lies respectively before, at, or after B.  */
int
compare_blocks (const PTR a, const PTR b)
{
  /* The offsets are shorts, so their difference cannot overflow an
     int.  */
  int first = ((const block_boundary *) a)->byte;
  int second = ((const block_boundary *) b)->byte;

  return (first - second);
}

/* Run the peephole optimizer on BYTECODES.  The bytecodes are split
   into basic blocks at every jump and jump destination, each block
   is optimized separately by optimize_basic_block (which appends the
   result to the stream of bytecodes being compiled), and finally the
   jumps are patched to account for the new positions.  Jumps to
   jumps and jumps to returns are shortcut in the process.

   The argument is modified in place and then freed with
   _gst_free_bytecodes; the value returned is the one answered by
   _gst_get_bytecodes afterwards.  If NO_OPTIMIZE is defined the
   argument is returned untouched.  */
bytecodes
_gst_optimize_bytecodes (bytecodes bytecodes)
{
  block_boundary *blocks, *current;
  jump *jumps;
  gst_uchar *bp;
  gst_uchar *end, *first;
  int num;
  long maxBoundaries;

#ifdef NO_OPTIMIZE
  return (bytecodes);
#endif

  bp = bytecodes->base;
  end = bytecodes->ptr;

  /* Every jump that is found below records TWO block_boundary
     structures (one for the jump, one for its destination), and a
     jump can be as short as one byte: so there can be up to two
     boundaries per byte.  Reserving a single one per byte would let
     the loop below write past the end of the array.  */
  maxBoundaries = 2 * (long) (end - bp) + 1;
  blocks = alloca (sizeof (block_boundary) * maxBoundaries);
  memset (blocks, 0, sizeof (block_boundary) * maxBoundaries);

  /* 1) Split into basic blocks.  This part cheats so that the final
     fixup also performs jump optimization. */
  for (current = blocks, num = 0; bp != end; bp += BYTECODE_SIZE (*bp))
    {
      /* DEST starts on the bytecode being examined; if that is a
         jump it is moved to where the jump lands, and then on along
         any chain of unconditional jumps that starts there.

         NOTE(review): an unconditional 2-byte jump whose computed
         offset is zero (or any chain of unconditional jumps that
         leads back to itself) never clears canOptimizeJump, so this
         loop would not terminate -- confirm that the compiler cannot
         emit one.  */
      gst_uchar *dest = bp;
      mst_Boolean canOptimizeJump;
      do
	{
	  canOptimizeJump = false;
	  switch (*dest)
	    {
	      /* short jump */
	    case JUMP_SHORT:
	      if (dest[2] == POP_STACK_TOP)
		{
		  /* The bytecodes can only be those produced by
		     #ifTrue:/#ifFalse: 
			     0: jump to 2 
			     1: push nil 
			     2: pop stack top 

		     This could not be optimized to a single
		     pop, cause bytecodes 1 and 2 lie in different
		     basic blocks! So we rewrite it to a functionally
		     equivalent but optimizable bytecode sequence. */
		  *dest = POP_STACK_TOP;
		  break;
		}
	      /* Fall through */

	    case JUMP_SHORT | 1:
	    case JUMP_SHORT | 2:
	    case JUMP_SHORT | 3:
	    case JUMP_SHORT | 4:
	    case JUMP_SHORT | 5:
	    case JUMP_SHORT | 6:
	    case JUMP_SHORT | 7:
	      /* If bp == dest, we could end up writing a 2-byte jump
	         bytecode where space was only reserved for a 1-byte
	         jump bytecode! But if we jump to a return, we can
	         safely optimize -- returns are always one byte */
	      canOptimizeJump = (bp != dest);
	      /* The jump lands (opcode - JUMP_SHORT) + 2 bytes after
	         its own address; the two statements below add exactly
	         that, given the value of the opcode.  */
	      dest += *dest;
	      dest -= 142;
	      canOptimizeJump |= (*dest >= 120 && *dest <= 125);
	      break;

	      /* pop and short jump if false */
	    case POP_JUMP_FALSE_SHORT:
	    case POP_JUMP_FALSE_SHORT | 1:
	    case POP_JUMP_FALSE_SHORT | 2:
	    case POP_JUMP_FALSE_SHORT | 3:
	    case POP_JUMP_FALSE_SHORT | 4:
	    case POP_JUMP_FALSE_SHORT | 5:
	    case POP_JUMP_FALSE_SHORT | 6:
	    case POP_JUMP_FALSE_SHORT | 7:
	      /* UNCONDITIONAL jumps to CONDITIONAL jumps must not be
	         touched! */
	      if (bp == dest)
		{
		  dest += *dest;
		  dest -= 150;
		}
	      break;

	      /* long jump, pop and long jump if true, pop and long
	         jump if false */
	    case JUMP_LONG:
	    case JUMP_LONG | 1:
	    case JUMP_LONG | 2:
	    case JUMP_LONG | 3:
	    case JUMP_LONG | 4:
	    case JUMP_LONG | 5:
	    case JUMP_LONG | 6:
	    case JUMP_LONG | 7:
	      /* 2-byte unconditional jump, we can indeed optimize it */
	      canOptimizeJump = true;
	      dest += ((signed int) dest[1]) + jump_offsets[*dest & 15];
	      break;

	    case POP_JUMP_TRUE:
	    case POP_JUMP_TRUE | 1:
	    case POP_JUMP_TRUE | 2:
	    case POP_JUMP_TRUE | 3:
	    case POP_JUMP_FALSE:
	    case POP_JUMP_FALSE | 1:
	    case POP_JUMP_FALSE | 2:
	    case POP_JUMP_FALSE | 3:
	      /* UNCONDITIONAL jumps to CONDITIONAL jumps must not be
	         touched! */
	      if (bp == dest)
		dest +=
		  ((signed int) dest[1]) + jump_offsets[*dest & 15];

	      break;

	    case RETURN_INDEXED:
	    case RETURN_INDEXED | 1:
	    case RETURN_INDEXED | 2:
	    case RETURN_INDEXED | 3:
	    case RETURN_METHOD_STACK_TOP:
	    case RETURN_CONTEXT_STACK_TOP:
	      /* Return bytecodes - patch the original jump to return
	         directly */
	      if (*bp >= JUMP_LONG)
		{
		  bp[0] = NOP_BYTECODE;
		  bp[1] = *dest;	/* fill both bytes */
		}
	      else
		*bp = *dest;

	      /* This in fact eliminated the jump, don't split in basic 
	         blocks */
	      dest = bp;
	      break;
	    }
	}
      while (canOptimizeJump);
      if (bp != dest)
	{
	  /* Record the destination (positive id) and the jump itself
	     (negative id) under the same number.  */
	  current->byte = dest - bytecodes->base;
	  current->id = ++num;
	  current++;
	  current->byte = bp - bytecodes->base;
	  current->id = -num;
	  current++;
	}
    }

  /* 2) Get the "real" block boundaries by sorting them according to
     where they happen in the original bytecode.  Note that a simple
     bucket sort is not enough because multiple jumps could end on the
     same bytecode, and the same bytecode could be both the start and
     the destination of a jump! */
  qsort (blocks, current - blocks, sizeof (block_boundary),
	 compare_blocks);

  /* 3) Optimize the single basic blocks, and reorganize into `jumps'
     the data that was put in blocks */
  jumps = alloca (sizeof (jump) * num);

  for (bp = bytecodes->base; blocks != current; blocks++)
    {
      /* Emit what lies between the previous boundary and this one;
         the length of the output so far is then the position of this
         boundary in the optimized bytecodes.  */
      first = bp;
      bp = bytecodes->base + blocks->byte;
      optimize_basic_block (first, bp);
      if (blocks->id > 0)
	jumps[blocks->id - 1].dest = _gst_current_bytecode_length ();

      else
	jumps[-blocks->id - 1].from = _gst_current_bytecode_length ();
    }
  optimize_basic_block (bp, end);

  _gst_free_bytecodes (bytecodes);
  bytecodes = _gst_get_bytecodes ();

  /* 4) Fix the jumps so that they correctly point to the start of the
     same basic block */
  for (; num--; jumps++)
    {
      short offset;

      bp = bytecodes->base + jumps->from;
      offset = jumps->dest - jumps->from - 2;
      if (offset == -1)
	{			/* jump to following bytecode do */
	  if (*bp >= JUMP_LONG)	/* NOT exist - use other bytecodes */
	    bp[1] = NOP_BYTECODE;	/* 2 byte jumps = nop+nop or
					   pop+nop */

	  if (*bp & 8)
	    *bp = POP_STACK_TOP;	/* pop stack top for
					   conditionals */
	  else
	    *bp = NOP_BYTECODE;	/* nop for unconditional jumps */

	  continue;
	}
      switch (*bp & ~7)
	{
	  /* short jumps */
	case JUMP_SHORT:
	  *bp = JUMP_SHORT | offset;
	  continue;
	case POP_JUMP_FALSE_SHORT:
	  *bp = POP_JUMP_FALSE_SHORT | offset;
	  continue;

	  /* long jumps */
	case JUMP_LONG:
	  /* Start from the variant with the smallest forward offset;
	     the high part of OFFSET, possibly negative, is added
	     below.  */
	  *bp = JUMP_LONG | 4;
	  break;
	case POP_JUMP_TRUE:
	  /* This also matches the pop-and-jump-if-false family; only
	     the old high part of the offset is cleared.  */
	  *bp &= ~3;
	  break;
	}
      *bp++ += offset >> 8;
      *bp = offset & 255;
    }

  return (bytecodes);
}

/* Peephole-optimize the basic block made of the bytecodes between
   FROM (included) and TO (excluded).  The optimized bytecodes are
   written over the original ones (the output never gets ahead of the
   input), and are finally appended to the bytecodes being compiled
   with _gst_compile_bytecodes.  Answer whether at least one bytecode
   was generated.

   Throughout the loop BP points to the next byte to be read, OPT to
   where the next optimized bytecode goes, and N is the size of the
   last bytecode that was emitted -- so that it starts at opt[-n] --
   or 0 if there is none that can be combined with the current one.  */
mst_Boolean
optimize_basic_block (gst_uchar * from,
		      gst_uchar * to)
{
/* Keep the SIZE bytes starting at OPT as the last emitted bytecode
   and go on with the next one.  */
#define NEXT(size) BEGIN_MACRO {	\
  n = size;				\
  opt += size;				\
  continue;				\
} END_MACRO

/* The last emitted bytecode was rewritten in place (or OPT was
   already moved): just record that its size is SIZE and go on.  */
#define REPLACE(size) BEGIN_MACRO {	\
  n = size;				\
  continue;				\
} END_MACRO

/* Emit the current bytecode unchanged, operand bytes included.  */
#define COPY BEGIN_MACRO {		\
  n = BYTECODE_SIZE(byte);		\
  opt++;				\
  if(n != 1) *opt++ = *bp++;		\
  if(n == 3) *opt++ = *bp++;		\
  continue;				\
} END_MACRO

/* These build an if/else-if chain on BYTE that reads like a table.
   RANGE matches the bytecodes from A to B, both included: all of its
   uses pass the last bytecode of the family as B.  */
#define BEGIN	     if(0) {
#define BYTECODE(n)  } else if(byte == (n)) {
#define RANGE(a, b)  } else if((unsigned char)(byte - (a)) <= ((b) - (a))) {
#define EITHER(a, b) } else if(byte == (a) b) {
#define OR(b)  		       || byte == (b)
#define CONDITION(c) } else if(c) {
#define NO_MATCH     } else {
#define END	     }


  gst_uchar byte, *bp, *opt;
  int n;

  bp = opt = from;
  n = 0;
  while (bp != to)
    {
      /* Tentatively emit the opcode; the cases below decide whether
         it stays, is dropped, or is merged with the previous one.  */
      byte = *opt = *bp++;
      BEGIN
	RANGE (RETURN_INDEXED, RETURN_CONTEXT_STACK_TOP) 
	  opt++;
          break;		/* this `break' performs unreachable
				   code elimination! */

	BYTECODE (NOP_BYTECODE) 
	  REPLACE (n);

        BYTECODE (NOT_EQUAL_SPECIAL) 
	  if (!n)
	    NEXT (1);
	  if (opt[-1] == (PUSH_SPECIAL | NIL_INDEX))
	    {			/* x ~= nil */
	      opt[-1] = NOT_NIL_SPECIAL;
	      REPLACE (1);
	    }
	  NEXT (1);

	EITHER (SAME_OBJECT_SPECIAL, OR (EQUAL_SPECIAL)) 
	  if (!n)
	    NEXT (1);
	  if (opt[-1] == (PUSH_SPECIAL | NIL_INDEX))
	    {			/* x = nil, x == nil */
	      opt[-1] = IS_NIL_SPECIAL;
	      REPLACE (1);
	    }
	  NEXT (1);

        BYTECODE (POP_STACK_TOP) 
	  if (n)
	    {
	      byte = opt[-n];
	      BEGIN 
		CONDITION (IS_PUSH_BYTECODE (byte))	/* push/pop */
		  opt -= n;
	          NEXT (0);

		BYTECODE (STORE_INDEXED)	/* store/pop */
		  byte = *--opt;	/* get data byte */
		  opt--;		/* move to opcode */
		  if (byte < 8)
		    {
		      *opt = POP_RECEIVER_VARIABLE | byte;
		      NEXT (1);
		    }
		  else if (byte >= 64 && byte < 72)
		    {
		      *opt = POP_TEMPORARY_VARIABLE | (byte & 63);
		      NEXT (1);
		    }
		  else
		    {
		      *opt = POP_STORE_INDEXED;
		      NEXT (2);
		    }
		  
		EITHER (BIG_LITERALS_BYTECODE, 
			OR (BIG_INSTANCE_BYTECODE) 
			OR (OUTER_TEMP_BYTECODE))

		  byte = opt[-2];	/* get second byte */
		  if (byte < POP_STORE_VARIABLE)
		    {
		      opt -= n;		/* push/pop */
		      NEXT (0);
		    }
		  else if (byte >= STORE_VARIABLE)
		    {
		      /* store/pop -> pop-store */
		      opt[-2] ^= (POP_STORE_VARIABLE ^ STORE_VARIABLE);
		      REPLACE (3);
		    }
	      END
	    }

	  if (bp != to && (*bp & ~3) == RETURN_INDEXED)
	    {
	      *opt++ = *bp++;	/* pop/return */
	      break;		/* kill unreachable code */
	    }
	  NEXT (1);

	CONDITION (IS_PUSH_BYTECODE (byte))	/* push/push -> dup */
	  if (!n)
	    COPY;
	  if (opt[-n] == *opt)
	    {
	      if (n == 1)
		{
		  *opt = DUP_STACK_TOP;
		  NEXT (1);
		}
	      else if (opt[-1] == *bp)
		{
		  /* two-byte pushes: the operands are equal too */
		  *opt = DUP_STACK_TOP;
		  bp++;
		  NEXT (1);
		}
	    }

	  BEGIN			/* pop-store/push -> store */
	    RANGE (PUSH_RECEIVER_VARIABLE, PUSH_RECEIVER_VARIABLE | 7)
	      if (opt[-n] == (POP_RECEIVER_VARIABLE | (byte & 15)))
		{
		  opt[-1] = STORE_INDEXED;
		  *opt++ = RECEIVER_LOCATION | (byte & 15);
		  REPLACE (2);
		}
	      if (opt[-n] == POP_STACK_TOP)
	        {			/* pop/push -> replace */
		  opt--;
		  *opt++ = REPLACE_INDEXED;
		  *opt++ = RECEIVER_LOCATION | (byte & 15);
		  REPLACE (2);
		}

	    RANGE (PUSH_RECEIVER_VARIABLE | 8, PUSH_RECEIVER_VARIABLE | 15)
	      if (opt[-n] == POP_STORE_INDEXED
		  && opt[-1] == (RECEIVER_LOCATION | (byte & 15)))
		{
		  opt[-2] = STORE_INDEXED;
		  REPLACE (2);
		}
	      if (opt[-n] == POP_STACK_TOP)
		{
		  opt--;
		  *opt++ = REPLACE_INDEXED;
		  *opt++ = RECEIVER_LOCATION | (byte & 15);
		  REPLACE (2);
		}

	    RANGE (PUSH_TEMPORARY_VARIABLE, PUSH_TEMPORARY_VARIABLE | 7)
	      if (opt[-n] == (POP_TEMPORARY_VARIABLE | (byte & 15)))
		{
		  opt[-1] = STORE_INDEXED;
		  *opt++ = TEMPORARY_LOCATION | (byte & 15);
		  REPLACE (2);
		}
	      if (opt[-n] == POP_STACK_TOP)
		{
		  opt--;
		  *opt++ = REPLACE_INDEXED;
		  *opt++ = TEMPORARY_LOCATION | (byte & 15);
		  REPLACE (2);
		}

	    RANGE (PUSH_TEMPORARY_VARIABLE | 8, PUSH_TEMPORARY_VARIABLE | 15)
	      if (opt[-n] == POP_STORE_INDEXED
		  && opt[-1] == (TEMPORARY_LOCATION | (byte & 15)))
		{
		  opt[-2] = STORE_INDEXED;
		  REPLACE (2);
		}
	      if (opt[-n] == POP_STACK_TOP)
	        {
		  opt--;
		  *opt++ = REPLACE_INDEXED;
		  *opt++ = TEMPORARY_LOCATION | (byte & 15);
		  REPLACE (2);
		}

	    RANGE (PUSH_LIT_VARIABLE, PUSH_LIT_VARIABLE | 31)
	      if (opt[-n] == POP_STORE_INDEXED
		  && opt[-1] == (LIT_VAR_LOCATION | (byte & 31)))
		{
		  opt[-2] = STORE_INDEXED;
		  REPLACE (2);
		}
	      if (opt[-n] == POP_STACK_TOP)
		{
		  opt--;
		  *opt++ = REPLACE_INDEXED;
		  *opt++ = LIT_VAR_LOCATION | (byte & 31);
		  REPLACE (2);
		}

	    RANGE (PUSH_LIT_CONSTANT, PUSH_LIT_CONSTANT | 31)
	      if (opt[-n] == POP_STACK_TOP)
		{
		  opt--;
		  *opt++ = REPLACE_INDEXED;
		  *opt++ = LIT_CONST_LOCATION | (byte & 31);
		  REPLACE (2);
		}

	    BYTECODE (PUSH_SPECIAL | RECEIVER_INDEX)
	      if (opt[-n] == POP_STACK_TOP)
		{
		  opt[-1] = REPLACE_SELF;
		  REPLACE (1);
		}

	    BYTECODE (PUSH_SPECIAL | LIT_ONE_INDEX)
	      if (opt[-n] == POP_STACK_TOP)
		{
		  opt[-1] = REPLACE_ONE;
		  REPLACE (1);
		}

	    BYTECODE (PUSH_INDEXED)
	      byte = *bp++;
	      if (opt[-n] == POP_STORE_INDEXED)
		{
		  /* Keep the value on the stack; if it is not the one
		     being pushed, replace it right after.  */
		  opt[-2] = STORE_INDEXED;
		  if (opt[-1] == byte)
		    REPLACE (2);
		  else
		    *opt = REPLACE_INDEXED;
		}
	      else if (opt[-n] == POP_STACK_TOP)
		*--opt = REPLACE_INDEXED;

	      opt[1] = byte;
	      NEXT (2);
	  END;
	  NEXT (1);			/* no match */

        BYTECODE (BIG_INSTANCE_BYTECODE) 
	  if (!n || opt[-n] != byte)
	    COPY;

          byte = opt[-2];		/* get second byte */
	  if (byte < PUSH_VARIABLE)
	    ;	  /* do nothing */
	  else if (byte < POP_STORE_VARIABLE)
	    {
	      if (byte == *bp && opt[-1] == bp[1])
		{			/* push/push -> dup */
		  *opt = DUP_STACK_TOP;
		  bp += 2;
		  NEXT (1);
		}
	    }
	  else if (byte < STORE_VARIABLE)
	    {			/* pop-store/push -> store */
	      if ((byte & 63) == (*bp & 63) && opt[-1] == bp[1])
		{
		  opt[-2] ^= (POP_STORE_VARIABLE ^ STORE_VARIABLE);
		  bp += 2;
		  REPLACE (3);
		}
	    }
	  
	  /* nothing to merge: emit the three bytes as they are */
	  opt++;
	  *opt++ = *bp++;
	  *opt++ = *bp++;
	  REPLACE (3);

        EITHER (BIG_LITERALS_BYTECODE,
	        OR (OUTER_TEMP_BYTECODE)) 
	  if (!n || opt[-n] != byte)
	    COPY;

          byte = opt[-2];		/* get second byte */
	  if (byte < POP_STORE_VARIABLE)
	    {
	      if (byte == *bp && opt[-1] == bp[1])
		{			/* push/push -> dup */
		  *opt = DUP_STACK_TOP;
		  bp += 2;
		  NEXT (1);
		}
	    }
	  else if (byte < STORE_VARIABLE)
	    {			/* pop-store/push -> store */
	      if ((byte & 63) == (*bp & 63) && opt[-1] == bp[1])
		{
		  opt[-2] ^= (POP_STORE_VARIABLE ^ STORE_VARIABLE);
		  bp += 2;
		  REPLACE (3);
		}
	    }
	  
	  /* nothing to merge: emit the three bytes as they are */
	  opt++;
	  *opt++ = *bp++;
	  *opt++ = *bp++;
	  REPLACE (3);

	NO_MATCH
	  COPY;
      END;
    }

  _gst_compile_bytecodes (from, opt);
  return (opt != from);
}


/* Compute where the stack pointer is before each of the SIZE bytes
   of bytecodes starting at BP is executed.  BASE is the value of the
   stack pointer on entry; on output, for each offset I at which a
   bytecode starts, POS[I] is BASE adjusted by the balance of the
   bytecodes that lead there.  The first SIZE elements of POS are
   cleared first.  Forward jumps set the entry of their destination,
   and an entry that is already set is not overwritten when the
   preceding bytecode falls into it.  Bytecodes whose balance cannot
   be determined are reported with _gst_errorf and treated as if they
   did not change the stack.  */
void
_gst_compute_stack_positions (gst_uchar * bp,
			      int size,
			      PTR * base,
			      PTR ** pos)
{
  /* Balance of the 3-byte stack operations; the index is the opcode
     minus 126, or-ed with the two high bits of the second byte.  */
  static const int bigOpBalance[16] = {
    1, 1, -1, 0,		/* 126 (push, push, pop/store, store) */
    0, 0, 0, 0,			/* unused */
    -1, 1, -1, 0,		/* 134 (pop/store, push, pop/store,
				   store) */
    255, 1, -1, 0,		/* 138 (invalid, push, pop/store,
				   store) */
  };
  gst_uchar *limit = bp + size;
  int delta, target, step;

  memzero (pos, sizeof (PTR *) * size);
  pos[0] = base;

  while (bp != limit)
    {
      step = BYTECODE_SIZE (*bp);

      if (*bp == BIG_LITERALS_BYTECODE
	  || *bp == BIG_INSTANCE_BYTECODE
	  || *bp == OUTER_TEMP_BYTECODE)
	/* 3-byte stack operations */
	delta = bigOpBalance[(*bp - 126) | (bp[1] >> 6)];

      else if (*bp == SEND1EXT_BYTE || *bp == SEND_SUPER1EXT_BYTE)
	/* 1-byte sends */
	delta = -(bp[1] >> 5);

      else if (*bp == SEND2EXT_BYTE)
	/* 2-byte send */
	delta = -(bp[1] & 31);

      else
	{
	  /* Everything else has a fixed balance; jumps also tell the
	     stack position at their destination.  */
	  delta = stack_balance_table[*bp];
	  if (*bp >= JUMP_SHORT && *bp < JUMP_LONG)
	    {
	      /* short jumps */
	      target = (*bp & 7) + 2;
	      pos[target] = pos[0] + delta;
	    }
	  else if (*bp >= JUMP_SHORT && *bp < PLUS_SPECIAL)
	    {
	      /* long jumps; backward ones land on bytecodes that
	         were already visited */
	      target = ((signed int) bp[1]) + jump_offsets[*bp & 15];
	      if (target > 0)
		pos[target] = pos[0] + delta;
	    }
	}

      if (delta == 255)
	{
	  _gst_errorf
	    ("Invalid bytecode encountered during bytecode analysis");
	  delta = 0;
	}

      /* Fall into the next bytecode, unless a jump already said
         where the stack is there.  */
      if (!pos[step])
	pos[step] = pos[0] + delta;

      pos += step;
      bp += step;
    }
}


/* State shared by the type-inference routines below.  All of these are
   assigned by _gst_analyze_bytecodes for the method or block that is
   currently being analyzed.  */

/* Depth taken from the method or block header, passed through CTX_SIZE;
   it sizes the constraint array appended to every basic_block.  */
static int stack_depth;
/* Constraint pushed for `self': &small_int_constraint when the method's
   class is SmallInteger, &non_int_constraint otherwise.  */
static constraint *self_constraint;
/* Caller-supplied table with one entry per bytecode offset; it is
   zeroed by _gst_analyze_bytecodes and written by
   propagate_constraints.  */
static char *int_tab;
/* Class of the method under analysis (GET_METHOD_CLASS).  */
static OOP class_oop;
/* Literal frame of the method under analysis (GET_METHOD_LITERALS).  */
static OOP *literals;
/* Obstack from which basic blocks and block headings are allocated.  */
static struct obstack *data_obstack;

/* Analyze the SIZE bytecodes of METHODOOP (a method or a block).
   DEST is filled with the jump-destination table (see
   make_destination_table) and INTTAB is zeroed; both need SIZE entries.
   OBSTACK provides the scratch memory for the analysis.

   When NO_TYPE_INFERENCE is defined only the destination table is
   computed.  Otherwise the bytecodes are split into basic blocks, the
   blocks are linked into a control-flow graph and SmallInteger range
   constraints are propagated through it, recording the outcome in
   INTTAB.  */
void
_gst_analyze_bytecodes (OOP methodOOP,
			 int size,
			 char *dest,
			 char *inIntTab,
			 struct obstack *obstack)
{
  gst_uchar *bp, *end;
  basic_block *basicBlocks, *bb;
  block_pointer *blockHeadings;
  int structSize, numJumps, numTemps, n;

  data_obstack = obstack;
  int_tab = inIntTab;
  memzero (int_tab, sizeof (char) * size);

  bp = GET_METHOD_BYTECODES (methodOOP);
  numJumps = make_destination_table (bp, size, dest);

#ifdef NO_TYPE_INFERENCE
  return;
#endif

  class_oop = GET_METHOD_CLASS (methodOOP);
  literals = GET_METHOD_LITERALS (methodOOP);

  if (OOP_CLASS (methodOOP) == _gst_compiled_block_class)
    {
      stack_depth = GET_BLOCK_HEADER (methodOOP).depth;
      numTemps = GET_BLOCK_HEADER (methodOOP).numTemps;
    }
  else
    {
      stack_depth = GET_METHOD_HEADER (methodOOP).stack_depth;
      numTemps = GET_METHOD_HEADER (methodOOP).numTemps;
    }

  stack_depth = CTX_SIZE (stack_depth);

  self_constraint = (class_oop == _gst_small_integer_class)
    ? &small_int_constraint : &non_int_constraint;

  /* We have * 2 because a basic block has a single entry and a
     single exit.  The element count must be parenthesized: without
     the parentheses only one extra *byte* was requested instead of
     one extra heading.  */
  blockHeadings = (block_pointer *)
    obstack_alloc (data_obstack,
		   sizeof (block_pointer) * (numJumps * 2 + 1));

  /* A basic_block already holds one constraint; append the others.  */
  structSize =
    sizeof (basic_block) + sizeof (constraint) * (stack_depth - 1);
  bb = basicBlocks =
    (basic_block *) obstack_alloc (data_obstack, structSize);

  for (end = bp + size, n = 0;;)
    {
      mst_Boolean fallsThrough;
      blockHeadings[n].bp = bp;
      blockHeadings[n].block = bb;

      /* next_basic_block stores a successor only for unconditional
         jumps; make sure blocks ending with a return see NULL.  */
      bb->next = NULL;
      fallsThrough = next_basic_block (bb, bp, dest);

      /* Go on, please ... */
      bp += bb->size;
      if (bp == end)
	break;

      dest += bb->size;
      n++;

      /* Make the previous block point to this one when execution can
         continue into it.  The target of an unconditional jump, which
         next_basic_block left in bb->next, must not be overwritten.  */
      if (fallsThrough)
	bb->next = (basic_block *) bp;

      bb = (basic_block *) obstack_alloc (data_obstack, structSize);
    }

  /* N is the index of the last heading, so there are N + 1 of them.  */
  connect_basic_blocks (basicBlocks, blockHeadings, n + 1);

  /* Calls itself recursively, visiting the graph depth-first */
  constraint_reset (basicBlocks->constraints, numTemps);
  analyze_basic_block (basicBlocks, NULL, 0, numTemps);

  obstack_free (data_obstack, basicBlocks);
}

/* Zero the SIZE entries of DEST, then scan the SIZE bytecodes at BP and
   flag every jump target in DEST (indexed by bytecode offset): 1 for a
   target reached by a forward jump, -1 for one reached by a long jump
   whose offset is not positive.  Answer the number of jump bytecodes
   that were found.  */
int
make_destination_table (gst_uchar * bp,
			int size,
			char *dest)
{
  gst_uchar *const limit = bp + size;
  int jumps = 0;

  memzero (dest, sizeof (char) * size);

  while (bp != limit)
    {
      const gst_uchar op = *bp;

      if (op >= JUMP_SHORT)
	{
	  if (op < JUMP_LONG)
	    {
	      /* short jumps: the distance is encoded in the opcode */
	      dest[(op & 7) + 2] = 1;
	      jumps++;
	    }
	  else if (op < PLUS_SPECIAL)
	    {
	      /* long jumps: the distance comes from the next byte plus
	         a per-opcode bias */
	      const int distance =
		((signed int) bp[1]) + jump_offsets[op & 15];

	      dest[distance] = (distance <= 0) ? -1 : 1;
	      jumps++;
	    }
	}

      dest += BYTECODE_SIZE (op);
      bp += BYTECODE_SIZE (op);
    }

  return (jumps);
}


/* Comparison callback handed to bsearch by connect_basic_blocks.
   NEEDLE is a bytecode address and HAYSTACK points to a block_pointer;
   the answer is negative, zero or positive depending on whether the
   address lies before, at or after the first bytecode of the block the
   heading refers to.  */
int
find_block_heading (const PTR needle,
		    const PTR haystack)
{
  const block_pointer *entry = (const block_pointer *) haystack;
  const gst_uchar *wanted = (const gst_uchar *) needle;
  const gst_uchar *blockStart = entry->block->bp;

  return (wanted - blockStart);
}

/* Turn the bytecode addresses that next_basic_block and
   _gst_analyze_bytecodes stored in the `next' and `satisfiedNext'
   fields of each block into pointers to the actual successor blocks,
   and register the corresponding edges with add_edge.  Each block's
   `offset' (distance of its first bytecode from the first heading) is
   filled in as well.

   BLOCKHEADINGS holds NUMHEADINGS entries sorted by bytecode address.
   BASICBLOCKS is not used.  A successor address that matches no heading
   is dropped (the field becomes NULL) instead of dereferencing the NULL
   answered by bsearch.  */
void
connect_basic_blocks (basic_block * basicBlocks,
		      block_pointer * blockHeadings,
		      int numHeadings)
{
  block_pointer *heading, *thisOne;
  basic_block *successor, *thisBlock;
  gst_uchar *base = blockHeadings[0].bp;
  int left;

  /* Inside the body LEFT has already been decremented, so it counts
     the headings that come after THISONE.  */
  for (thisOne = blockHeadings, left = numHeadings; left--; thisOne++)
    {
      thisBlock = thisOne->block;
      thisBlock->offset = thisBlock->bp - base;

      if (!thisBlock->next)
	{
	  /* This block returns. */
	  continue;
	}

      if ((gst_uchar *) thisBlock->next == thisOne[1].bp)
	{
	  /* Optimize the common case when the successor follows this
	     block in the bytecode stream */
	  successor = thisOne[1].block;
	}
      else
	{
	  heading = bsearch (thisBlock->next,
			     blockHeadings, numHeadings,
			     sizeof (block_pointer),
			     find_block_heading);

	  successor = heading ? heading->block : NULL;
	}

      thisBlock->next = successor;
      if (successor)
	add_edge (thisBlock, successor);

      if (thisBlock->satisfiedNext)
	{
	  /* Do another search if we have a conditional branch.
	     Conditional jumps only go forward, so we can reduce the
	     binary search range to the LEFT headings that follow this
	     one.  (The count used to be LEFT - 1, which wrongly excluded
	     the last heading.)  */
	  heading = bsearch (thisBlock->satisfiedNext,
			     thisOne + 1, left,
			     sizeof (block_pointer),
			     find_block_heading);

	  successor = heading ? heading->block : NULL;
	  thisBlock->satisfiedNext = successor;
	  if (successor)
	    add_edge (thisBlock, successor);
	}
    }
}

/* Initialize BASICBLOCK as the basic block starting at BP.  DEST is the
   jump-destination table entry that corresponds to BP.  The block
   extends up to and including the first return or jump bytecode, or
   stops just before the first bytecode (other than BP itself) that is
   a jump destination.

   On exit `size' holds the block length in bytes.  For a conditional
   jump `satisfiedNext' holds the *bytecode address* of the jump target;
   for an unconditional jump that address goes into `next' (both are
   converted to block pointers later by connect_basic_blocks).
   Answer whether execution can continue into the bytecode that follows
   the block.  */
mst_Boolean
next_basic_block (basic_block * basicBlock,
		  gst_uchar * bp,
		  char *dest)
{
  mst_Boolean fallsThrough = true;

  /* The block comes from obstack_alloc, which does not clear memory:
     give every field that is read later a defined value.  */
  basicBlock->bp = bp;
  basicBlock->next = NULL;
  basicBlock->satisfiedNext = NULL;
  basicBlock->numBackwardEdges = basicBlock->numForwardEdges = 0;
  basicBlock->active = 0;
  basicBlock->timesVisited = 0;

  for (;;)
    {
      /* Go on until we find a branch bytecode, a return bytecode, or a 
         jump destination. */
      if ((*bp >= RETURN_INDEXED && *bp <= RETURN_CONTEXT_STACK_TOP)
	  || (*bp >= JUMP_SHORT && *bp < PLUS_SPECIAL)
	  || (*dest && bp != basicBlock->bp))
	break;

      dest += BYTECODE_SIZE (*bp);
      bp += BYTECODE_SIZE (*bp);
    }

  /* If we got here because a jump lands on bp, we are done -- that
     bytecode actually belongs to the *next* basic block. Else, we must 
     increment bp to include the jump or return in the basic block, and 
     fill the `next' or `satisfiedNext' field for unconditional or
     conditional jumps, respectively. */
  if (!*dest || bp == basicBlock->bp)
    {
      if (*bp >= JUMP_SHORT && *bp < PLUS_SPECIAL)
	{
	  int ofs;

	  ofs = *bp < JUMP_LONG
	    ? (*bp & 7) + 2 : ((signed int) bp[1]) +
	    jump_offsets[*bp & 15];

	  /* Bit 3 of the opcode tells conditional jumps apart from
	     unconditional ones.  */
	  if ((*bp & 8) != (JUMP_SHORT & 8))
	    {
	      basicBlock->satisfiedNext = (basic_block *) (bp + ofs);
	      fallsThrough = true;
	    }
	  else
	    {
	      basicBlock->next = (basic_block *) (bp + ofs);
	      fallsThrough = false;
	    }
	}
      else
	{
	  /* A return */
	  fallsThrough = false;
	}

      /* Include the jump or return in the basic block */
      bp += BYTECODE_SIZE (*bp);
    }

  basicBlock->size = bp - basicBlock->bp;
  return (fallsThrough);
}

/* This does the actual work for analyze_basic_block, walking through the 
 * block's bytecodes.  This however does not know about loops, nor does
 * it know about multiple predecessors.  The algorithm is as follows:
 * - trace the execution of the bytecodes as long as they're not jumps
 *   or boolean operations.  Sends to self are inspected for primitives
 *   that return SmallIntegers.
 *
 * - on a boolean operation, check if it is followed by a jump, and if
 *   both LHS and RHS are known to be SmallIntegers.  In this case,
 *   move our own output constraints to both successor BasicBlocks,
 *   merging either the result of the boolean operation, or its inverse,
 *   with the current constraints about the LHS or RHS (and its source,
 *   and its source's source, etc...)
 *
 *	(Simple) example:
 *		i <= 1000
 *
 *	Stack contains:
 *		i		MIN_ST_INT .. MAX_ST_INT   <----.
 *		...						| source
 *		LHS		MIN_ST_INT .. MAX_ST_INT -------'
 *		RHS		1000	   .. 1000
 *
 *	The `true' basic block will have i constrained to MIN_ST_INT .. 1000,
 *	the `false' basic block will have i constrained to 1001 .. MAX_ST_INT.
 *
 *	(Complex) example:
 *		i <= j
 *
 *	Stack contains:
 *		j		1          .. 400   <--------------.
 *		i		MIN_ST_INT .. 250   <-----------.  |
 *		...						|  |
 *		LHS		MIN_ST_INT .. 250   ------------'  |
 *		RHS		1   	   .. 400   ---------------'
 *
 *	The `true' basic block will have no gain, because its constraints
 *	will still be 1..400 and MIN_ST_INT..250.  But imposing i.min > j.min
 *	and j.max < i.max (see constraint_less_equal) in the `false' basic
 *	block, constrains i to 2 .. 250 and j to 1 .. 249.
 *
 * - on an unconditional jump, go on evaluating the destination.
 *   That means, move our own output constraints to the successor
 *   basic_block and recurse.
 *
 * - on a jump that was not preceded by a boolean, evaluate both
 *   branches, moving our own output constraints to both successor
 *   BasicBlocks and recursing.
 *
 * Actually, the recursion part is done in analyze_basic_block, not here.
 *
 * Conventions used in the body: STACKP points to the first free slot,
 * so the top of the stack is STACKP - 1; TOP is the TOP_* code answered
 * by the constraint_* helper that produced the current top of stack,
 * and it is stored in INT_TAB at the offset of the bytecode that is
 * about to be traced.
 */
void
propagate_constraints (basic_block * basicBlock,
			    char *int_tab)
{
  constraint *stackp, *temps;
  gst_uchar *bp, *end, *intTabPtr;
  gst_uchar b, inverseCondition;
  int n, spOffset, top;

  /* NOTE(review): the stack starts at the same slot as the temporaries;
     this looks suspicious but is kept as it was -- confirm.  */
  for (bp = basicBlock->bp, end = bp + basicBlock->size,
       stackp = temps = basicBlock->constraints,
       intTabPtr = (gst_uchar *) int_tab + basicBlock->offset,
       top = TOP_UNKNOWN;
       bp < end;
       bp += BYTECODE_SIZE (b), intTabPtr += BYTECODE_SIZE (b))
    {

      *intTabPtr = top;
      b = *bp;
      switch (b)
	{
	  /* push inst var */
	case 0: case 1: case 2: case 3:
	case 4: case 5: case 6: case 7:
	case 8: case 9: case 10: case 11:
	case 12: case 13: case 14: case 15:
	  top = constraint_reset (stackp++, 1);
	  continue;

	  /* push temporary */
	case 16: case 17: case 18: case 19:
	case 20: case 21: case 22: case 23:
	case 24: case 25: case 26: case 27:
	case 28: case 29: case 30: case 31:
	  top = constraint_copy (&temps[b - 16], stackp++);
	  continue;

	  /* push literal */
	case 32: case 33: case 34: case 35:
	case 36: case 37: case 38: case 39:
	case 40: case 41: case 42: case 43:
	case 44: case 45: case 46: case 47:
	case 48: case 49: case 50: case 51:
	case 52: case 53: case 54: case 55:
	case 56: case 57: case 58: case 59:
	case 60: case 61: case 62: case 63:
	  top = constraint_set (literals[b - 32], stackp++);
	  continue;

	  /* push global */
	case 64: case 65: case 66: case 67:
	case 68: case 69: case 70: case 71:
	case 72: case 73: case 74: case 75:
	case 76: case 77: case 78: case 79:
	case 80: case 81: case 82: case 83:
	case 84: case 85: case 86: case 87:
	case 88: case 89: case 90: case 91:
	case 92: case 93: case 94: case 95:
	  top = constraint_reset (stackp++, 1);
	  continue;

	  /* pop inst var */
	case 96: case 97: case 98: case 99:
	case 100: case 101: case 102: case 103:
	  stackp--;
	  top = TOP_UNKNOWN;
	  continue;

	  /* pop temp var */
	case 104: case 105: case 106: case 107:
	case 108: case 109: case 110: case 111:
	  top = constraint_copy (--stackp, &temps[b - 104]);
	  continue;

	case 112:		/* push self */
	  top = constraint_copy (self_constraint, stackp++);
	  continue;

	  /* push true/false/nil */
	case 113: case 114: case 115:
	  top = constraint_reset (stackp++, 1);
	  continue;

	  /* push -1..2 */
	case 116: case 117: case 118: case 119:
	  top = constraint_set (FROM_INT (b - 117), stackp++);
	  continue;

	  /* returns */
	case 120: case 121: case 122:
	case 123: case 124: case 125:
	  continue;

	case 126:		/* big literals */
	  n = (bp[1] * 256 + bp[2]) & 16383;
	  switch (bp[1] & OPERATION_MASK)
	    {
	    case PUSH_LITERAL:
	      top = constraint_set (literals[n], stackp++);
	      break;
	    case PUSH_VARIABLE:
	      top = constraint_reset (stackp++, 1);
	      break;
	    case POP_STORE_VARIABLE:
	      top = TOP_UNKNOWN;
	      stackp--;
	      break;
	    case STORE_VARIABLE:
	      break;
	    }
	  continue;

	case 128:		/* push */
	  n = bp[1] & 63;
	  switch (bp[1] & LOCATION_MASK)
	    {
	    case RECEIVER_LOCATION:
	      top = constraint_reset (stackp++, 1);
	      break;
	    case TEMPORARY_LOCATION:
	      top = constraint_copy (&temps[n], stackp++);
	      break;
	    case LIT_CONST_LOCATION:
	      top = constraint_set (literals[n], stackp++);
	      break;
	    case LIT_VAR_LOCATION:
	      top = constraint_reset (stackp++, 1);
	      break;
	    }
	  continue;

	case 129:		/* store */
	  n = bp[1] & 63;
	  if ((bp[1] & LOCATION_MASK) == TEMPORARY_LOCATION)
	    top = constraint_copy (stackp - 1, &temps[n]);

	  continue;

	case 130:		/* pop/store */
	  n = bp[1] & 63;
	  --stackp;
	  if ((bp[1] & LOCATION_MASK) == TEMPORARY_LOCATION)
	    top = constraint_copy (stackp, &temps[n]);

	  continue;

	case 131:		/* 2-byte send */
	case 133:
	  stackp -= (bp[1] >> 5);
	  top = constraint_reset (stackp - 1, 1);
	  continue;

	case 132:		/* 3-byte send */
	  stackp -= (bp[1] & 31);
	  top = constraint_reset (stackp - 1, 1);
	  continue;

	case 134:		/* big instance var */
	  switch (bp[1] & OPERATION_MASK)
	    {
	    case POP_STORE_INTO_ARRAY:
	      top = TOP_UNKNOWN;
	      stackp--;
	      break;
	    case PUSH_VARIABLE:
	      top = constraint_reset (stackp++, 1);
	      break;
	    case POP_STORE_VARIABLE:
	      top = TOP_UNKNOWN;
	      stackp--;
	      break;
	    case STORE_VARIABLE:
	      break;
	    }
	  continue;

	case 135:		/* pop */
	  stackp--;
	  top = TOP_UNKNOWN;
	  continue;

	case 136:		/* dup */
	  top = constraint_copy (stackp - 1, stackp);
	  stackp++;
	  continue;

	case 137:		/* push thisContext */
	  top = constraint_reset (stackp++, 1);
	  continue;

	case 138:		/* outer temporaries */
	  switch (bp[1] & OPERATION_MASK)
	    {
	    case 0:
	      break;
	    case PUSH_VARIABLE:
	      top = constraint_reset (stackp++, 1);
	      break;
	    case POP_STORE_VARIABLE:
	      top = TOP_UNKNOWN;
	      stackp--;
	      break;
	    case STORE_VARIABLE:
	      break;
	    }
	  continue;

	case 139:		/* nop */
	  continue;

	  /* The next three bytecodes replace the top of the stack
	     without changing its depth, hence they must write to
	     STACKP - 1 (the top) and not to STACKP (the first free
	     slot), which left the real top with a stale constraint.  */
	case 140:		/* top = self */
	  top = constraint_copy (self_constraint, stackp - 1);
	  continue;

	case 141:		/* top = 1 */
	  top = constraint_set (FROM_INT (1), stackp - 1);
	  continue;

	case 142:		/* set top */
	  n = bp[1] & 63;
	  switch (bp[1] & LOCATION_MASK)
	    {
	    case RECEIVER_LOCATION:
	      top = constraint_reset (stackp - 1, 1);
	      break;
	    case TEMPORARY_LOCATION:
	      top = constraint_copy (&temps[n], stackp - 1);
	      break;
	    case LIT_CONST_LOCATION:
	      top = constraint_set (literals[n], stackp - 1);
	      break;
	    case LIT_VAR_LOCATION:
	      top = constraint_reset (stackp - 1, 1);
	      break;
	    }
	  continue;

	case 143:		/* exit */
	  continue;

	  /* conditional jump: pop the boolean and visit the taken
	     branch, then share the code of unconditional jumps for the
	     other successor */
	case 152: case 153: case 154: case 155:
	case 156: case 157: case 158: case 159:
	case 168: case 169: case 170: case 171:
	case 172: case 173: case 174: case 175:
	  stackp--;
	  analyze_basic_block (basicBlock->satisfiedNext, basicBlock,
			       0, stackp - basicBlock->constraints);
	  /* fall through */

	  /* unconditional jump */
	case 144: case 145: case 146: case 147:
	case 148: case 149: case 150: case 151:
	case 160: case 161: case 162: case 163:
	case 164: case 165: case 166: case 167:
	  analyze_basic_block (basicBlock->next, basicBlock,
			       0, stackp - basicBlock->constraints);

	  top = TOP_UNKNOWN;
	  continue;

	case 176:		/* plus */
	  stackp--;
	  top = constraint_sum (stackp - 1, stackp, stackp - 1);
	  continue;

	case 177:		/* minus */
	  stackp--;
	  top = constraint_subtract (stackp - 1, stackp, stackp - 1);
	  continue;

	case 184:		/* times */
	  stackp--;
	  top = constraint_multiply (stackp - 1, stackp, stackp - 1);
	  continue;

	case 186:		/* remainder */
	  stackp--;
	  top = constraint_remainder (stackp - 1, stackp, stackp - 1);
	  continue;

	case 188:		/* shift */
	  stackp--;
	  top = constraint_bit_shift (stackp - 1, stackp, stackp - 1);
	  continue;

	case 189:		/* int division */
	  stackp--;
	  top = constraint_division (stackp - 1, stackp, stackp - 1);
	  continue;

	case 190:		/* and */
	  stackp--;
	  top = constraint_bit_and (stackp - 1, stackp, stackp - 1);
	  continue;

	case 191:		/* or */
	  stackp--;
	  top = constraint_bit_or (stackp - 1, stackp, stackp - 1);
	  continue;

	  /* miscellaneous 0-argument special sends */
	case 194: case 195: case 197: case 199:
	case 201: case 204: case 206: case 207:

	  /* 1-byte sends, 0 arg */
	case 208: case 209: case 210: case 211:
	case 212: case 213: case 214: case 215:
	case 216: case 217: case 218: case 219:
	case 220: case 221: case 222: case 223:
	  top = constraint_reset (stackp - 1, 1);
	  continue;

	  /* miscellaneous 1-argument special sends.  187 is the only
	     non-relational selector of the 176..191 range that had no
	     case of its own, so it used to be mistaken for a comparison
	     when a conditional jump followed it.  */
	case 185: case 187: case 192: case 196: case 198:
	case 200: case 202: case 203: case 205:

	  /* 1-byte sends, 1 arg */
	case 224: case 225: case 226: case 227:
	case 228: case 229: case 230: case 231:
	case 232: case 233: case 234: case 235:
	case 236: case 237: case 238: case 239:
	  stackp -= 1;
	  top = constraint_reset (stackp - 1, 1);
	  continue;

	case 193:		/* #at:put: */

	  /* 1-byte sends, 2 arg */
	case 240: case 241: case 242: case 243:
	case 244: case 245: case 246: case 247:
	case 248: case 249: case 250: case 251:
	case 252: case 253: case 254: case 255:
	  stackp -= 2;
	  top = constraint_reset (stackp - 1, 1);
	  continue;
	}

      /* Here we have a relational operator (178..183; 127 has no case
         either and ends up here too).  */
      if ((bp[1] < 152 || bp[1] > 159) &&	/* not a 1-byte
						   conditional jump */
	  (bp[1] < 168 || bp[1] > 175))
	{			/* not a 2-byte conditional jump */
	  stackp -= 1;
	  top = constraint_reset (stackp - 1, 1);
	  continue;
	}

      /* We can make the constraints more strict.  The inverse of a
         comparison is obtained by flipping bits of its bytecode:
         182 <-> 183 differ in bit 0 only, while 178 <-> 181 and
         179 <-> 180 differ in the three low bits (XOR with 3 would
         yield 177, 176, 183 and 182 instead).  */
      spOffset = stackp - basicBlock->constraints;
      inverseCondition = (b == 182 || b == 183) ? b ^ 1 : b ^ 7;
      analyze_basic_block (basicBlock->satisfiedNext, basicBlock, b,
			   spOffset - 1);
      analyze_basic_block (basicBlock->next, basicBlock,
			   inverseCondition, spOffset - 1);

      /* We just processed the jump -- the basic block ends here.
         Update the int_tab for the jump's bytecode. */
      intTabPtr[1] = TOP_IS_NOT_INTEGER;
      break;
    }
}

/* The most complicated thing to do here is handling loops.  We scan
 * the loop body to search for written variables and, if they were
 * SmallIntegers, we widen their constraints at the loop entry point,
 * setting the worst possible range (either MIN_ST_INT .. MAX_ST_INT,
 * <= MAX_ST_INT, or >= MIN_ST_INT).
 *
 * We then perform our normal chores, which will pass a reduced
 * constraint to the loop (for example to MIN_ST_INT .. 1000 for a
 * `1 to: 1000 do: [ :i | ... ]' loop).  When we re-enter the
 * basic block, we merge our in-constraint with the loop's out-constraint
 * (in this case, we will merge 1 .. 1 with MIN_ST_INT .. 1001 --
 * 1001 because the loop body increments i) and propagate constraints
 * without doing any recursion in the loop.
 *
 * What if the loop decrements i instead of incrementing? The same
 * holds, because we re-enter the basic-block with a <= 999 constraint.
 *
 * BASICBLOCK is the block to visit (nothing happens if it is NULL),
 * ENTERFROM the predecessor we are coming from (NULL for the entry
 * block), CONDITION the bytecode of the comparison that led here (0 if
 * none) and TOPOFSTACK the number of constraint slots to carry over.
 */
void
analyze_basic_block (basic_block * basicBlock,
		     basic_block * enterFrom,
		     gst_uchar condition,
		     int topOfStack)
{
  if (basicBlock == NULL)
    return;

  basicBlock->timesVisited++;

  if (enterFrom != NULL)
    {
      /* First, work from the back: blocks with several forward
         predecessors collect the constraints of each of them.  */
      if (basicBlock->numForwardEdges != 0)
	{
	  /* NOTE(review): the counter has just been incremented, so
	     this test only holds if it was -1 on entry -- confirm that
	     merge/copy are not swapped here.  */
	  if (basicBlock->timesVisited == 0)
	    constraint_merge_block (enterFrom->constraints,
				    basicBlock->constraints,
				    condition, topOfStack);
	  else
	    constraint_copy_block (enterFrom->constraints,
				   basicBlock->constraints, condition,
				   topOfStack);

	  basicBlock->numForwardEdges--;
	  if (basicBlock->numForwardEdges != 0)
	    {
	      /* Only do the actual analysis the last time we're
	         through */
	      return;
	    }

	  basicBlock->timesVisited = -basicBlock->numBackwardEdges;
	}

      if (basicBlock->numBackwardEdges != 0)
	{
	  if (basicBlock->timesVisited != -basicBlock->numBackwardEdges)
	    {
	      if (basicBlock->timesVisited < 0)
		{
		  /* Add any information on the changes made by the
		     loop body */
		  constraint_merge_block (enterFrom->constraints,
					  basicBlock->constraints,
					  condition, topOfStack);

		  /* HOW DO I PROPAGATE THE CONSTRAINTS IN THE
		     CONDITION WITHOUT DOING RECURSION IN THE LOOP
		     BODY??? idea:
		     - visit the backward edges, find the
		     last basic block containing a backward jump to
		     basicBlock.
		     - visit the whole loop (see
		     unconstrain_written_variables) and find the last
		     basic block that jumps over the basic block we had
		     found.
		     - somehow make propagate_constraints
		     recurse until that basic block and no further.  To
		     do so, we could change restartAnalysisFor into,
		     say, `restrictAnalysisRange'. */
		}

	      /* Re-entered through a loop: no further recursion */
	      return;
	    }

	  /* Unconstrain the variables on the first pass */
	  visit_backward_edges (basicBlock,
				unconstrain_written_variables,
				basicBlock);
	}
    }

  propagate_constraints (basicBlock, int_tab);
}

/* Scan the bytecodes of LOOPBLOCK for stores into temporaries and widen
   the matching constraint of ENTRYPOINT, then go on with the forward
   successors of LOOPBLOCK until ENTRYPOINT itself is reached.  */
void
unconstrain_written_variables (basic_block * loopBlock,
			       basic_block * entryPoint)
{
  gst_uchar *cursor = loopBlock->bp;
  gst_uchar *const limit = cursor + loopBlock->size;

  while (cursor < limit)
    {
      constraint *written = NULL;

      if ((*cursor & ~7) == POP_TEMPORARY_VARIABLE)
	{
	  /* 1-byte pop into temporary: the index is in the opcode */
	  written = &entryPoint->constraints[*cursor & 7];
	}
      else if ((*cursor == STORE_INDEXED || *cursor == POP_STORE_INDEXED)
	       && (cursor[1] & LOCATION_MASK) == TEMPORARY_LOCATION)
	{
	  /* 2-byte (pop/)store: the index is in the second byte */
	  written = &entryPoint->constraints[cursor[1] & ~LOCATION_MASK];
	}

      if (written != NULL)
	{
	  /* Found a `store-into-temporary' bytecode.  If the variable
	     was integer, instead of unconstraining we set the worst
	     possible conditions.  This is because if they do bitwise
	     operations the result cannot in any case exceed MAX_ST_INT
	     or MIN_ST_INT (no carries can occur); instead if they do
	     most arithmetic operations (+, - *) an overflow will occur.
	     ### This needs to be reviewed later though. */
	  if (written->min >= MIN_ST_INT)
	    written->min = MIN_ST_INT;

	  if (written->max <= MAX_ST_INT)
	    written->max = MAX_ST_INT;
	}

      cursor += BYTECODE_SIZE (*cursor);
    }

  /* Assumes no jumps to the middle of a loop -- always true because
     Smalltalk has no goto. */
  if (loopBlock != entryPoint)
    visit_forward_edges (loopBlock, unconstrain_written_variables,
			 entryPoint);
}


/* Functions to operate on constraint operands follow */

/* The macros below expect `constraint *a', `constraint *b' and (except
   for the first one) `constraint *result' to be in scope.  A bound is
   outside the SmallInteger range when min < MIN_ST_INT or
   max > MAX_ST_INT.  */

/* For void functions: return at once if any bound of A or B lies
   outside the SmallInteger range.  */
#define EXIT_IF_OUT_OF_RANGE				\
  if (a->min < MIN_ST_INT || a->max > MAX_ST_INT	\
      || b->min < MIN_ST_INT || b->max > MAX_ST_INT) { 	\
    return;						\
  }

/* If any bound of A or B lies outside the SmallInteger range, set
   RESULT to the out-of-range interval and return TOP_UNKNOWN.  */
#define CHECK_OUT_OF_RANGE_STRICT			\
  if (a->min < MIN_ST_INT || a->max > MAX_ST_INT	\
      || b->min < MIN_ST_INT || b->max > MAX_ST_INT) {	\
    result->min = MIN_ST_INT-1;				\
    result->max = MAX_ST_INT+1;				\
    return TOP_UNKNOWN;					\
  }

/* Looser variant: bail out only when *both* bounds of A, or both bounds
   of B, lie outside the SmallInteger range.  */
#define CHECK_OUT_OF_RANGE					\
  if ((a->min < MIN_ST_INT && a->max > MAX_ST_INT)		\
      || (b->min < MIN_ST_INT && b->max > MAX_ST_INT)) {	\
    result->min = MIN_ST_INT-1;					\
    result->max = MAX_ST_INT+1;					\
    return TOP_UNKNOWN;						\
  }

/* Return TOP_IS_INTEGER if RESULT lies within the SmallInteger range,
   TOP_UNKNOWN otherwise.  The terminating semicolon is supplied at the
   point of use.  */
#define DO_RETURN						\
  return (result->min < MIN_ST_INT || result->max > MAX_ST_INT)	\
    ? TOP_UNKNOWN : TOP_IS_INTEGER				\

/* Make RESULT a copy of A and remember A as its source, so that later
   refinements of RESULT can be propagated back.  Answer TOP_IS_INTEGER
   if the copied range fits in a SmallInteger, TOP_UNKNOWN otherwise.  */
int
constraint_copy (constraint * a,
		 constraint * result)
{
  result->source = a;
  result->max = a->max;
  result->min = a->min;

  if (result->min < MIN_ST_INT || result->max > MAX_ST_INT)
    return TOP_UNKNOWN;

  return TOP_IS_INTEGER;
}

/* Set RESULT to the constraint describing the constant OOP.  If OOP is
   a SmallInteger the range collapses to its value and TOP_IS_INTEGER is
   answered; for any other object the range is marked as out of the
   SmallInteger range and TOP_IS_NOT_INTEGER is answered.  (The test
   used to be inverted, so TO_INT was applied to non-integer OOPs.)  */
int
constraint_set (OOP oop,
		constraint * result)
{
  result->source = NULL;
  if (IS_INT (oop))
    {
      result->min = result->max = TO_INT (oop);
      return TOP_IS_INTEGER;
    }
  else
    {
      result->min = MIN_ST_INT - 1;
      result->max = MAX_ST_INT + 1;
      return TOP_IS_NOT_INTEGER;
    }
}

/* Forget everything about the N constraints starting at A: each one
   gets the out-of-range interval and no source.  Always answers
   TOP_UNKNOWN.  */
int
constraint_reset (constraint * a,
		  int n)
{
  constraint *slot = a;

  while (n-- != 0)
    {
      slot->source = NULL;
      slot->min = MIN_ST_INT - 1;
      slot->max = MAX_ST_INT + 1;
      slot++;
    }

  return TOP_UNKNOWN;
}

/* Store in RESULT the range of A + B.  A bound that overflows the
   SmallInteger range, or that derives from an operand bound already
   outside it, is set to the out-of-range marker.  RESULT may be the
   same object as A.  Answer TOP_IS_INTEGER if the whole result range
   fits in a SmallInteger, TOP_UNKNOWN otherwise.  */
int
constraint_sum (constraint * a,
		constraint * b,
		constraint * result)
{
  long min, max;

  result->source = NULL;
  CHECK_OUT_OF_RANGE;

  min = a->min + b->min;
  result->min = min < MIN_ST_INT || a->min < MIN_ST_INT
    || b->min < MIN_ST_INT ? MIN_ST_INT - 1 : min;

  /* The upper bound is the sum of the upper bounds (it used to be
     assigned MIN by mistake).  */
  max = a->max + b->max;
  result->max = max > MAX_ST_INT || a->max > MAX_ST_INT
    || b->max > MAX_ST_INT ? MAX_ST_INT + 1 : max;

  DO_RETURN;
}

/* Store in RESULT the range of A - B: the lower bound is
   A.min - B.max, the upper bound A.max - B.min.  A bound that
   overflows the SmallInteger range, or that derives from an operand
   bound already outside it, is set to the out-of-range marker.  RESULT
   may be the same object as A.  Answer TOP_IS_INTEGER if the whole
   result range fits in a SmallInteger, TOP_UNKNOWN otherwise.  */
int
constraint_subtract (constraint * a,
		     constraint * b,
		     constraint * result)
{
  long min, max;

  result->source = NULL;
  CHECK_OUT_OF_RANGE;

  min = a->min - b->max;
  result->min = min < MIN_ST_INT || a->min < MIN_ST_INT
    || b->max > MAX_ST_INT ? MIN_ST_INT - 1 : min;

  /* The upper bound must come from MAX (it used to be assigned MIN by
     mistake).  */
  max = a->max - b->min;
  result->max = max > MAX_ST_INT || a->max > MAX_ST_INT
    || b->min < MIN_ST_INT ? MAX_ST_INT + 1 : max;

  DO_RETURN;
}

/* Store in RESULT the range of A * B, computed as the smallest and the
   largest of the four products of the operands' bounds (each obtained
   through mul_with_check).  Both operands must lie entirely within the
   SmallInteger range, otherwise the result is unknown.  Answer
   TOP_IS_INTEGER if the result range fits in a SmallInteger,
   TOP_UNKNOWN otherwise.  */
int
constraint_multiply (constraint * a,
		     constraint * b,
		     constraint * result)
{
  long lowLow, lowHigh, highLow, highHigh;
  long smallestFromLow, largestFromLow;
  long smallestFromHigh, largestFromHigh;

  result->source = NULL;
  CHECK_OUT_OF_RANGE_STRICT;

  /* Products involving a->min */
  lowLow = mul_with_check (a->min, b->min);
  lowHigh = mul_with_check (a->min, b->max);
  if (lowLow < lowHigh)
    {
      smallestFromLow = lowLow;
      largestFromLow = lowHigh;
    }
  else
    {
      smallestFromLow = lowHigh;
      largestFromLow = lowLow;
    }

  /* Products involving a->max */
  highLow = mul_with_check (a->max, b->min);
  highHigh = mul_with_check (a->max, b->max);
  if (highLow < highHigh)
    {
      smallestFromHigh = highLow;
      largestFromHigh = highHigh;
    }
  else
    {
      smallestFromHigh = highHigh;
      largestFromHigh = highLow;
    }

  result->min = (smallestFromLow < smallestFromHigh)
    ? smallestFromLow : smallestFromHigh;
  result->max = (largestFromLow > largestFromHigh)
    ? largestFromLow : largestFromHigh;

  if (result->min < MIN_ST_INT || result->max > MAX_ST_INT)
    return TOP_UNKNOWN;

  return TOP_IS_INTEGER;
}

/* Store in RESULT the range of A \\ B.  The range only depends on the
   divisor: it goes from B.min + 1 (or 0 if B.min is not negative) to
   B.max - 1 (or 0 if B.max is not positive).  Answer TOP_IS_INTEGER if
   the result range fits in a SmallInteger, TOP_UNKNOWN otherwise.  */
int
constraint_remainder (constraint * a,
		      constraint * b,
		      constraint * result)
{
  int dividendUnbounded, divisorNotSmall;

  result->source = NULL;

  /* It does not matter if `a' is a LargeInteger as long as b is a
     SmallInteger. */
  dividendUnbounded = (a->min < MIN_ST_INT && a->max > MAX_ST_INT);
  divisorNotSmall = (b->min < MIN_ST_INT || b->max > MAX_ST_INT);
  if (dividendUnbounded || divisorNotSmall)
    {
      result->min = MIN_ST_INT - 1;
      result->max = MAX_ST_INT + 1;
      return TOP_UNKNOWN;
    }

  if (b->min < 0)
    result->min = b->min + 1;
  else
    result->min = 0;

  if (b->max > 0)
    result->max = b->max - 1;
  else
    result->max = 0;

  if (result->min < MIN_ST_INT || result->max > MAX_ST_INT)
    return TOP_UNKNOWN;

  return TOP_IS_INTEGER;
}

/* Store in RESULT the range of A // B (division rounding towards minus
   infinity), computed from the four quotients of the operands' bounds.
   Both operands must lie entirely within the SmallInteger range, and
   the divisor range must not include zero: in that case the quotient
   is not bounded by the corner values (and dividing by a zero bound
   would be undefined), so the result is unknown.  Answer
   TOP_IS_INTEGER if the result range fits in a SmallInteger,
   TOP_UNKNOWN otherwise.  */
int
constraint_division (constraint * a,
		     constraint * b,
		     constraint * result)
{
  long r1, r2, r3, r4;
  long m12, m34, M12, M34;

  result->source = NULL;
  CHECK_OUT_OF_RANGE_STRICT;

  if (b->min <= 0 && b->max >= 0)
    {
      result->min = MIN_ST_INT - 1;
      result->max = MAX_ST_INT + 1;
      return TOP_UNKNOWN;
    }

/* C division truncates towards 0; subtract 1 from the quotient when
 * the operands have opposite signs *and* the division is inexact, so
 * as to round towards -oo.  (Exact quotients and a zero dividend must
 * not be adjusted.)
 */
#define DIVISION(a, b) \
  ((a) / (b) - ((((a) ^ (b)) < 0) && ((a) % (b)) != 0))

  r1 = DIVISION (a->min, b->min);
  r2 = DIVISION (a->max, b->max);
  m12 = (r1 < r2) ? r1 : r2;
  M12 = r1 ^ r2 ^ m12;		/* the other one of r1 and r2 */

  r3 = DIVISION (a->min, b->max);
  r4 = DIVISION (a->max, b->min);
  m34 = (r3 < r4) ? r3 : r4;
  M34 = r3 ^ r4 ^ m34;		/* the other one of r3 and r4 */

  result->min = (m12 < m34) ? m12 : m34;
  result->max = (M12 > M34) ? M12 : M34;
  DO_RETURN;
}

/* Answer X with every bit below its most significant set bit turned
   on, i.e. the smallest value of the form 2^k - 1 that is not smaller
   than a non-negative X.  */
long
mask (long int x)
{
  /* When x has the form 0...01...1, adding one carries through all
     the low bits and leaves nothing in common with x:

         x   00000000....00111111...11
         x+1 00000000....01000000...00
     x & x+1 00000000....00000000...00

     Until that happens, smear the set bits one position to the
     right.  */
  for (;;)
    {
      if ((x & (x + 1)) == 0)
	return x;

      x |= x >> 1;
    }
}

/* The functions for bit-wise & and | only give results rounded
 * to the next power of two (or to the next power of two - 1).
 * This is fine because these functions usually extract bit
 * contents, so their operands are likely to be constrained
 * by powers of two, too.
 */

/* Store in RESULT an estimate of the range of A bitAnd: B.  Both
   operands must lie entirely within the SmallInteger range, otherwise
   the result is unknown.  Answer TOP_IS_INTEGER if the result range
   fits in a SmallInteger, TOP_UNKNOWN otherwise.  */
int
constraint_bit_and (constraint * a,
		    constraint * b,
		    constraint * result)
{
  /* A good compiler will analyze life-times and overlap r2 to a1, r12
     to r1, r3 to b1, r4, b2, r34 to r3, getting by with five registers 
     (see below) */
  long r1, r2, r3, r4, r12, r34;
  long a1, a2, b1, b2;

  result->source = NULL;
  CHECK_OUT_OF_RANGE_STRICT;

  /* The minimum is obtained when the result is negative (that's why we 
     use 0 if a value is positive) and there are as many zeroes as
     possible on the right (that's why we use ~ on the mask). */
  a1 = a->min;
  a2 = a->max;
  b1 = b->min;
  b2 = b->max;

  a1 = a1 < 0 ? ~mask (~a1) : 0;
  a2 = a2 < 0 ? ~mask (~a2) : 0;
  b1 = b1 < 0 ? ~mask (~b1) : 0;
  b2 = b2 < 0 ? ~mask (~b2) : 0;

  /* r12 and r34 hold the smallest of the four candidate minima */
  r1 = a1 & b1;
  r2 = a1 & b2;
  r12 = (r1 < r2) ? r1 : r2;
  r3 = a2 & b1;
  r4 = a2 & b2;
  r34 = (r3 < r4) ? r3 : r4;

  /* The maximum is obtained when the result is positive and as little
     zero bits as possible are cleared (if a value must be negative,
     anding -1 will give the biggest result, because we don't risk
     zeroing any bit). */
  /* The original bounds are fetched again *before* result->min is
     written below, so this keeps working when RESULT is the same
     object as A. */
  a1 = a->min;
  a2 = a->max;
  b1 = b->min;
  b2 = b->max;

  a1 = a1 > 0 ? mask (a1) : -1;
  a2 = a2 > 0 ? mask (a2) : -1;
  b1 = b1 > 0 ? mask (b1) : -1;
  b2 = b2 > 0 ? mask (b2) : -1;

  /* r12 and r34 still refer to the minima computed above */
  result->min = (r12 < r34) ? r12 : r34;

  /* Now reuse the temporaries for the four candidate maxima */
  r1 = a1 & b1;
  r2 = a1 & b2;
  r12 = (r1 > r2) ? r1 : r2;
  r3 = a2 & b1;
  r4 = a2 & b2;
  r34 = (r3 > r4) ? r3 : r4;
  result->max = (r12 > r34) ? r12 : r34;

  DO_RETURN;
}

/* Store in RESULT an estimate of the range of A bitOr: B, rounded to
   powers of two through mask.  Both operands must lie entirely within
   the SmallInteger range, otherwise the result is unknown.  Answer
   TOP_IS_INTEGER if the result range fits in a SmallInteger,
   TOP_UNKNOWN otherwise.  */
int
constraint_bit_or (constraint * a,
		   constraint * b,
		   constraint * result)
{
  long lowest, highest;

  result->source = NULL;
  CHECK_OUT_OF_RANGE_STRICT;

  /* Smallest lower bound and largest upper bound of the operands */
  lowest = a->min;
  if (!(a->min < b->min))
    lowest = b->min;

  highest = a->max;
  if (!(a->max > b->max))
    highest = b->max;

  if (lowest < 0)
    result->min = ~mask (~lowest);
  else
    result->min = 0;

  if (highest > 0)
    result->max = mask (highest);
  else
    result->max = 0;

  if (result->min < MIN_ST_INT || result->max > MAX_ST_INT)
    return TOP_UNKNOWN;

  return TOP_IS_INTEGER;
}

/* Store in RESULT the range of A bitShift: B (positive amounts shift
   left, negative ones shift right), computed from the bounds of A
   shifted by the bounds of B.  The result is unknown if A is not
   entirely within the SmallInteger range, if the shift amount can
   reach SIZEOF_LONG in either direction, or if shifting A's bounds
   left by B.max loses bits.  RESULT may be the same object as A or B:
   the operands' bounds are read before RESULT is touched.  Answer
   TOP_IS_INTEGER if the result range fits in a SmallInteger,
   TOP_UNKNOWN otherwise.  */
int
constraint_bit_shift (constraint * a,
		      constraint * b,
		      constraint * result)
{
  long r1, r2, r3, r4;
  long m12, m34, M12, M34;
  long aMin, aMax, bMin, bMax;

#define MIN_LONG	(-1L ^ (~0UL >> 1))

  /* Callers pass the same slot as A and RESULT, so fetch the operands
     before presetting RESULT to `unknown'; otherwise the range check
     below would always see an out-of-range A.  */
  aMin = a->min;
  aMax = a->max;
  bMin = b->min;
  bMax = b->max;

  result->min = MIN_ST_INT - 1;
  result->max = MAX_ST_INT + 1;
  result->source = NULL;
  if ((aMin < MIN_ST_INT || aMax > MAX_ST_INT)
      || (bMin <= -SIZEOF_LONG || bMax >= SIZEOF_LONG))
    return TOP_UNKNOWN;

  /* Give up if the largest left shift overflows a long */
  if (bMax > 0 && ((aMin << bMax >> bMax != aMin)
		   || (aMax << bMax >> bMax != aMax)))
    return TOP_UNKNOWN;

  r1 = r3 = aMin;
  r2 = r4 = aMax;

  /* A's bounds shifted by the largest amount */
  if (bMax > 0)
    {
      r1 <<= bMax;
      r2 <<= bMax;
    }
  else
    {
      r1 >>= -bMax;
      r2 >>= -bMax;
    }
  m12 = (r1 < r2) ? r1 : r2;
  M12 = r1 ^ r2 ^ m12;		/* the other one of r1 and r2 */

  /* A's bounds shifted by the smallest amount */
  if (bMin > 0)
    {
      r3 <<= bMin;
      r4 <<= bMin;
    }
  else
    {
      r3 >>= -bMin;
      r4 >>= -bMin;
    }
  m34 = (r3 < r4) ? r3 : r4;
  M34 = r3 ^ r4 ^ m34;		/* the other one of r3 and r4 (this
				   used to mix in r1 and r2) */

  result->min = (m12 < m34) ? m12 : m34;
  result->max = (M12 > M34) ? M12 : M34;
  DO_RETURN;
}


/* Narrow the ranges of A and B under the assumption that A < B, then
   copy the tightened bounds to every constraint reachable through the
   `source' links.  EXIT_IF_OUT_OF_RANGE is a macro defined elsewhere
   in this file.  */
void
constraint_less (constraint * a,
		 constraint * b)
{
  constraint *origin;
  EXIT_IF_OUT_OF_RANGE;

  /* Example:
       a         239 .. 500
       b           1 .. 250
       a < b --> 239 .. 249 < 240 .. 250 */

  /* A must stay strictly below B's upper bound...  */
  if (b->max - 1 < a->max)
    a->max = b->max - 1;

  /* ...and B strictly above A's lower bound.  */
  if (a->min + 1 > b->min)
    b->min = a->min + 1;

  /* Push A's new upper bound down its chain of sources.  */
  while ((origin = a->source) != NULL)
    {
      origin->max = a->max;
      a = origin;
    }

  /* Push B's new lower bound down its chain of sources.  */
  while ((origin = b->source) != NULL)
    {
      origin->min = b->min;
      b = origin;
    }
}

/* Narrow the ranges of A and B under the assumption that A <= B, then
   copy the tightened bounds to every constraint reachable through the
   `source' links.  EXIT_IF_OUT_OF_RANGE is a macro defined elsewhere
   in this file.  */
void
constraint_less_equal (constraint * a,
		       constraint * b)
{
  constraint *origin;
  EXIT_IF_OUT_OF_RANGE;

  /* Example:
       a          239 .. 500
       b            1 .. 250
       a <= b --> 239 .. 250 <= 239 .. 250 */

  /* A cannot exceed B's upper bound...  */
  if (b->max < a->max)
    a->max = b->max;

  /* ...and B cannot go below A's lower bound.  */
  if (a->min > b->min)
    b->min = a->min;

  /* Push A's new upper bound down its chain of sources.  */
  while ((origin = a->source) != NULL)
    {
      origin->max = a->max;
      a = origin;
    }

  /* Push B's new lower bound down its chain of sources.  */
  while ((origin = b->source) != NULL)
    {
      origin->min = b->min;
      b = origin;
    }
}

/* Narrow the ranges of A and B under the assumption that A > B (the
   mirror image of constraint_less), then copy the tightened bounds to
   every constraint reachable through the `source' links.
   EXIT_IF_OUT_OF_RANGE is a macro defined elsewhere in this file.  */
void
constraint_greater (constraint * a,
		    constraint * b)
{
  constraint *origin;
  EXIT_IF_OUT_OF_RANGE;

  /* B must stay strictly below A's upper bound...  */
  if (a->max - 1 < b->max)
    b->max = a->max - 1;

  /* ...and A strictly above B's lower bound.  */
  if (b->min + 1 > a->min)
    a->min = b->min + 1;

  /* Push B's new upper bound down its chain of sources.  */
  while ((origin = b->source) != NULL)
    {
      origin->max = b->max;
      b = origin;
    }

  /* Push A's new lower bound down its chain of sources.  */
  while ((origin = a->source) != NULL)
    {
      origin->min = a->min;
      a = origin;
    }
}

/* Narrow the ranges of A and B under the assumption that A >= B (the
   mirror image of constraint_less_equal), then copy the tightened
   bounds to every constraint reachable through the `source' links.
   EXIT_IF_OUT_OF_RANGE is a macro defined elsewhere in this file.  */
void
constraint_greater_equal (constraint * a,
			  constraint * b)
{
  constraint *origin;
  EXIT_IF_OUT_OF_RANGE;

  /* B cannot exceed A's upper bound...  */
  if (a->max < b->max)
    b->max = a->max;

  /* ...and A cannot go below B's lower bound.  */
  if (b->min > a->min)
    a->min = b->min;

  /* Push B's new upper bound down its chain of sources.  */
  while ((origin = b->source) != NULL)
    {
      origin->max = b->max;
      b = origin;
    }

  /* Push A's new lower bound down its chain of sources.  */
  while ((origin = a->source) != NULL)
    {
      origin->min = a->min;
      a = origin;
    }
}

/* Narrow the ranges of A and B under the assumption that A = B: both
   end up with the intersection of the two ranges, which is then copied
   to every constraint reachable through the `source' links.
   EXIT_IF_OUT_OF_RANGE is a macro defined elsewhere in this file.  */
void
constraint_equal (constraint * a,
		  constraint * b)
{
  constraint *origin;
  EXIT_IF_OUT_OF_RANGE;

  /* Both upper bounds become the smaller of the two.  */
  if (a->max < b->max)
    b->max = a->max;
  else
    a->max = b->max;

  /* Both lower bounds become the larger of the two.  */
  if (a->min > b->min)
    b->min = a->min;
  else
    a->min = b->min;

  /* Propagate the whole range along B's chain of sources...  */
  while ((origin = b->source) != NULL)
    {
      origin->max = b->max;
      origin->min = b->min;
      b = origin;
    }

  /* ...and along A's.  */
  while ((origin = a->source) != NULL)
    {
      origin->max = a->max;
      origin->min = a->min;
      a = origin;
    }
}

/* Copy the N constraints starting at A into the N slots starting at B.
   `source' links are relocated by the distance between the two blocks,
   so that a link into the block at A becomes a link to the matching
   slot in the block at B.  CONDITION selects an optional comparison
   that is then imposed on the last two constraints of the copy; for
   any *_SPECIAL value other than NOT_EQUAL_SPECIAL the caller must
   therefore pass N >= 2.  Values of CONDITION not listed below leave
   the copy untouched.  */
void
constraint_copy_block (constraint * a,
		       constraint * b,
		       int condition,
		       int n)
{
  long delta;

  for (delta = (char *) b - (char *) a; n--; a++, b++)
    {
      b->min = a->min;
      b->max = a->max;
      b->source =
	(constraint *) (a->source ? ((char *) a->source) +
			delta : NULL);
    }

  /* B now points one past the last copied constraint, so the operands
     of the comparison are b - 2 and b - 1.  Passing B itself would
     touch memory beyond the destination block.  */
  switch (condition)
    {
    case LESS_THAN_SPECIAL:
      constraint_less (b - 2, b - 1);
      break;

    case GREATER_THAN_SPECIAL:
      constraint_greater (b - 2, b - 1);
      break;

    case LESS_EQUAL_SPECIAL:
      constraint_less_equal (b - 2, b - 1);
      break;

    case GREATER_EQUAL_SPECIAL:
      constraint_greater_equal (b - 2, b - 1);
      break;

    case EQUAL_SPECIAL:
      constraint_equal (b - 2, b - 1);
      break;

    case NOT_EQUAL_SPECIAL:
      /* Inequality does not narrow either range.  */
      break;

    default:
      break;
    }
}

/* Merge the N constraints starting at A into the N constraints
   starting at B: each range in B is widened to include the matching
   range in A.  When CONDITION is non-zero, A is first copied to a
   temporary block with constraint_copy_block, using the same CONDITION,
   and the merge uses that copy.  If a constraint in A has a source
   whose relocated address differs from the source of its counterpart
   in B, the counterpart's source link is cleared.  */
void
constraint_merge_block (constraint * a,
			constraint * b,
			int condition,
			int n)
{
  long offset;
  constraint *refined, *mapped;

  if (condition != 0)
    {
      /* Work on a stack-allocated copy of A.  */
      refined = (constraint *) alloca (n * sizeof (constraint));
      constraint_copy_block (a, refined, condition, n);
      a = refined;
    }

  /* Byte distance from a slot in A's block to the matching slot in
     B's block.  */
  offset = (char *) b - (char *) a;

  while (n--)
    {
      if (a->min < b->min)
	b->min = a->min;
      if (a->max > b->max)
	b->max = a->max;

      if (a->source != NULL)
	{
	  mapped = (constraint *) ((char *) a->source + offset);
	  if (b->source != mapped)
	    b->source = NULL;
	}

      a++;
      b++;
    }
}


/* Record an edge of the control-flow graph going from FROM to TO.  The
   edge is allocated on data_obstack and pushed at the head of one of
   FROM's edge lists: forwardEdges when FROM's bp is lower than TO's,
   backwardEdges otherwise.  */
void
add_edge (basic_block * from,
	  basic_block * to)
{
  graph_edge *e = obstack_alloc (data_obstack, sizeof (graph_edge));

  /* The edge stores its destination, so it has to be linked into the
     lists of the block it leaves.  Linking it into TO's lists would
     make every edge point back at the block that owns it and would
     lose FROM altogether.  */
  e->block = to;
  if (from->bp < to->bp)
    {
      e->next = from->forwardEdges;
      from->forwardEdges = e;
    }
  else
    {
      e->next = from->backwardEdges;
      from->backwardEdges = e;
    }
}

/* Call FUNC once for every edge in BASICBLOCK's backwardEdges list,
   passing the block recorded in the edge as the first argument and
   EXTRA as the second.  */
void
visit_backward_edges (basic_block * basicBlock,
		      void (*func) (basic_block *, basic_block *),

		      basic_block * extra)
{
  graph_edge *e;

  /* Pass the block stored in each edge; passing BASICBLOCK itself
     would repeat the very same call once per edge.  */
  for (e = basicBlock->backwardEdges; e; e = e->next)
    func (e->block, extra);
}

/* Call FUNC once for every edge in BASICBLOCK's forwardEdges list,
   passing the block recorded in the edge as the first argument and
   EXTRA as the second.  */
void
visit_forward_edges (basic_block * basicBlock,
		     void (*func) (basic_block *, basic_block *),

		     basic_block * extra)
{
  graph_edge *e;

  /* Pass the block stored in each edge; passing BASICBLOCK itself
     would repeat the very same call once per edge.  */
  for (e = basicBlock->forwardEdges; e; e = e->next)
    func (e->block, extra);
}
