/* AE program profiling system.
   Machine-specific definitions for SPARC processors.
   Copyright (C) 1990 by James R. Larus. (larus@cs.wisc.edu)

   AE and AEC are free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by the
   Free Software Foundation; either version 1, or (at your option) any
   later version.

   AE and AEC are distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GNU CC; see the file COPYING.  If not, write to James R.
   Larus, Computer Sciences Department, University of Wisconsin--Madison,
   1210 West Dayton Street, Madison, WI 53706, USA or to the Free
   Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */


/* $Header: /var/home/larus/AE/AE/RCS/ae-sparc.h,v 2.0 90/02/09 17:21:58 larus Exp Locker: larus $ */


/* Define the base and bounds of the AE buffer that accumulates events in
   the executing program. */


/* The pointer to the AE Buffer can either be in a register or in a
   variable stored in memory.  If it is in a register, AE_BUFFER_REG
   contains the register's name, as a string.  If it is in memory,
   AE_BUFFER_VAR contains the variable's name as a string.
   MAKE_AE_BUFFER_POINTER returns an rtx expression for this pointer. */

/* On SPARC, we use base and bounds registers (%g4 and %g6,
   respectively). */

/* The buffer pointer is kept in a register, so the memory-variable
   alternative is left undefined. */
#define AE_BUFFER_REG "%g4"
#undef AE_BUFFER_VAR

/* The hard register number used here must name the same register as
   AE_BUFFER_REG. */
#define MAKE_AE_BUFFER_POINTER() gen_rtx (REG, Pmode, 4) /* 4 = %g4 */


/* The end of the AE Buffer can either be pointed to by a register or
   by a variable stored in memory.  If it is in a register,
   AE_BUFFER_BOUND_REG contains the register's name, as a string.  If it
   is in memory, AE_BUFFER_BOUND_VAR contains the variable's name as a
   string.  MAKE_AE_BOUND_POINTER returns an rtx expression for this
   pointer. */

/* The bound is kept in a register, so the memory-variable alternative
   is left undefined. */
#define AE_BUFFER_BOUND_REG "%g6"
#undef AE_BUFFER_BOUND_VAR

/* The hard register number used here must name the same register as
   AE_BUFFER_BOUND_REG. */
#define MAKE_AE_BOUND_POINTER()	 gen_rtx (REG, Pmode, 6) /* 6 = %g6 */


/* Alternatively, the end of the AE buffer can be a fixed distance
   from the top of the stack.  This alternative is not selected here:
   both macros are left undefined and the bounds register above is used
   instead. */

#undef STACK_TOP
#undef AE_BUFFER_STACK_OFFSET


/* Size of AE buffer (bytes).
   NOTE(review): the _ae_flush_buffer assembly routine later in this
   file hard-codes 1048572 (this value minus 4) when it recomputes the
   bound register; the two must be changed together. */

#define AE_BUFFER_SIZE 0x100000	/* 1MB */


/* Size of a stack frame for the routine AE_START, in bytes. */

/* Need a 16 word window save area for SPARC; the word size is taken
   to be sizeof (int). */

#define AE_START_FRAME_SIZE (16 * sizeof (int))


/* Name of the stack pointer register, as a string in assembler
   syntax. */

#define SP_REG "%sp"



/* One plus the maximum number of instructions combined by the peephole
   optimizer. */

#define MAX_PEEP 3


/* Non-zero when hard register REGNO may already hold a value on entry
   to a function: the stack pointer, the frame pointer, or any of
   registers 24 through 31. */

#define REGISTER_DEFINED_IN_CALL(REGNO)				\
  (! ((REGNO) != STACK_POINTER_REGNUM				\
      && (REGNO) != FRAME_POINTER_REGNUM			\
      && ((REGNO) < 24 || (REGNO) > 31)))


/* Define the characters that precede comments and assembler
   directives. */

#define ASM_COMMENT_CHAR '!'

#define ASM_DIRECTIVE_CHAR '.'


/* Define the number of delayed instructions after a jump, conditional jump,
   or call instruction.  Do not define these values if the instructions have
   no delays or the assembler hides them by doing code reorganization.
   Here each of the three kinds of instruction has one delay slot. */

#define JUMP_DELAY_SLOTS 1

#define CJUMP_DELAY_SLOTS 1

#define CALL_DELAY_SLOTS 1


/* The size of most assembly instructions (in bytes). */

#define STD_ASM_INSN_LENGTH 4


/* Set of instruction-size pairs for instructions whose size is not
   standard.  The table must be sorted by instruction name.
   No exceptions are defined here, so the macro is left undefined. */

#undef ASM_INSN_SIZE_EXCEPTIONS


/* Test whether the assembly instruction ASM_INSN is a branch that
   skips its (normally) delayed slot instruction.  The test is the
   result of looking for the text ",a " (annul suffix followed by a
   space) inside ASM_INSN with substring. */

#define BRANCH_IS_ANNULED(ASM_INSN) (substring ((ASM_INSN), ",a "))


/* If the assembly instruction ASM_INSN starts with "call ." (a call
   to a routine whose name begins with a dot), yield a pointer to the
   routine name, i.e. to the dot itself.  Otherwise yield 0. */

#define ASM_INSN_IS_CALL(ASM_INSN)				\
  (strncmp ((ASM_INSN), "call .", 6) == 0 ? (ASM_INSN) + 5 : 0)



/* Produce the schema corresponding the the standard function prologue
   and epilogue.  Record values that are need upon function entry. */

#define SCHEMA_PROLOGUE(RECORD_REG_ON_ENTRY)			\
{								\
  /* Code from FUNCTION_PROLOGUE: */				\
  extern char call_used_regs[];					\
  extern int frame_pointer_needed;				\
  extern rtx stack_pointer_rtx, frame_pointer_rtx;		\
  int n_fregs = 0, i;						\
  int n_iregs = 64;						\
  register int regno;						\
  int sp_schema_produced = 0, fp_schema_produced = 0;		\
								\
  for (i = 32; i < FIRST_PSEUDO_REGISTER; i++)			\
    if (regs_ever_live[i] && ! call_used_regs[i])		\
      n_fregs++;						\
  for (i = 16; i < 32; i++)					\
    if (regs_ever_live[i]) { n_iregs = 96; break; }		\
								\
  if (RECORD_REG_ON_ENTRY [STACK_POINTER_REGNUM])		\
    sp_schema_produced = record_sp ();				\
  if (RECORD_REG_ON_ENTRY [FRAME_POINTER_REGNUM])		\
    fp_schema_produced = record_fp ();				\
								\
  if (n_fregs)							\
    {								\
      for (i = 32, n_fregs = 0; i < FIRST_PSEUDO_REGISTER; i++)	\
        if (regs_ever_live[i] && ! call_used_regs[i])		\
          {							\
	    if (!sp_schema_produced)				\
	      sp_schema_produced = record_sp ();		\
	    if (regs_ever_live[i+1] && ! call_used_regs[i+1])	\
	      {							\
		store_schema_int_offset (STACK_POINTER_REGNUM,	\
					 n_iregs + 4 * n_fregs, 0); \
		n_fregs += 2, i += 1;				\
	      }							\
	    else						\
	      store_schema_int_offset (STACK_POINTER_REGNUM,	\
				       n_iregs + 4 * n_fregs++, 1); \
          }							\
    }								\
  if (regs_ever_live[32])					\
    {								\
      if (!fp_schema_produced)					\
	fp_schema_produced = record_fp ();			\
      store_schema_int_offset (FRAME_POINTER_REGNUM, -16, 0);	\
      store_schema_int_offset (FRAME_POINTER_REGNUM, -12, 0);	\
    }								\
								\
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)	\
    if (RECORD_REG_ON_ENTRY [regno])				\
      if ((regno == STACK_POINTER_REGNUM && !sp_schema_produced)\
	  || (regno == FRAME_POINTER_REGNUM && !fp_schema_produced) \
	  || (regno != STACK_POINTER_REGNUM && regno != FRAME_POINTER_REGNUM))\
      {								\
	unknown_def_schema (regno);				\
	issue_event (gen_rtx (REG, Pmode, regno));		\
      }								\
}


#define SCHEMA_EPILOGUE()					\
{								\
  /* Code from FUNCTION_EPILOGUE: */				\
  extern char call_used_regs[];					\
  extern int current_function_pretend_args_size;		\
  int fsize = (((get_frame_size ()) + 7 - STARTING_FRAME_OFFSET)\
	       & -8);						\
  int actual_fsize;						\
  int n_fregs = 0, i;						\
  int n_iregs = 64;						\
								\
  for (i = 32, n_fregs = 0; i < FIRST_PSEUDO_REGISTER; i++)	\
    if (regs_ever_live[i] && ! call_used_regs[i])		\
      n_fregs++;						\
  for (i = 16; i < 32; i++)					\
    if (regs_ever_live[i]) { n_iregs = 96; break; }		\
  actual_fsize = fsize + n_iregs + (n_fregs*4+7 & -8);		\
  actual_fsize += current_function_pretend_args_size+7 & -8;	\
  fsize += current_function_pretend_args_size+7 & -8;		\
  if (n_fregs)							\
    {								\
      int base;							\
      int offset;						\
      if (fsize < 4096)						\
	{							\
	  base = FRAME_POINTER_REGNUM;				\
	  offset = n_iregs - actual_fsize;			\
	}							\
      else							\
	{							\
	  base = 1;		/* %g1 */			\
	  offset = n_iregs;					\
	}							\
      for (i = 32, n_fregs = 0; i < FIRST_PSEUDO_REGISTER; i++)	\
	if (regs_ever_live[i] && ! call_used_regs[i])		\
	  {							\
	    if (regs_ever_live[i+1] && ! call_used_regs[i+1])	\
	      {							\
		load_schema_int_offset (base, offset + 4 * n_fregs, 0); \
		n_fregs += 2, i += 1;				\
	      }							\
	    else						\
	      load_schema_int_offset (base, offset + 4 * n_fregs++, 1);\
	  }							\
    }								\
}



/* Produce and write to the assembly output file the code that records
   the various types of events. */


/* Check that the AE buffer has room left.  If not, empty the buffer.

   COMMENT is a string copied into the assembler comment that follows
   the first emitted instruction; at most 200 characters of it are
   used so that the formatted template always fits in the local
   buffer.  COMMENT should not contain `%', because the formatted text
   is then interpreted by output_asm_insn.

   SIZE is accepted but not used: the emitted code only compares the
   buffer pointer with the buffer bound.

   Emitted code:
	subcc	<pointer>, <bound>, %g0
	ble	L<n>		(skip the flush when pointer <= bound)
	nop
	call	_ae_flush_buffer
	nop
     L<n>:
   NOTE(review): `ble' is a signed comparison; confirm that the pointer
   and the bound can never lie on opposite sides of 0x80000000. */

#define GENERATE_SPACE_CHECK(COMMENT, SIZE)			\
{								\
  rtx xops [2];							\
  rtx label =  gen_label_rtx ();				\
  char buffer [256];						\
								\
  xops [0] = ae_buffer_pointer;					\
  xops [1] = ae_buffer_end_pointer;				\
  /* The precision bounds the text copied from COMMENT. */	\
  sprintf (buffer, "subcc %%0, %%1, %%%%g0\t! %.200s Event", (COMMENT)); \
  output_asm_insn (buffer, xops);				\
								\
  xops [0] = label;						\
  output_asm_insn ("ble %l0", xops);				\
  /* Delay slot of the branch.  Operands are always passed, */	\
  /* even though this template does not use them. */		\
  output_asm_insn ("nop", xops);				\
								\
  output_asm_insn ("call _ae_flush_buffer", xops);		\
  /* Delay slot of the call. */					\
  output_asm_insn ("nop", xops);				\
								\
  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (label)); \
}


/* Emit the code that appends VALUE (an rtx register) to the AE buffer
   as a 4 byte event.  VALUE is first copied into %g2; its bytes are
   then stored one at a time, lowest byte at offset 3 and highest byte
   at offset 0 from the buffer pointer, so the buffer pointer need not
   be word aligned.  Finally the buffer pointer is advanced by 4.
   %g2 is overwritten. */

#define GENERATE_EVENT(VALUE)					\
{								\
  /* Byte stores in emission order. */				\
  static char *ae_store_insn_ [4] =				\
    {								\
      "stb %%g2, [3 + %0]",					\
      "stb %%g2, [2 + %0]",					\
      "stb %%g2, [1 + %0]",					\
      "stb %%g2, [0 + %0]"					\
    };								\
  rtx xops [2];							\
  int ae_byte_;							\
								\
  xops [0] = VALUE;						\
  output_asm_insn ("mov %0, %%g2", xops);			\
								\
  /* Is there a better way to do an unaligned store on a SPARC? */ \
  xops [0] = ae_buffer_pointer;					\
  for (ae_byte_ = 0; ae_byte_ < 4; ae_byte_++)			\
    {								\
      /* Bring the next higher byte down before storing it. */	\
      if (ae_byte_ != 0)					\
	output_asm_insn ("srl %%g2, 8, %%g2", xops);		\
      output_asm_insn (ae_store_insn_ [ae_byte_], xops);	\
    }								\
								\
  xops [0] = ae_buffer_pointer;					\
  output_asm_insn ("add %0, 4, %0\t\t! End Event", xops);	\
}


/* Generate an event to record the integer VALUE, which should be
   stored in BYTES bytes.

   VALUE is loaded into %g2 as an immediate.  When BYTES is 1 a single
   byte is stored at the buffer pointer; for any other BYTES two bytes
   are stored, high byte at offset 0 and low byte at offset 1.  The
   buffer pointer is then advanced by BYTES.  %g2 is overwritten.

   The operand array is local to the expansion, so the macro does not
   depend on the caller declaring one.  BYTES is evaluated once, after
   VALUE.

   NOTE(review): only BYTES equal to 1 or 2 stores as many bytes as the
   pointer is advanced by.  Also confirm that every VALUE passed here
   is acceptable to the assembler as an immediate operand of `mov'. */

#define GENERATE_SHORT_EVENT(VALUE, BYTES)			\
{								\
  rtx xops [2];							\
  int ae_nbytes_;						\
								\
  xops [0] = gen_rtx (CONST_INT, VOIDmode, (VALUE));		\
  output_asm_insn ("mov %0, %%g2", xops);			\
								\
  ae_nbytes_ = (BYTES);						\
  xops [0] = ae_buffer_pointer;					\
  if (ae_nbytes_ == 1)						\
    output_asm_insn ("stb %%g2, [0 + %0]", xops);		\
  else								\
    {								\
      /* Low byte first, then shift the high byte down. */	\
      output_asm_insn ("stb %%g2, [1 + %0]", xops);		\
      output_asm_insn ("srl %%g2, 8, %%g2", xops);		\
      output_asm_insn ("stb %%g2, [0 + %0]", xops);		\
    }								\
								\
  xops [0] = ae_buffer_pointer;					\
  xops [1] = gen_rtx (CONST_INT, VOIDmode, ae_nbytes_);		\
  output_asm_insn ("add %0, %1, %0\t\t! End Short Event", xops); \
}


/* Generate an event to record an address that is computed from BASE
   and OFFSET.

   BASE is an rtx printed inside %hi() and %lo(); OFFSET is an rtx that
   is added to the %lo() part, or NULL when there is no offset.
   ADDRESS is accepted but not used by this expansion.

   The address is built in %g2 with sethi/add and then stored one byte
   at a time, lowest byte at offset 3 and highest byte at offset 0 from
   the buffer pointer, so the buffer pointer need not be word aligned.
   Finally the buffer pointer is advanced by 4.  %g2 is overwritten.

   OFFSET is evaluated once, after the sethi has been emitted.

   NOTE(review): the offset is added to the %lo() part only; confirm
   that the resulting immediate always fits the `add' instruction. */

#define GENERATE_ADDRESS_EVENT(ADDRESS, BASE, OFFSET)		\
{								\
  rtx xops [2];							\
  rtx ae_offset_;						\
								\
  xops [0] = BASE;						\
  output_asm_insn ("sethi %%hi(%0), %%g2", xops);		\
  ae_offset_ = (OFFSET);					\
  if (ae_offset_ != NULL)					\
    {								\
      xops [1] = ae_offset_;					\
      output_asm_insn ("add %%g2, %%lo(%0)+%a1, %%g2", xops);	\
    }								\
  else								\
    output_asm_insn ("add %%g2, %%lo(%0), %%g2", xops);		\
								\
  xops [0] = ae_buffer_pointer;					\
  /* Is there a better way to do an unaligned store on a SPARC? */ \
  output_asm_insn ("stb %%g2, [3 + %0]", xops);			\
  output_asm_insn ("srl %%g2, 8, %%g2", xops);			\
  output_asm_insn ("stb %%g2, [2 + %0]", xops);			\
  output_asm_insn ("srl %%g2, 8, %%g2", xops);			\
  output_asm_insn ("stb %%g2, [1 + %0]", xops);			\
  output_asm_insn ("srl %%g2, 8, %%g2", xops);			\
  output_asm_insn ("stb %%g2, [0 + %0]", xops);			\
								\
  xops [0] = ae_buffer_pointer;					\
  output_asm_insn ("add %0, 4, %0\t\t! End Address Event", xops); \
}



/* Assembly code routines for aecrt0.o. */

#ifdef AE_START_ASM
/* _ae_start: program entry code.
   It derives three values from the initial stack, stores the third
   one in _environ, calls _ae_initialize, switches %sp to the value
   found in %o0 after that call, and then calls _main with the three
   values as its arguments.  If _main returns, _exit is called; if
   that returns too, __exit is called.
   The lines marked "Addition" and "ditto" are the ones flagged by the
   original author as added for AE.  */
	.text
	.align 4
	.global _ae_start
	.proc 1
_ae_start:
	/* Clear the frame pointer.  */
	mov    	0, %fp
	/* %l0 = word at %sp + 64; first argument of _main (presumably
	   argc -- confirm against the initial stack layout).  */
	ld     	[%sp + 64], %l0
	/* %l1 = %sp + 68; second argument of _main.  */
	add    	%sp, 68, %l1
	/* %l2 = %l1 + 4 * %l0 + 4; third argument of _main, also
	   stored in _environ.  */
	sll    	%l0, 2, %l2
	add    	%l2, 4, %l2
	add    	%l1, %l2, %l2
	sethi  	%hi(_environ), %l3
	st     	%l2, [%l3 + %lo(_environ)]
	call	_ae_initialize	! Addition
	nop
	/* The value in %o0 after the call becomes the stack pointer.  */
	mov	%o0, %sp	! ditto
	mov	%l0, %o0	! ditto
	mov	%l1, %o1	! ditto
	mov	%l2, %o2	! ditto
	call   	_main
	/* Delay slot of the call: lower %sp by 0x20 bytes.  */
	sub	%sp, 0x20, %sp
	/* %o0 is not modified between the return of _main and the
	   call of _exit.  */
	call   	_exit
	nop
	call   	__exit
	nop
#endif


#ifdef AE_FLUSH_BUFFER_ASM
/* _ae_flush_buffer: empty the AE buffer.
   Calls _write with
	%o0 = contents of _ae_fd,
	%o1 = contents of _ae_buffer_base,
	%o2 = %g4 - contents of _ae_buffer_base,
   then resets %g4 to the contents of _ae_buffer_base and %g6 to that
   value plus 1048572.  %g1, %g2, %g3, %g5 and %g7 are saved in the
   frame on entry and reloaded before returning.
   NOTE(review): the result of _write is not examined.
   NOTE(review): 1048572 is 0x100000 - 4, i.e. AE_BUFFER_SIZE - 4 with
   the value defined earlier in this file; the two are kept in sync by
   hand.  */
	.text
	.align 4
	.global _ae_flush_buffer
	.proc 1
_ae_flush_buffer:
	!#PROLOGUE# 0
	save %sp,-240,%sp
	!#PROLOGUE# 1
	/* Save the global registers that are restored below.  */
	st %g1,[%fp-144]
	st %g2,[%fp-148]
	st %g3,[%fp-156]
	st %g5,[%fp-160]
	st %g7,[%fp-164]
	/* Set up the three arguments of _write.  */
	sethi %hi(_ae_buffer_base),%g1
	ld	[%g1+%lo(_ae_buffer_base)],%o2
	sethi %hi(_ae_fd),%g1
	ld	[%g1+%lo(_ae_fd)],%o0
	mov %o2,%o1
	call _write,0
	/* Delay slot of the call: %o2 = number of bytes between the
	   buffer base and the current buffer pointer.  */
	sub %g4,%o2,%o2
	/* Reset the buffer pointer and recompute the bound.  */
	sethi %hi(_ae_buffer_base),%g1
	ld	[%g1+%lo(_ae_buffer_base)],%g4
	sethi %hi(1048572),%g1
	or %lo(1048572),%g1,%g1
	add %g4,%g1,%g6
	/* Reload the saved global registers.  */
	ld [%fp-144], %g1
	ld [%fp-148], %g2
	ld [%fp-156], %g3
	ld [%fp-160], %g5
	ld [%fp-164], %g7
	ret
	restore
	/* Storage for the two variables read above.  */
	.global _ae_fd
	.common _ae_fd,8,"bss"
	.global _ae_buffer_base
	.common _ae_buffer_base,8,"bss"
#endif



/* Definitions for AEC. */

/* a.out file format is BSD, with the nlist library to find symbols.
   The ECOFF alternative is explicitly left undefined. */

#undef ECOFF_AOUT

#define BSD_AOUT


/* Function call returns this many bytes after call instruction.
   The value equals two instructions of STD_ASM_INSN_LENGTH bytes each,
   which matches a call followed by CALL_DELAY_SLOTS delay slot. */

#define PC_OFFSET_AFTER_CALL 8


/* Non-zero when register N belongs to the current function only, so
   that it may hold distinct values in different functions.  This is
   the case for registers 8 through 31. */

#define REG_LOCAL_TO_FUNCTION(N) (! ((N) < 8 || (N) > 31))


/* Initialize registers before the generation program begins.
   The only action is the call output_set_value (0, "0"); presumably
   this gives register 0 the value 0 -- confirm against the definition
   of output_set_value. */

#define INITIALIZE_REGISTERS() {output_set_value (0, "0");}
