/*
   Test speed of various forms of loops (dot products / convolutions)

   $Id: tloops.c 1.1 1996/07/09 libtsp-V2R7a $
*/

#include <stdio.h>
#include <time.h>
#include <libtsp.h>

/* Benchmark dimensions */
#define NLOOP	99		/* elements in each test vector (N passed to the loops) */
#define NITER	100000		/* timed calls made for each loop variant */
#define INC	3		/* index stride used when filling and reading yri */

/* Presumably for Sun systems whose <time.h> does not supply CLOCKS_PER_SEC
   or a declaration of clock().  The value is defined only when the header
   has not already provided one, so a system definition is never overridden
   or redefined. */
#ifdef sun
#ifndef CLOCKS_PER_SEC
#define CLOCKS_PER_SEC	1000000
#endif
clock_t clock ();
#endif

/* Timed loop: calls loopF (x, y, N) Niter times.  The parameter order
   (Niter before N) matches the definition and the calls in main. */
static void
xloop p_((float (*loopF)p_((const float x[], const float y[], int N)),
	  const float x[], const float y[], int Niter, int N, float *val,
	  double *tdur));

/* Loop kernels timed by main.  Each takes two float arrays and a count N
   and returns a sum of N products of elements of x and y; they differ only
   in how the loop is written.
     Loop1 - Loop3  : dot product, indexed loop
     Loop4 - Loop7  : dot product, different loop forms
     Loop8 - Loop10 : convolution (y indexed in the opposite direction to x) */
static float
Loop1 p_((const float x[], const float y[], int N));
static float
Loop2 p_((const float x[], const float y[], int N));
static float
Loop3 p_((const float x[], const float y[], int N));
static float
Loop4 p_((const float x[], const float y[], int N));
static float
Loop5 p_((const float x[], const float y[], int N));
static float
Loop6 p_((const float x[], const float y[], int N));
static float
Loop7 p_((const float x[], const float y[], int N));
static float
Loop8 p_((const float x[], const float y[], int N));
static float
Loop9 p_((const float x[], const float y[], int N));
static float
Loop10 p_((const float x[], const float y[], int N));

/* Print the result line for one timed loop.
     ii   - running loop number
     tdur - total CPU time in seconds for the NITER calls
     val  - check value returned by the timed loop
   The per-operation time divides by the NITER * NLOOP products formed.  The
   count is computed in double so that raising NITER cannot overflow int. */

static void
xreport (ii, tdur, val)

     int ii;
     double tdur;
     double val;

{
  printf ("  Loop %d, CPU: %.3f sec total, %.3g per mult/add, value: %g\n",
	  ii, tdur, tdur / ((double) NITER * NLOOP), val);

  return;
}

/* Time each loop variant and print one result line per variant.

   The test vectors are chosen so that every product formed by a loop is
   nominally 1:
     x[i] = i+1
     y[i] = 1/(i+1)                  dot products (Loop1 - Loop7)
     yr[i] = 1/(NLOOP-i)             y in reverse order (Loop8, Loop9)
     yri[i*INC] = 1/(NLOOP-i)        as yr, but with stride INC (Loop10)
   Only every INC'th element of yri is set; the others are never read. */

int
main (argc, argv)

     int argc;
     const char *argv[];

{
  int i, ii;
  double tdur;
  float val;
  float x[NLOOP], y[NLOOP], yr[NLOOP], yri[NLOOP*INC];

  for (i = 0; i < NLOOP; ++i) {
    x[i] = i+1;
    y[i] = 1.0 / (i+1);
    yr[i] = 1.0 / (NLOOP-i);
    yri[i*INC] = 1.0 / (NLOOP-i);
  }

  ii = 0;

  printf ("==================== Dot Product: indexed loop\n");

  printf ("===== single mult, single add\n");
  xloop (Loop1, x, y, NITER, NLOOP, &val, &tdur);
  xreport (++ii, tdur, val);

  printf ("===== double mult, single add\n");
  xloop (Loop2, x, y, NITER, NLOOP, &val, &tdur);
  xreport (++ii, tdur, val);

  printf ("===== double mult, double add\n");
  xloop (Loop3, x, y, NITER, NLOOP, &val, &tdur);
  xreport (++ii, tdur, val);

  printf ("\n");
  printf ("==================== Dot Product: single mult, single add\n");

  printf ("===== indexed loop\n");
  xloop (Loop4, x, y, NITER, NLOOP, &val, &tdur);
  xreport (++ii, tdur, val);

  printf ("===== pointer loop, integer test\n");
  xloop (Loop5, x, y, NITER, NLOOP, &val, &tdur);
  xreport (++ii, tdur, val);

  printf ("===== pointer loop, pointer test\n");
  xloop (Loop6, x, y, NITER, NLOOP, &val, &tdur);
  xreport (++ii, tdur, val);

  printf ("===== unwrapped indexed loop\n");
  xloop (Loop7, x, y, NITER, NLOOP, &val, &tdur);
  xreport (++ii, tdur, val);

  printf ("\n");
  printf ("==================== Convolution: single mult, single add\n");

  printf ("===== indexed loop, i, N-i; ++i\n");
  xloop (Loop8, x, yr, NITER, NLOOP, &val, &tdur);
  xreport (++ii, tdur, val);

  printf ("===== indexed loop, i, j; ++i, --j\n");
  xloop (Loop9, x, yr, NITER, NLOOP, &val, &tdur);
  xreport (++ii, tdur, val);

  printf ("===== indexed loop, i, j; --i, j+=INC\n");
  xloop (Loop10, x, yri, NITER, NLOOP, &val, &tdur);
  xreport (++ii, tdur, val);

  return 0;
}

/* Timed loop

   Calls loopF (x, y, N) Niter times, bracketing the calls with clock().
     loopF - loop kernel to be timed
     x, y  - input arrays passed unchanged to loopF
     Niter - number of calls of loopF
     N     - element count passed to loopF
     val   - output, sum of the values returned by loopF divided by
             N * Niter (0 if N * Niter is not positive)
     tdur  - output, difference of the two clock() readings in seconds
*/

static void
xloop (loopF, x, y, Niter, N, val, tdur)

     float (*loopF)p_((const float x[], const float y[], int N));
     const float x[];
     const float y[];
     int Niter;
     int N;
     float *val;
     double *tdur;

{
  int i;
  double sum;
  double nterms;
  clock_t start, end;

  start = clock ();

  /* Accumulate in double: a float running total stops growing once it is
     large enough that one more returned value is below half a unit in the
     last place, which happens for large Niter */
  sum = 0.0;
  for (i = 0; i < Niter; ++i)
    sum += loopF (x, y, N);

  end = clock ();
  *tdur = ((double) (end - start)) / CLOCKS_PER_SEC;

  /* Form the term count in double: the int product N * Niter can overflow */
  nterms = (double) N * (double) Niter;
  if (nterms > 0.0)
    *val = sum / nterms;
  else
    *val = 0.0;		/* nothing was summed; avoid 0/0 */

  return;
}

/* Dot product, indexed loop: products formed without casts, float
   accumulator.  The loop form is what is being timed - keep it as is. */
static float
Loop1 (x, y, N)
     const float x[];
     const float y[];
     int N;
{
  int k;
  float acc = 0.0;

  for (k = 0; k < N; ++k) {
    acc += x[k] * y[k];
  }
  return acc;
}
/* Dot product, indexed loop: each product is formed in double (explicit
   cast of x), then added into a float accumulator. */
static float
Loop2 (x, y, N)
     const float x[];
     const float y[];
     int N;
{
  int k;
  float acc = 0.0;

  for (k = 0; k < N; ++k) {
    acc += (double) x[k] * y[k];
  }
  return acc;
}
/* Dot product, indexed loop: each product is formed in double and added
   into a double accumulator; the total is converted to float on return. */
static float
Loop3 (x, y, N)
     const float x[];
     const float y[];
     int N;
{
  int k;
  double acc = 0.0;

  for (k = 0; k < N; ++k) {
    acc += (double) x[k] * y[k];
  }
  return acc;
}
/* Dot product, indexed loop with a float accumulator.  Same computation as
   Loop1; main reports it as the first entry of the loop-form comparison. */
static float
Loop4 (x, y, N)
     const float x[];
     const float y[];
     int N;
{
  int k;
  float acc = 0.0;

  for (k = 0; k < N; ++k) {
    acc += x[k] * y[k];
  }
  return acc;
}
/* Dot product, pointer loop with an integer loop test: the operands are
   fetched through post-incremented pointers while an int counter decides
   when to stop. */
static float
Loop5 (x, y, N)
     const float x[];
     const float y[];
     int N;
{
  int k;
  float acc = 0.0;

  for (k = 0; k < N; ++k) {
    acc += *x++ * *y++;
  }
  return acc;
}
/* Dot product, pointer loop with a pointer loop test: the loop runs until
   the x pointer reaches the element one past the last of the N inputs. */
static float
Loop6 (x, y, N)
     const float x[];
     const float y[];
     int N;
{
  const float *stop = x + N;
  float acc = 0.0;

  while (x < stop) {
    acc += *x++ * *y++;
  }
  return acc;
}
/* Dot product, indexed loop unrolled by two: each pass adds two products.
   When N is odd the final element is handled after the loop. */
static float
Loop7 (x, y, N)
     const float x[];
     const float y[];
     int N;
{
  int k;
  float acc = 0.0;

  for (k = 0; k < N-1; k += 2) {
    acc += x[k] * y[k] + x[k+1] * y[k+1];
  }
  if (k == N-1) {
    /* odd N: one element is left over */
    acc += x[k] * y[k];
  }
  return acc;
}
/* Convolution sum, single index: x is read forward with index k while the
   y index N-1-k is recomputed from k on every pass. */
static float
Loop8 (x, y, N)
     const float x[];
     const float y[];
     int N;
{
  int k;
  float acc = 0.0;

  for (k = 0; k < N; ++k) {
    acc += x[k] * y[N-1-k];
  }
  return acc;
}
/* Convolution sum, two indices: the x index counts up from 0 while a
   separate y index counts down from N-1. */
static float
Loop9 (x, y, N)
     const float x[];
     const float y[];
     int N;
{
  int up, down;
  float acc = 0.0;

  for (up = 0, down = N-1; up < N; ++up, --down) {
    acc += x[up] * y[down];
  }
  return acc;
}
/* Convolution sum, two indices with a strided y: the x index counts down
   from N-1 while the y index advances from 0 in steps of INC.  The loop
   test is on the y index, so y must hold at least (N-1)*INC+1 elements. */
static float
Loop10 (x, y, N)
     const float x[];
     const float y[];
     int N;
{
  int xi, yj;
  float acc = 0.0;

  for (xi = N-1, yj = 0; yj < N*INC; --xi, yj += INC) {
    acc += x[xi] * y[yj];
  }
  return acc;
}
