
/*
     Test routine for SVctx library. Runs two examples 

       1) Convection diffusion with upwind differencing 
          use -l value (where value > 0.0) to increase convection.

       2) Three dimensional linear elasticity on unit cube;
          use -e for this problem.

       Use -n integer (where integer > 2) to set the mesh refinement.

       The relative convergence tolerance for iterative methods may 
       be set with -rtol value (1.0e-12 is the default).

     Sample invocations:
       example -n 30
       example -n 30 -l 3.0
       example -n 5  -e

*/
#include "tools.h"
#include "system/system.h"
#include "sparse/spmat.h"
#include "solvers/svctx.h"
#include <math.h>
#include <stdio.h>

/* Forward declarations (old-style, no parameter lists) for the helper
   routines defined further down in this file.  PrintInfo is declared
   here as well so that the calls in main() do not depend on an implicit
   function declaration. */
SpMat  *CD2d();          /* builds the 2-d convection-diffusion matrix   */
SpMat  *Elastic3d();     /* 3-d elasticity matrix; not defined in this file */
double *FormSolution();  /* allocates and fills a random vector          */
double ErrorNorm();      /* 2-norm of the difference of two vectors      */
int    PrintInfo();      /* prints one line of the results table         */

/* SETMON(ctx): when built with USEX, installs ITXMonitor as the monitor
   routine of the solver context and then calls ITXMonitor(0,0,-2,0.0)
   once; otherwise it does nothing.
   Both variants expand to a single statement, so the macro behaves
   correctly as the body of an unbraced if/else.  Use it as
   "SETMON(ctx);" -- the caller supplies the semicolon. */
#if defined(USEX)
#define SETMON(ctx) do { SVSetMonitor(ctx,ITXMonitor,(void*)0); \
                         ITXMonitor(0,0,-2,0.0); } while (0)
#else
#define SETMON(ctx) ((void)0)
#endif

main(argc,argv)
int  argc;
char **argv;
{
  SpMat    *mat;
  SVctx    *ctx;
  double   *x,*solution, *b, omega;
  int      M = 3,N,its,i, elastic = 0, sf;
  double   err,start,end,lambda = 0.0, setup, rtol = 1.0e-12;
  ITMETHOD method;

  /* parse the input arguments */
  SYArgGetInt( &argc, argv, 1, "-n", &M );
  SYArgGetDouble( &argc, argv, 1, "-l", &lambda );
  elastic = SYArgHasName( &argc, argv, 1, "-e" );
  SYArgGetDouble( &argc, argv, 1, "-rtol", &rtol );

#if defined(USEX)
  /* Process X-specific arguments (-geometry) */
  XBQGetArgs( &argc, argv, 1 );
#endif

  /* for symmetric problems use CG otherwise GMRES */
  if (lambda != 0.0) method = ITGMRES; else method = ITCG;

  /* for model 2D grid problems; this is good relaxation factor for SOR */
  if (!elastic && lambda == 0.0)
       omega = 2.0/(1.0 + sin(3.14159265358979/(M + 1.0)));
  else omega = 1.20;

  /* Generate matrix */
  if (!elastic)  {mat  = CD2d(M,lambda);                   CHKERR(1);}
  else           {mat  = Elastic3d(M);                     CHKERR(1);}
 
  N           = mat->rows;
  solution    = FormSolution(N);                           CHKERR(1);
  x = (double *) MALLOC(N*sizeof(double));                 CHKPTR(x);
  b = (double *) MALLOC(N*sizeof(double));                 CHKPTR(x);

  /* given a "solution", generate the right hand side */
  SpMult(mat,solution,b);                                  CHKERR(1);

  printf("                Matrix size %d \n",N);
  printf("    Method                 Error        Setup       Time      Speed         Its\n");
  printf("------------------------------------------------------------------------------\n");
  start = SYGetCPUTime(); ctx = SVCreate(mat,SVLU);              CHKERR(1);
  /* SVSetLUThreshold(ctx,0); */
  SVSetUp(ctx);                                            CHKERR(1);
  setup                    = SYGetCPUTime()-start;
  SVGetFlops(ctx,sf);
  SVSolve(ctx,b,x);    end = SYGetCPUTime()-start;               CHKERR(1);
  err = ErrorNorm(N,solution,x);
  PrintInfo( ctx, "Nested Dissection:", err, setup, end, -1, sf );
  SVDestroy(ctx);                                          CHKERR(1);

  start = SYGetCPUTime(); ctx = SVCreate(mat,SVLU);             CHKERR(1);
  SVSetLUOrdering(ctx,ORDER_RCM);    
  SVSetUp(ctx);                                            CHKERR(1);
  setup                    = SYGetCPUTime()-start;
  SVGetFlops(ctx,sf);
  SVSolve(ctx,b,x);     end = SYGetCPUTime()-start;            CHKERR(1);
  err = ErrorNorm(N,solution,x);
  PrintInfo( ctx, "Reverse Cuthill-McGee:", err, setup, end, -1, sf );
  SVDestroy(ctx);                                          CHKERR(1);

  start = SYGetCPUTime(); ctx = SVCreate(mat,SVLU);            CHKERR(1);
  SVSetLUOrdering(ctx,ORDER_QMD);    
  SVSetUp(ctx);                                            CHKERR(1);
  setup                    = SYGetCPUTime()-start;
  SVGetFlops(ctx,sf);
  SVSolve(ctx,b,x);       end = SYGetCPUTime()-start;          CHKERR(1);
  err = ErrorNorm(N,solution,x);
  PrintInfo( ctx, "Quotient Minimum Degree:", err, setup, end, -1, sf );
  SVDestroy(ctx);                                          CHKERR(1);

  start = SYGetCPUTime(); ctx = SVCreate(mat,SVJacobi);CHKERR(1);
  SVSetAccelerator(ctx,method);
  SVSetUp(ctx);                                            CHKERR(1);
  SETMON(ctx);
  SVSetIts(ctx,10*N);
  SVSetRelativeTol(ctx,rtol);
  SVSetGMRESRestart(ctx,2*M);
  setup                    = SYGetCPUTime()-start;
  SVGetFlops(ctx,sf);
  its = SVSolve(ctx,b,x);    end = SYGetCPUTime()-start;         CHKERR(1);
  err = ErrorNorm(N,solution,x);
  PrintInfo( ctx, "Jacobi:", err, setup, end, its, sf );
  SVDestroy(ctx);                                          CHKERR(1);

  start = SYGetCPUTime(); ctx = SVCreate(mat,SVSSOR);CHKERR(1);
  SVSetAccelerator(ctx,method);
  SVSetUp(ctx);                                            CHKERR(1);
  SETMON(ctx);
  SVSetIts(ctx,10*N);
  SVSetRelativeTol(ctx,rtol);
  SVSetGMRESRestart(ctx,2*M);
  setup                    = SYGetCPUTime()-start;
  SVGetFlops(ctx,sf);
  its = SVSolve(ctx,b,x);    end = SYGetCPUTime()-start;         CHKERR(1);
  err = ErrorNorm(N,solution,x);
  PrintInfo( ctx, "Gauss-Seidel:", err, setup, end, its, sf );
  SVDestroy(ctx);                                          CHKERR(1);

  start = SYGetCPUTime(); ctx = SVCreate(mat,SVSSOR);CHKERR(1);
  SVSetAccelerator(ctx,method);
  SVSetSSOROmega(ctx,omega);
  SVSetUp(ctx);                                            CHKERR(1);
  SETMON(ctx);
  SVSetIts(ctx,10*N);
  SVSetRelativeTol(ctx,rtol);
  SVSetGMRESRestart(ctx,2*M);
  setup                    = SYGetCPUTime()-start;
  SVGetFlops(ctx,sf);
  its = SVSolve(ctx,b,x);  end = SYGetCPUTime()-start;           CHKERR(1);
  err = ErrorNorm(N,solution,x);
  PrintInfo( ctx, "SSOR:", err, setup, end, its, sf );
  SVDestroy(ctx);                                          CHKERR(1);

  start = SYGetCPUTime(); ctx = SVCreate(mat,SVILU);CHKERR(1);
  SVSetAccelerator(ctx,method);
  SVSetILUFill(ctx,0);
  SVSetUp(ctx);                                            CHKERR(1);
  SETMON(ctx);
  SVSetIts(ctx,10*N);
  SVSetRelativeTol(ctx,rtol);
  SVSetGMRESRestart(ctx,2*M);
  setup                    = SYGetCPUTime()-start;
  SVGetFlops(ctx,sf);
  its = SVSolve(ctx,b,x);  end = SYGetCPUTime()-start;           CHKERR(1);
  err = ErrorNorm(N,solution,x);
  PrintInfo( ctx, "ILU: Fill = 0:", err, setup, end, its, sf );
  SVDestroy(ctx);                                          CHKERR(1);

  /* ILU(n) n > 0 generally produces a nonsymmetric preconditioner
     so we must use GMRES and not CG */
  start = SYGetCPUTime(); ctx = SVCreate(mat,SVILU);CHKERR(1);
  SVSetAccelerator(ctx,ITGMRES);
  SVSetILUFill(ctx,M/4);
  SVSetUp(ctx);                                            CHKERR(1);
  SETMON(ctx);
  SVSetIts(ctx,10*N);
  SVSetRelativeTol(ctx,rtol);
  SVSetGMRESRestart(ctx,2*M);
  setup                    = SYGetCPUTime()-start;
  SVGetFlops(ctx,sf);
  its = SVSolve(ctx,b,x); end = SYGetCPUTime()-start;            CHKERR(1);
  err = ErrorNorm(N,solution,x);
  { char buf[30]; sprintf( buf, "ILU: Fill = %d:", M/4 );
  PrintInfo( ctx, buf, err, setup, end, its, sf );}
  SVDestroy(ctx);                                          CHKERR(1);

  start = SYGetCPUTime(); ctx = SVCreate(mat,SVILU);CHKERR(1);
  SVSetAccelerator(ctx,ITGMRES);
  /* The amount of allowed fill should be at least proportional to M */
  SVSetILUFill(ctx,5);
  SVSetILUDropTol(ctx,0.001);
  SVSetUp(ctx);                                            CHKERR(1);
  SETMON(ctx);
  SVSetIts(ctx,10*N);
  SVSetRelativeTol(ctx,rtol);
  SVSetGMRESRestart(ctx,2*M);
  setup                    = SYGetCPUTime()-start;
  SVGetFlops(ctx,sf);
  its = SVSolve(ctx,b,x);  end = SYGetCPUTime()-start;           CHKERR(1);
  err = ErrorNorm(N,solution,x);
  PrintInfo( ctx, "ILUND: rtol= .001:", err, setup, end, its, sf );
  SVDestroy(ctx);                                          CHKERR(1);

  start = SYGetCPUTime(); ctx = SVCreate(mat,SVBDD);CHKERR(1);
  SVSetAccelerator(ctx,ITTFQMR);
  SVSetILUFill(ctx,0);
  SVSetBDDDomainsNumber(ctx,M);
  SVSetUp(ctx);                                            CHKERR(1);
  SETMON(ctx);
  SVSetIts(ctx,10*N);
  SVSetRelativeTol(ctx,rtol);
  SVSetGMRESRestart(ctx,2*M);
  setup                    = SYGetCPUTime()-start;
  SVGetFlops(ctx,sf);
  its = SVSolve(ctx,b,x);  end = SYGetCPUTime()-start;           CHKERR(1);
  err = ErrorNorm(N,solution,x);
  PrintInfo( ctx, "TFQMR/BDD:", err, setup, end, its, sf );
  SVDestroy(ctx);                                          CHKERR(1);

  start = SYGetCPUTime(); ctx = SVCreate(mat,SVOSM);CHKERR(1);
  SVSetAccelerator(ctx,ITTFQMR);
  SVSetILUFill(ctx,0);
  SVSetOSMDomainsNumber(ctx,M);
  /* SVSetOSMDefaultOverlap(ctx,2) */
  SVSetUp(ctx);                                            CHKERR(1);
  SETMON(ctx);
  SVSetIts(ctx,10*N);
  SVSetRelativeTol(ctx,rtol);
  SVSetGMRESRestart(ctx,2*M);
  SVGetFlops(ctx,sf);
  setup                    = SYGetCPUTime()-start;
  its = SVSolve(ctx,b,x);  end = SYGetCPUTime()-start;           CHKERR(1);
  err = ErrorNorm(N,solution,x);
  PrintInfo( ctx, "TFQMR/OSM:", err, setup, end, its, sf );
  SVDestroy(ctx);                                          CHKERR(1);

  if (!elastic) {
  start = SYGetCPUTime(); ctx = SVCreate(mat,SVOSM);CHKERR(1);
  SVSetAccelerator(ctx,ITTFQMR);
  SVSetILUFill(ctx,0);
  SVSetOSMDomainsNumber(ctx,M);
  /* We could use M*M,M,3 for the 3-d elasticity example */
  SVSetOSMRegularDomains2d( ctx, M, M, 1 );
  /* SVSetOSMDefaultOverlap(ctx,2) */
  SVSetUp(ctx);                                            CHKERR(1);
  SETMON(ctx);
  SVSetIts(ctx,10*N);
  SVSetRelativeTol(ctx,rtol);
  SVSetGMRESRestart(ctx,2*M);
  SVGetFlops(ctx,sf);
  setup                    = SYGetCPUTime()-start;
  its = SVSolve(ctx,b,x);  end = SYGetCPUTime()-start;           CHKERR(1);
  err = ErrorNorm(N,solution,x);
  PrintInfo( ctx, "TFQMR/OSM (2d decomp):", err, setup, end, its, sf );
  SVDestroy(ctx);                                          CHKERR(1);
  }

  start = SYGetCPUTime(); ctx = SVCreate(mat,SVOSM);CHKERR(1);
  SVSetAccelerator(ctx,ITGMRES);
  SVSetILUFill(ctx,0);
  SVSetOSMDomainsNumber(ctx,M);
  /* SVSetOSMDefaultOverlap(ctx,2) */
  SVSetUp(ctx);                                            CHKERR(1);
  SETMON(ctx);
  SVSetIts(ctx,10*N);
  SVSetRelativeTol(ctx,rtol);
  SVSetGMRESRestart(ctx,2*M);
  SVGetFlops(ctx,sf);
  setup                    = SYGetCPUTime()-start;
  its = SVSolve(ctx,b,x);  end = SYGetCPUTime()-start;           CHKERR(1);
  err = ErrorNorm(N,solution,x);
  PrintInfo( ctx, "GMRES/OSM:", err, setup, end, its, sf );
  SVDestroy(ctx);                                          CHKERR(1);

#ifdef TRY_INNER_ITER
  start = SYGetCPUTime(); ctx = SVCreate(mat,SVOSM);CHKERR(1);
  SVSetOSMDefaultMethod(ctx,SVILU);
  SVSetAccelerator(ctx,ITTFQMR);
  SVSetILUFill(ctx,0);
  SVSetOSMDomainsNumber(ctx,M);
  /* SVSetOSMDefaultOverlap(ctx,2) */
  SVSetUp(ctx);                                            CHKERR(1);
  SETMON(ctx);
  SVSetIts(ctx,10*N);
  SVSetGMRESRestart(ctx,2*M);
  SVGetFlops(ctx,sf);
  setup                    = SYGetCPUTime()-start;
  its = SVSolve(ctx,b,x);  end = SYGetCPUTime()-start;           CHKERR(1);
  err = ErrorNorm(N,solution,x);
  PrintInfo( ctx, "TFQMR/OSM (ILUinner):", err, setup, end, its, sf );
  SVDestroy(ctx);                                          CHKERR(1);
#endif

  /* the incomplete Choleski preconditioners are only for
     symmetric problems */
  if (lambda != 0.0) exit(0);
  start = SYGetCPUTime(); ctx = SVCreate(mat,SVICC);  CHKERR(1);
  SVSetAccelerator(ctx,ITCG);
  SVSetUp(ctx);                                            CHKERR(1);
  SETMON(ctx);
  SVSetIts(ctx,10*N);
  SVSetRelativeTol(ctx,rtol);
  setup                    = SYGetCPUTime()-start;
  SVGetFlops(ctx,sf);
  its = SVSolve(ctx,b,x); end = SYGetCPUTime()-start;            CHKERR(1);
  err = ErrorNorm(N,solution,x);
  PrintInfo( ctx, "ICC: Standard:", err, setup, end, its, sf );
  SVDestroy(ctx);                                          CHKERR(1);

  start = SYGetCPUTime(); ctx = SVCreate(mat,SVICCJP);  CHKERR(1);
  SVSetAccelerator(ctx,ITCG);
  SVSetUp(ctx);                                            CHKERR(1);
  SETMON(ctx);
  SVSetIts(ctx,10*N);
  SVSetRelativeTol(ctx,rtol);
  setup                    = SYGetCPUTime()-start;
  SVGetFlops(ctx,sf);
  its = SVSolve(ctx,b,x); end = SYGetCPUTime()-start;            CHKERR(1);
  err = ErrorNorm(N,solution,x);
  PrintInfo( ctx, "ICC: NonStandard:", err, setup, end, its, sf );
  SVDestroy(ctx);                                          CHKERR(1);

  SpDestroy(mat);
}
/* ---------------------------------------------------------------- */
/* 
     Upwind finite difference discretization for
	  U_xx + U_yy + lambda U_x
     on an m x m grid, assembled as an (m*m) x (m*m) sparse matrix.

     Grid point (ix,iy) is unknown number ix + iy*m.  With h = 1/(m-1)
     and c = h*h/2, each row receives
         4c + lambda*h    on the diagonal,
         -c - lambda*h    for the neighbour at ix-1 (when it exists),
         -c               for the neighbours at ix+1, iy-1 and iy+1
                          (when they exist).

     Input:
       m      - number of grid points in each direction
       lambda - convection coefficient

     Returns the new matrix.  Each SpAddValue is followed by CHKERRV(1,0),
     which presumably makes the routine return 0 on an error -- confirm
     against the CHKERRV definition.
*/
SpMat *CD2d( m,lambda )
int    m;
double lambda;
{
  SpMat  *cdmat;
  int    npts, gx, gy, eq;
  double h, c, west, center;

  npts  = m * m;
  cdmat = SpCreate( npts, npts, 5 );

  h      = 1.0/(m - 1.0);
  c      = h*h/2.0;
  west   = -c - lambda*h;        /* coupling to the point at gx-1 */
  center = 4.0*c + lambda*h;     /* diagonal entry                */

  /* eq runs through the unknowns in the order gx + gy*m */
  eq = 0;
  for (gy=0; gy<m; gy++) {
    for (gx=0; gx<m; gx++, eq++) {
      if (gy > 0) {
        SpAddValue( cdmat, -c, eq, eq - m ); CHKERRV(1,0);
      }
      if (gx > 0) {
        SpAddValue( cdmat, west, eq, eq - 1 ); CHKERRV(1,0);
      }
      SpAddValue( cdmat, center, eq, eq ); CHKERRV(1,0);
      if (gx + 1 < m) {
        SpAddValue( cdmat, -c, eq, eq + 1 ); CHKERRV(1,0);
      }
      if (gy + 1 < m) {
        SpAddValue( cdmat, -c, eq, eq + m ); CHKERRV(1,0);
      }
    }
  }
  return cdmat;
}
/* ---------------------------------------------------------------- */
/*  
     Builds a random solution 

     Allocates a vector of n doubles and sets each entry, in index
     order, to random() divided by 2147483647.0.

     Input:
       n - number of entries

     Returns the newly allocated vector; the caller owns it.  The
     allocation is checked with CHKPTRV(.,0), which presumably returns 0
     on failure -- confirm against the CHKPTRV definition.
*/
double *FormSolution(n)
int n;
{
  double *vec;
  double scale = 2147483647.0;
  int    k = 0;

  vec = (double *) MALLOC(n*sizeof(double)); CHKPTRV(vec,0);
  while (k < n) {
    vec[k] = ((double) random())/scale;
    k++;
  }
  return vec;
}
/* ---------------------------------------------------------------- */
/*  
     Returns 2 norm of difference of 2 vectors 

     Input:
       n        - length of both vectors
       solution - reference vector
       x        - computed vector

     Note: the norm is taken of x after the DVaxpy call, so x is
     presumably overwritten with the difference (x - solution); do not
     rely on the contents of x after this call.
*/
double ErrorNorm(n,solution,x)
double *solution,*x;
int    n;
{
  double diff_norm;

  /* x <- x + (-1.0)*solution, then measure what is left in x */
  DVaxpy(&n, -1.0, solution,x);
  DVnorm(&n, x, &diff_norm);

  return diff_norm;
}
  
/* Routine to print output line

   Input:
     ctx  - solver context; its current flop count is read with SVGetFlops
     name - label for the method, printed right-justified in 24 columns
     err  - error norm of the computed solution
     ts   - setup time
     te   - total time
     its  - iteration count; a negative value means "not applicable" and
            the column is left out
     sf   - flop count recorded at the end of the setup phase

   Two rates are printed in Mflops: sf/ts and (flops from ctx)/te.
   The timer can report a zero interval for very small problems; a rate
   is reported as 0.0 in that case instead of dividing by zero.

   Always returns 0.
*/
int PrintInfo( ctx, name, err, ts, te, its, sf )
SVctx  *ctx;
char   *name;
int    its, sf;
double err, ts, te;
{
int    flops;
double setup_rate, total_rate;

SVGetFlops( ctx, flops );

/* guard against unmeasurably short (zero) intervals */
setup_rate = (ts > 0.0) ? (double)sf/(1.e6*ts)      : 0.0;
total_rate = (te > 0.0) ? (double)(flops)/(1.e6*te) : 0.0;

if (its >= 0) 
    printf("%24s %11.3e %11.3e %11.3e %4.1fMf %4.1fMf %d\n",
	   name, err, ts, te, setup_rate, total_rate, its );
else
    printf("%24s %11.3e %11.3e %11.3e %4.1fMf %4.1fMf\n",
	   name, err, ts, te, setup_rate, total_rate );
return 0;
}
