#ifndef lint
static char SCCSid[] = "@(#) ./sparse/row/rblas.c 07/23/93";
#endif

/*
   This file contains some basic sparse operations, including some
   routines to handle elimination with sparse rows that have
   a block structure.  A "Block" is a group of sparse rows that have
   an identical structure (location of non-zeros in columns).  These
   usually do not include the "leading" elements; typically, a block's
   upper left corner is a diagonal of the matrix.
 */
   
#include "tools.h"
#include "sparse/spmat.h"
#include "sparse/sppriv.h"
#include "inline/spops.h"

/* Largest block size (number of pivot rows) for which the elimination
   routines in this file use the static BasePivots array; for larger
   blocks they obtain a temporary array with MALLOC instead. */
#define MAX_BASE_PIVOTS 100
/* File-scope scratch array of pivot-row pointers.  It is refilled on each
   call by the routines that use it, so its contents are only meaningful
   for the duration of one call. */
static double *(BasePivots[MAX_BASE_PIVOTS]);

/*
   dsctr - Apply the SCATTER macro to a sparse vector and a dense vector.

   Input parameters:
   sv - sparse vector; its value array (v), index array (i) and entry
        count (nz) are passed to SCATTER
   y  - dense vector passed to SCATTER (presumably the destination that
        receives y[i[k]] = v[k]; SCATTER is defined outside this file)
 */
void dsctr( sv, y )
SpVec  *sv;
double *y;
{
/* Work on local copies: SCATTER is a macro and may modify the variables
   it is handed. */
register double *svVals = sv->v;
register int    *svIdx  = sv->i;
register int    svNz    = sv->nz;

SCATTER(svVals,svIdx,y,svNz);
}

/* #define DO_SCATTER_GATHER */
#ifdef DO_SCATTER_GATHER
/*
   SpiElimWithBlock (gather/scatter variant; compiled only when
   DO_SCATTER_GATHER is defined) - eliminate in the row ev using a block
   of pivot rows.

   The entries of ev that lie in the block's columns are gathered into the
   dense work vector etmp, SpiElimV is applied to etmp, and the result is
   scattered back into ev.  The first blocksize columns of the block are
   taken to be contiguous (col, col+1, ...); the remaining columns are
   looked up through the index array of the first pivot row.

   Input parameters:
   rs         - array of sparse rows
   ev         - row being eliminated, indexed by column number
   pivotblock - index in rs of the first pivot row
   blocksize  - number of pivot rows
   blockwidth - number of entries used from each pivot row
   offsets    - offsets[i] + boff is the position, within pivot row i, of
                the first entry used
   boff       - additional offset applied to every pivot row
   etmp       - work vector; the first blockwidth entries are used

   Returns:
   0 (always).

   NOTE(review): the result of MALLOC is not checked here.
 */
int SpiElimWithBlock( rs, ev, pivotblock, blocksize, blockwidth, offsets, boff, 
		  etmp )
register SpVec  **rs;
register double *ev;
register double *etmp;
int             pivotblock, blocksize, blockwidth, *offsets, boff;
{
register int    i, pnz, *xi, col;
double   **pivots;

rs  += pivotblock;
/* The index pointer needs the same offsets[0] + boff shift as the value
   pointers built below; otherwise the gathered columns do not match the
   pivot values whenever boff is non-zero. */
xi  = rs[0]->i + offsets[0] + boff;
col = xi[0];
pnz = blockwidth;

/* Gather */
for (i=0; i<blocksize; i++) etmp[i] = ev[col + i];
for (;i < pnz; i++) etmp[i] = ev[xi[i]];

if (blocksize > MAX_BASE_PIVOTS)
    pivots = (double **)MALLOC(blocksize*sizeof(double*));
else
    pivots = BasePivots;
for (i=0; i<blocksize; i++) 
    pivots[i] = rs[i]->v + offsets[i] + boff;

SpiElimV( pivots, etmp, blocksize, blockwidth );

if (blocksize > MAX_BASE_PIVOTS)
    FREE( pivots );

/* scatter */
for (i=0; i<blocksize; i++) ev[col + i] = etmp[i];
for (;i < pnz; i++) ev[xi[i]] = etmp[i];
return 0;
}

#else
/*
   SpiElimWithBlock - eliminate in the row ev using a block of pivot rows.

   This variant does no gather/scatter: it builds the array of pivot-row
   value pointers and hands ev, together with the column indices of the
   first pivot row, to SpiElimVPerm, which updates ev in place through
   that index array.

   Input parameters:
   rs         - array of sparse rows
   ev         - row being eliminated, indexed by column number
   pivotblock - index in rs of the first pivot row
   blocksize  - number of pivot rows
   blockwidth - number of entries used from each pivot row
   offsets    - offsets[i] + boff is the position, within pivot row i, of
                the first entry used
   boff       - additional offset applied to every pivot row
   etmp       - unused by this variant (kept so that both variants share
                one calling sequence)

   Returns:
   0 (always).

   NOTE(review): the result of MALLOC is not checked here.
 */
int SpiElimWithBlock( rs, ev, pivotblock, blocksize, blockwidth, offsets, boff, 
		  etmp )
SpVec  **rs;
double *ev;
double *etmp;
int    pivotblock, blocksize, blockwidth, *offsets, boff;
{
register int    i;
int             *xi;
double          **pivots;

rs  += pivotblock;
/* Column indices of the first pivot row, starting at the first entry used */
xi  = rs[0]->i + offsets[0] + boff;

/* Small blocks use the static pointer array; large ones a temporary */
if (blocksize > MAX_BASE_PIVOTS)
    pivots = (double **)MALLOC(blocksize*sizeof(double*));
else
    pivots = BasePivots;
for (i=0; i<blocksize; i++) 
    pivots[i] = rs[i]->v + offsets[i] + boff;

SpiElimVPerm( pivots, ev, xi, blocksize, blockwidth );

if (blocksize > MAX_BASE_PIVOTS)
    FREE( pivots );
return 0;
}
#endif


/* 
   FINDPIVOT(pivot,elim,val,idx) finds the elimination value to use from a 
   pivot row and an elim row, using the idx'th element:
       val = elim[idx] * pivot[idx]
   The value is also stored back into elim[idx].  The product (rather than
   a quotient) is used because the routines in this file keep the
   reciprocal of the diagonal in the pivot row.

   val must be an lvalue; idx is evaluated more than once.  The macro
   expands to a single statement, so it is safe in an unbraced if/else.
 */
#define FINDPIVOT(pivot,elim,val,idx) \
    do { (val) = (elim)[idx] * (pivot)[idx]; (elim)[idx] = (val); } while (0)

/* 
   ELIMWITHPIVOT(pivot,elim,val,idx) eliminates the idx'th entry:
       elim[idx] -= val * pivot[idx]
 */
#define ELIMWITHPIVOT(pivot,elim,val,idx) \
    ((elim)[idx] -= (val)*((pivot)[idx]))

/* 
   FINDPIVOT2(pivot,elim,val,idx,edx) is FINDPIVOT with separate indices
   for the two rows: idx indexes the pivot row and edx indexes the elim row.
       val = elim[edx] * pivot[idx];  elim[edx] = val
   val must be an lvalue; edx is evaluated more than once.
 */
#define FINDPIVOT2(pivot,elim,val,idx,edx) \
    do { (val) = (elim)[edx] * (pivot)[idx]; (elim)[edx] = (val); } while (0)

/* 
   ELIMWITHPIVOT2(pivot,elim,val,idx,edx) eliminates the edx'th entry of
   the elim row using the idx'th entry of the pivot row:
       elim[edx] -= val * pivot[idx]
 */
#define ELIMWITHPIVOT2(pivot,elim,val,idx,edx) \
    ((elim)[edx] -= (val)*((pivot)[idx]))

/* This is the one to use eventually */
/*
   SpiElimWithinBlock - eliminate among the rows of a single block.

   This is like SpiElimWithBlock, but within a block.  Rows are processed
   two at a time and each pair is eliminated with the earlier pairs of rows
   (matrix-matrix mode).  Note that within a block, we don't need to expand
   the rows out.

   Input parameters:
   rs         - array of sparse rows
   pivotblock - index in rs of the first row of the block
   blocksize  - number of rows in the block
   blockwidth - number of entries of each row that belong to the block,
                counted from the block's first column
   offsets    - offsets[i] is the position, within row i of the block, of
                the block's first column

   On output, each row of the block holds its multipliers in the columns
   before the diagonal, the reciprocal of its diagonal, and the updated
   entries after the diagonal.

   Returns:
   0 (always).

   NOTE(review): the result of MALLOC is not checked here.
 */
int SpiElimWithinBlock( rs, pivotblock, blocksize, blockwidth, offsets )
SpVec **rs;
int   pivotblock, blocksize, blockwidth, *offsets;
{
register double a01, a02, a11, a12, *pv0, *pv1, *ev0, *ev1, b1, b2;
register int    i, j, pnz, pi;
double   **pivots;

rs += pivotblock;

/* pivots[i] points at the block's first column in row i, so the diagonal
   of row i is pivots[i][i] */
if (blocksize > MAX_BASE_PIVOTS)
    pivots = (double **)MALLOC(blocksize*sizeof(double*));
else
    pivots = BasePivots;
for (i=0; i<blocksize; i++) 
    pivots[i] = rs[i]->v + offsets[i];

for (pi=0; pi<blocksize-1; pi += 2) {
    ev0  = pivots[pi];
    ev1  = pivots[pi+1];
    pnz  = blockwidth;
    /* Eliminate two rows at a time WITH two rows at a time */
    for (i=0; i<pi-1; i += 2) {
        pv0    = pivots[i]   + i;
        pv1    = pivots[i+1] + i;

        /* Reciprocals of the two pivot diagonals */
        b1     = pv0[0];
        b2     = pv1[1];

        a01    = ev0[0] * b1;
        ev0[0] = a01;
        ev0[1] -= a01 * pv0[1];
        a02    = ev0[1] * b2;
        ev0[1] = a02;

        a11    = ev1[0] * b1;
        ev1[0] = a11;
        ev1[1] -= a11 * pv0[1];
        a12    = ev1[1] * b2;
        ev1[1] = a12;
        /* On the Sun4, we need the b1 = ... in order to suppress 
           redundant loads */
        for (j=2; j<pnz; j++) {
            b1      = pv0[j];
            b2      = pv1[j];
            ev0[j] -= (a01 * b1 + a02 * b2);
            ev1[j] -= (a11 * b1 + a12 * b2);
            }
        /* skip to the next block */
        pnz  -= 2;
        ev0  += 2;
        ev1  += 2;
        }

    /* ev0 and ev1 now point at column pi of rows pi and pi+1.  Invert the
       diagonal of row pi, eliminate row pi+1 with it, then invert the
       diagonal of row pi+1. */
    ev0[0] = 1.0 / ev0[0];
    FINDPIVOT(ev0,ev1,a11,0);
    for (j=1; j<pnz; j++) 
        ELIMWITHPIVOT(ev0,ev1,a11,j);
    ev1[1] = 1.0 / ev1[1];
    }

/* Handle the remaining row by calling the routine to eliminate a single
   row with a block (unpacked) */
if (pi ==  blocksize-1) {
    ev0 = pivots[blocksize-1];
    SpiElimV( pivots, ev0, blocksize-1, blockwidth );
    ev0[blocksize-1] = 1.0 / ev0[blocksize-1];
    }
if (blocksize > MAX_BASE_PIVOTS)
    FREE( pivots );
return 0;
}

/*
   SpiElimV - base routine to do eliminations for inplace, dense ops.

   Eliminates the first nblock entries of elim using nblock pivot rows,
   taking the pivot rows four at a time, then two, then one.
   
   Input parameters:
   pivots     - array of pointers to the pivot rows.  Every pointer refers
                to the same column (the diagonal of the first pivot row),
                so the diagonal of pivot row k is pivots[k][k].  The
                diagonal entries must already hold their reciprocals.
   elim       - row to eliminate (same structure as pivot rows)
   nblock     - number of pivot rows to use
   blockwidth - number of non-zero columns in block

   On Output, elim is modified to contain the eliminated info: entry k
   (k < nblock) holds the multiplier used with pivot row k, and the
   remaining entries are updated.  The array of pointers itself is not
   modified.

   Returns:
   0 (always).
 */
int SpiElimV( pivots, elim, nblock, blockwidth )
double          *(pivots[]);
register double *elim;
int             nblock, blockwidth; 
{
register int    i, j, pnz = blockwidth;
register double *pv0, *pv1, *pv2, *pv3, a1, a2, a3, a4;

/* pivots and elim are advanced as rows are consumed, so pivots[0] is
   always pivot row i and elim[0] is always entry i of the original row;
   pivots[k] + i therefore points at column i of pivot row i+k. */
for (i=0; i<nblock-3; i += 4) {
    pv0 = pivots[0] + i;
    pv1 = pivots[1] + i;
    pv2 = pivots[2] + i;
    pv3 = pivots[3] + i;
    
    /* Each multiplier is the current entry times the reciprocal diagonal */
    a1       = elim[0] * pv0[0];
    elim[0]  = a1;
    elim[1] -= a1 * pv0[1];
    a2       = elim[1] * pv1[1];
    elim[1]  = a2;
    elim[2] -= (a1 * pv0[2] + a2 * pv1[2]);
    a3       = elim[2] * pv2[2];
    elim[2]  = a3;
    elim[3] -= (a1 * pv0[3] + a2 * pv1[3] + a3 * pv2[3]);
    a4       = elim[3] * pv3[3];
    elim[3]  = a4;
    for (j=4; j<pnz; j++) 
        elim[j] -= (a1 * pv0[j] + a2 * pv1[j] + a3 * pv2[j] + a4 * pv3[j]);
    /* skip to the next block */
    pnz     -= 4;
    elim    += 4;
    pivots  += 4;
    }
/* This could actually use a switch to choose between 1, 2, or 3 remaining
   elements */
if (i < nblock-1) {
    /* Two or three rows remain: do two of them */
    pv0 = pivots[0] + i;
    pv1 = pivots[1] + i;
    a1       = elim[0] * pv0[0];
    elim[0]  = a1;
    elim[1] -= a1 * pv0[1];
    a2       = elim[1] * pv1[1];
    elim[1]  = a2;
    for (j=2; j<pnz; j++) 
        elim[j] -= (a1 * pv0[j] + a2 * pv1[j]);
    /* skip to the next block */
    pnz     -= 2;
    pivots  += 2;
    elim    += 2;
    i       += 2;
    }
if (i == nblock-1) {
    /* Eliminate the last block */
    pv0 = pivots[0] + i;
    a1      = elim[0] * pv0[0];
    elim[0] = a1;
    for (j=1; j<pnz; j++) 
        elim[j] -= a1 * pv0[j]; 
    }
return 0;
}

/*
   SpiElimVPerm - base routine to do eliminations for inplace, dense ops,
   with the row being eliminated accessed through an index vector.

   Eliminates nblock entries of elim using nblock pivot rows, taking the
   pivot rows four at a time, then two, then one.
   
   Input parameters:
   pivots     - array of pointers to the pivot rows.  Every pointer refers
                to the same column (the diagonal of the first pivot row),
                so the diagonal of pivot row k is pivots[k][k].  The
                diagonal entries must already hold their reciprocals.
   elim       - row to eliminate, indexed by column number (it does NOT
                have the packed structure of the pivot rows)
   xi         - index vector to indirect elim through: entry j of a pivot
                row corresponds to elim[xi[j]].  The first nblock columns
                are taken to be contiguous, xi[0], xi[0]+1, ...
   nblock     - number of pivot rows to use
   blockwidth - number of non-zero columns in block

   On Output, elim is modified to contain the eliminated info: the entry
   in column xi[0]+k (k < nblock) holds the multiplier used with pivot
   row k, and the remaining indexed entries are updated.

   Returns:
   0 (always).
 */
int SpiElimVPerm( pivots, elim, xi, nblock, blockwidth )
double          *(pivots[]);
register double *elim;
register int    *xi;
int             nblock, blockwidth; 
{
register int i, j, pnz = blockwidth, col;
register double *pv0, *pv1, *pv2, *pv3, a1, a2, a3, a4;

/* pivots, xi and col are advanced as rows are consumed, so pivots[0] is
   always pivot row i, and col is the column of its diagonal. */
col = xi[0];
for (i=0; i<nblock-3; i += 4) {
    pv0 = pivots[0] + i;
    pv1 = pivots[1] + i;
    pv2 = pivots[2] + i;
    pv3 = pivots[3] + i;
    
    /* Each multiplier is the current entry times the reciprocal diagonal */
    a1           = elim[col] * pv0[0];
    elim[col]    = a1;
    elim[col+1] -= a1 * pv0[1];
    a2           = elim[col+1] * pv1[1];
    elim[col+1]  = a2;
    elim[col+2] -= (a1 * pv0[2] + a2 * pv1[2]);
    a3           = elim[col+2] * pv2[2];
    elim[col+2]  = a3;
    elim[col+3] -= (a1 * pv0[3] + a2 * pv1[3] + a3 * pv2[3]);
    a4           = elim[col+3] * pv3[3];
    elim[col+3]  = a4;
    for (j=4; j<pnz; j++) 
        elim[xi[j]] -= (a1 * pv0[j] + a2 * pv1[j] + a3 * pv2[j] + a4 * pv3[j]);
    /* skip to the next block */
    pnz     -= 4;
    col     += 4;
    pivots  += 4;
    xi      += 4;
    }

/* This could actually use a switch to choose between 1, 2, or 3 remaining
   elements */
if (i < nblock-1) {
    /* Two or three rows remain: do two of them */
    pv0 = pivots[0] + i;
    pv1 = pivots[1] + i;
    a1           = elim[col] * pv0[0];
    elim[col]    = a1;
    elim[col+1] -= a1 * pv0[1];
    a2           = elim[col+1] * pv1[1];
    elim[col+1]  = a2;
    for (j=2; j<pnz; j++) 
        elim[xi[j]] -= (a1 * pv0[j] + a2 * pv1[j]);
    /* skip to the next block */
    pnz     -= 2;
    pivots  += 2;
    col     += 2;
    i       += 2;
    xi      += 2;
    }
if (i == nblock-1) {
    /* Eliminate the last block */
    pv0 = pivots[0] + i;
    a1        = elim[col] * pv0[0];
    elim[col] = a1;
    for (j=1; j<pnz; j++) 
        elim[xi[j]] -= a1 * pv0[j]; 
    }
return 0;
}

/*@
    SpGetColIdx - Get the indices of all the (nonzero) columns.
    
    Input parameters:
.   mat     - matrix (its data field is used as an SpRowMat)
.   lidx    - index vector (large enough to hold all columns).  Must be
              at least cols+1 in size.  It is used as work space and, on
              return, its leading entries hold the column numbers found,
              in increasing order.
.   offset  - value subtracted from each stored column index before it is
              used; the shifted index must lie in [0,cols).  Entries whose
              shifted index is outside that range are ignored.  The offset
              is added back to the column numbers returned in lidx.

    Returns:
    Number of columns containing non-zeros
@*/
int SpGetColIdx( mat, lidx, offset )
SpMat *mat;
int   *lidx, offset;
{
int          i, row, *xi, nc, nz, n, fm, idx, m, nzf; 
SpVec        *x, **rs;
SpRowMat     *R = (SpRowMat *)mat->data;

n    = mat->rows;
nc   = mat->cols;
rs   = R->rs;
nzf      = 0;
/* lidx holds a sorted linked list: lidx[c] is the column that follows c,
   lidx[nc] is the first column, and nc marks the end of the list */
lidx[nc] = nc;
for (row=0; row<n; row++) {
    x   = rs[row];
    xi  = x->i;
    nz  = x->nz;
    /* This is code from the symbolic factor routines; it is basically a 
       linked list inside storage sufficient for all possible columns */
    while (nz--) {
        idx  = *xi++ - offset;
        /* An index outside [0,nc) would write outside lidx or never
           terminate the search below, so skip it */
        if (idx < 0 || idx >= nc) continue;
        /* Find the first list entry that is >= idx; m is its predecessor */
        fm   = nc;
        do {
            m  = fm;
            fm = lidx[m];
            } while (fm < idx);
        if (fm != idx) {
            /* insert */
            lidx[m]   = idx;
            lidx[idx] = fm;
            nzf++;
            }
        }
    }

/* Repack lidx with contiguous indices; adjust the column numbers.
   The i'th column in the list is always >= i, so the link lidx[m] must be
   read before lidx[i] is overwritten (m may equal i), and links that are
   still needed are never overwritten. */
fm = lidx[nc];
for (i=0; i<nzf; i++) {
    m       = fm;
    fm      = lidx[m];
    lidx[i] = m + offset;
    }
return nzf;
}
