/* Source-identification string in the "@(#)" form used by SCCS-style tools;
   the definition is skipped when `lint` is defined. */
#ifndef lint
static char SCCSid[] = "@(#) ./sparse/addelms.c 07/23/93";
#endif

/*
    This file contains routines to form a matrix (in general format) by
    adding multiple elements in a single row at a time.

    If necessary, additional routines that add in a dense submatrix will be
    added.

    These routines were added because the assembly of a 3-d FEM matrix using
    SpAddValue was far too time consuming (often meeting or exceeding the
    time to setup and solve the linear system).  The significant advantages
    of this approach are
      (a) fewer memory allocations as a row is extended
      (b) fewer copies as the insertion method is O(nzr^2) where nzr is the
          number of non-zeros in a row (this is a feature of the linear
          storage for the elements).  Using a multiple element approach
          reduces this cost to O(na*nzr), where na is the number of calls
          to SpAddRowToRow
      (c) In order to further reduce the cost of allocation of storage,
          a "cache" is maintained of the work area.  This can be enabled
          and disabled (thereby recovering the used space) or ignored, in
   	  which case space is allocated on each call.

    Finally, because this routine works with multiple elements at a time,
    it is reasonable to use the general-purpose scatter/gather routines
    to manipulate the row.  
 */
#include "tools.h"
#include "sparse/spmat.h"
#include "sparse/sppriv.h"

/* File-local workspace cache shared by the routines below */

/* Count of outstanding SpAddElmCacheEnable() calls; nonzero means the
   workspace is kept between calls to SpAddRowToRow */
static int    AddElmCacheEnabled = 0;

/* Value part of the cached workspace; this is the pointer that is
   allocated and freed */
static double *AddElmXV = 0;

/* Column-index part of the cached workspace; it points into the same
   allocation as AddElmXV, just past the values */
static int    *AddElmXI = 0;

/* Number of entries the cached workspace was sized for */
static int    AddElmNz = 0;

/*
    SpAddElmCacheEnable - Turns on reuse of the SpAddRowToRow workspace.

    Each call bumps a nesting counter; while the counter is nonzero,
    SpAddRowToRow keeps its merge workspace between calls instead of
    allocating and freeing it every time.  Pair each call with
    SpAddElmCacheDisable.
 */
void SpAddElmCacheEnable()
{
AddElmCacheEnabled += 1;
}
void SpAddElmCacheDisable()
{
AddElmCacheEnabled--;
if (AddElmCacheEnabled < 0) AddElmCacheEnabled = 0;	
if (!AddElmCacheEnabled && AddElmXV) {
    FREE( AddElmXV );
    AddElmXV = 0;
    }
}
	
/*@
    SpAddRowToRow - Adds a row into a sparse matrix.

    Input parameters:
.    mat   - matrix to add row to
.    row   - row index
.    n     - number of values to add
.    v     - values
.    c     - columns, these MUST be in increasing order.

    Notes:  
    By add, we mean add.  Entries will be summed into the matrix.
    Values must be sorted in order of increasing column.
    If n is zero or negative, the matrix is left unchanged.

    For optimal performance when building a matrix, use the following
$   SpAddElmCacheEnable();
$   ... loop that calls SpAddRowToRow
$   SpAddElmCacheDisable();
$    
    Note also that this routine is basically a generic form of
    SpGatherAddToRow.
 @*/
void SpAddRowToRow( mat, row, n, v, c )
SpMat           *mat;
int             row;
register int    n, *c;
register double *v;
{
register int    nz, *xi, nzo, *xio, 
                col, ocol;        /* used to hold *c and *xi resp. */
register double *xv, *xvo;
int             Nz, *Xi;
double          *Xv;

SPLITTOMAT(mat);

/* Nothing to add.  A negative count is rejected as well: the merge loop
   below counts n down to exactly zero and would otherwise run away. */
if (n <= 0) return;

/* We get the values into temps so that we can use register variable for the
   actual operations (you can't take the address of a register variable) */
SpScatterFromRow( mat, row, &Nz, &Xi, &Xv );
nz = Nz;
xv = Xv;
xi = Xi;
if (nz == 0) {
    /* Special case for empty row */
    SpRReplaceRow( mat, row, n, c, v );
    return;
    }

/* Allocate workspace as necessary */
/* An alternate possibility here is to allocate INTO THE MATRIX enough
   space to contain the row.  Then we can eliminate the second copy.
   Another is to not sort the elements until the cache is disabled; this
   allows us to use better sorting techniques (this version still requires
   O(nz)*number-of-inserts-per-row time, rather than O(nz) */
if (AddElmCacheEnabled) {
    /* We always allocate enough for the case of no overlap between current
       and added row.  The workspace is one block: nz+n doubles followed by
       nz+n ints.
       The pointer itself is tested in addition to the recorded capacity:
       the capacity alone cannot be trusted if the block has been released
       (or a previous allocation failed) without AddElmNz being reset. */
    if (!AddElmXV || AddElmNz < nz + n) {
        if (AddElmXV) {
            FREE( AddElmXV );
            }
        AddElmXV = (double *)MALLOC( (nz + n)*(sizeof(double) + sizeof(int)) );
        CHKPTR(AddElmXV);
        AddElmXI = (int *)(AddElmXV + nz + n);
        AddElmNz = nz + n;
        }
    xvo = AddElmXV;
    xio = AddElmXI;
    }
else {
    xvo = (double *)MALLOC( (nz + n) * (sizeof(double) + sizeof(int)) );
    CHKPTR(xvo);
    xio = (int *)(xvo + nz + n);
    }

/*  Merge the two lists into xvo/xio/nzo */
nzo = 0;
/* Optimizations that were taken here:
   replace *c with col and *xi with ocol; then the comparisons
   can be done with fewer loads, depending on the compiler.
   The tests on nz and n were moved to where they are decremented, saving
   one test on average.

   The next column is loaded only AFTER the corresponding count has been
   checked, so that neither c[] nor the row's column array is ever read
   past its last element.  Both n and nz are known to be positive here.
 */
col  = *c;
ocol = *xi;
while (1) {
    if (col < ocol) {
        /* New entry comes first */
        xvo[nzo] = *v++; xio[nzo++] = col; c++; n--;
        if (n == 0) break;
        col = *c;
        }
    else if (col > ocol) {
        /* Existing entry comes first */
        xvo[nzo] = *xv++; xio[nzo++] = ocol; xi++; nz--;
        if (nz == 0) break;
        ocol = *xi;
        }
    else {
        /* Columns equal, so add the elements together */
        xvo[nzo]   = *v++ + *xv++;
        xio[nzo++] = col;
        c++;
        xi++;
        n--;
        nz--;
        if (n == 0 || nz == 0) break;
        col  = *c;
        ocol = *xi;
        }
    }
/* Add the remaining elements (at most one of these loops does any work) */
while (nz-- > 0) {
    xio[nzo]   = *xi++;
    xvo[nzo++] = *xv++;
    }
while (n-- > 0) {
    xio[nzo]   = *c++;
    xvo[nzo++] = *v++;
    }

/* Need to make sure that Gather is defined as a REPLACE row routine */
/* For now... */
SpRReplaceRow( mat, row, nzo, xio, xvo );

/* Free the workspace if necessary (the cached workspace is kept) */
if (!AddElmCacheEnabled) {
    FREE( xvo );
    }
}

/*
   For simplicity, there should also be a routine that does not assume sorted
   rows.  Because of the added overhead of the sorting, we may consider
   doing a dense submatrix (computing the sort permutation and then applying
   it to each row).

   In addition, we may want to allow for "invalid" column numbers for entries
   that are "missing" (for example, boundary nodes).

   The goal here should be to make it easy to assemble a matrix from the sum
   of element contributions.
 */

/*@
   SpAddDenseSubmatrix - Adds a dense submatrix into a matrix

   Input Parameters:
.  mat - Matrix to add to
.  dn  - pointer to dense submatrix
.  nr,nc - dn is (nr,nc) matrix
.  nrd  - declared row dimension of dn (usually nr)
.  irow - irow[i] = row of mat that i'th row of dn is added to
.  icol - icol[j] = column of mat that j'th column of dn is added to

   Notes:
   Entry (j,k) of dn is read from dn[j + k*nrd].
   The columns are reordered through a permutation computed by SYiIsortPerm
   before each row is handed to SpAddRowToRow.
   If nr or nc is not positive, the matrix is left unchanged.
@*/
void SpAddDenseSubmatrix( mat, dn, nr, nc, nrd, irow, icol )
SpMat  *mat;
double *dn;
int    nr, nc, nrd, *irow, *icol;
{
register double *v;
register int    *iperm, *c, i, j, *dnoff;

/* Empty submatrix: nothing to add.  Returning here also avoids a zero-byte
   MALLOC, which is allowed to return a null pointer and would then be
   reported by CHKPTR as an allocation failure. */
if (nr <= 0 || nc <= 0) return;

/* One allocation holds four arrays of length nc:
   v (row values), iperm (sort permutation), c (permuted columns) and
   dnoff (offset in dn of each permuted column) */
v     = (double *)MALLOC( nc * (sizeof(double) + 3*sizeof(int)) ); CHKPTR(v);
iperm = (int *)(v + nc);
c     = iperm + nc;
dnoff = c + nc;

/* Start from the identity and let SYiIsortPerm reorder it according to icol
   (presumably into increasing column order, as SpAddRowToRow requires --
   confirm against SYiIsortPerm) */
for (i=0; i<nc; i++) iperm[i] = i;
SYiIsortPerm( nc, icol, iperm );
for (i=0; i<nc; i++) c[i] = icol[iperm[i]];

/* The permutation is the same for every row, so the column offsets into dn
   are computed once */
for (i=0; i<nc; i++) 
    dnoff[i] = iperm[i]*nrd;

for (j=0; j<nr; j++) {
    /* Copy the values into the temporary storage */
    for (i=0; i<nc; i++) 
        v[i] = dn[j + dnoff[i]];
    SpAddRowToRow( mat, irow[j], nc, v, c );
    }
FREE( v );
}
