/*--------------------------------------------------------------------------
 Routines to convolve each record of a signal with an impulse response.
--------------------------------------------------------------------------*/
#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "imath.h"
#include "sig.h"
#include "bfp.h"
#include "arith.h"
#include "fft.h"

/* Source-control identification string for this file.  The "@(#)" prefix
 * is presumably the SCCS marker searched for by what(1) -- TODO confirm. */
static char sccsid[] = "@(#)convolve.c	1.1 7/15/91";

#if 0
/* Print a complex vector for debugging.
 * s  - optional label written verbatim before the data (may be NULL)
 * xn - number of elements to print
 * xr - real parts, xi - imaginary parts (xn elements each)
 * Output goes to stdout as a "real ..." line followed by an "imag ..." line.
 */
void
printem(s, xn, xr, xi)
    char *s;
    int xn;
    short *xr, *xi;
{
    int i;

    /* The label is data, not a format: write it as-is so that a '%' in it
     * cannot be interpreted as a conversion specification. */
    if (s) fputs(s, stdout);
    printf("real ");
    for (i=0; i<xn; i++)
	printf("%d ", xr[i]);
    printf("\n");
    printf("imag ");
    for (i=0; i<xn; i++)
	printf("%d ", xi[i]);
    printf("\n");
}

/* Print a complex vector in bit-reversed order for debugging.
 * s  - optional label written verbatim before the data (may be NULL)
 * xn - number of elements to print
 * xr - real parts, xi - imaginary parts
 * Element i of the output is taken from index bitrev(i, xn); bitrev() is
 * defined outside this file.
 */
void
printem_bitrev(s, xn, xr, xi)
    char *s;
    int xn;
    short *xr, *xi;
{
    int i;

    /* The label is data, not a format: write it as-is so that a '%' in it
     * cannot be interpreted as a conversion specification. */
    if (s) fputs(s, stdout);
    printf("real ");
    for (i=0; i<xn; i++)
	printf("%d ", xr[bitrev(i, xn)]);
    printf("\n");
    printf("imag ");
    for (i=0; i<xn; i++)
	printf("%d ", xi[bitrev(i, xn)]);
    printf("\n");
}
#endif

/*--------------------------------------------------------------------------
 Convolve (large) array A with (small) array B directly, without FFT.
 A and B are complex, held as separate real/imaginary arrays of shorts
 (ar/ai with alen elements, br/bi with blen elements).
 Only a[0..alen-1] and b[0..blen-1] are read; samples of A before index 0
 are treated as zero, so no padding of A is required.
 Result returned in C (cr/ci), and has same size as A (i.e. it's truncated).

 Each output sum is accumulated in double and then divided by i_unity
 (declared outside this file; presumably the fixed-point value of 1.0).

 Also returns shift factor applied to output to prevent overflow: every
 output sample has been shifted right by that many bits.
 [ I suppose an alternative is to clip output to +-SHRT_MAX, but I haven't. ]
--------------------------------------------------------------------------*/
int
cconvolve_direct(ar, ai, alen, br, bi, blen, cr, ci)
    short *ar, *ai;
    int alen;
    short *br, *bi;
    int blen;
    short *cr, *ci;
{
    int i, j;
    int shift = 0;

    assert(alen > 1);
    assert(blen > 1);

    /* Loop over all points of output */
    for (i=0; i<alen; i++) {
	double sr = 0;
	double si = 0;
	int ir, ii;

	/* c[i] = sum over [j=-inf to +inf] of a[j] * b[i-j]
	 * but...
	 * a[j   >=  0] -> sum over [j=0 ... alen-1]
	 * b[i-j >=  0] -> sum over [j=0 ... i]
	 * b[i-j <blen] -> sum over [j=max(0,i-blen+1) ... i]
	 */
	for (j=max(0,i-blen+1); j<=i; j++) {
	    /* Form the products in double: in int arithmetic the sum of
	     * two 16x16-bit products can reach 2^31 and overflow.  Every
	     * intermediate value here is an exactly representable integer.
	     */
	    double xr = ar[j];
	    double xi = ai[j];
	    double yr = br[i - j];
	    double yi = bi[i - j];

	    sr += xr * yr - xi * yi;
	    si += xr * yi + xi * yr;
	}

	ir = (int) (sr / i_unity);
	ii = (int) (si / i_unity);

	/*** Block floating point support */
	/* Bring the new sample to the scale already applied to earlier ones. */
	ir >>= shift;
	ii >>= shift;
	/* Note the symmetric range: -SHRT_MAX-1 counts as overflow too. */
	if(ir>SHRT_MAX||ii>SHRT_MAX||ir<-SHRT_MAX||ii<-SHRT_MAX){
	    int dshift;
	    /* Overflow!  Find new scale factor... */
	    for (dshift = 0; 
		ir>SHRT_MAX||ii>SHRT_MAX||ir<-SHRT_MAX||ii<-SHRT_MAX;
		dshift++) {
		    ir >>= 1;
		    ii >>= 1;
	    }
	    shift += dshift;
	    /* ... and shift old results. */
	    for (j=0; j<i; j++) {
		cr[j] >>= dshift;
		ci[j] >>= dshift;
	    }
	}
	/*** End of block floating point support */

	cr[i] = (short) ir;
	ci[i] = (short) ii;
    }

    return shift;
}

/*--------------------------------------------------------------------------
 Convolve (large) array A with (small) array B using fft of size 2^log_n.
 Use overlap-save method.
 size A >= 2^log_n (asserted).
 1 < size B < 2^log_n (asserted).
 The FFTs are spaced (2^log_n - (size B) + 1) samples apart along A, and
 produce that many usable samples.  Each FFT frame starts (size B - 1)
 samples before the output position it serves; frame samples that fall
 before the start or after the end of A are taken as zero.

 A and B are complex, held as separate real/imaginary arrays of shorts.
 Result returned in C (cr/ci), and has same size as A (i.e. it's truncated).

 Uses the global bfp_scale_ct (defined outside this file) around the FFT
 calls.  Prints a message and exits if the work buffers cannot be allocated.
--------------------------------------------------------------------------*/
void
cconvolve(log_n, ar, ai, alen, br, bi, blen, cr, ci)
    int log_n;
    short *ar, *ai;
    int alen;
    short *br, *bi;
    int blen;
    short *cr, *ci;
{
    int i;
    int dft_size = 1<<log_n;
    int overlap_len = blen - 1;
    int nonoverlap_len = dft_size - blen + 1;
    short *tr, *ti;
    short *btr, *bti;
    int scale_of_bt;

    assert(alen >= dft_size);
    assert(blen < dft_size);
    assert(blen > 1);

    tr = malloc((size_t)dft_size * sizeof *tr);
    ti = malloc((size_t)dft_size * sizeof *ti);
    btr = malloc((size_t)dft_size * sizeof *btr);
    bti = malloc((size_t)dft_size * sizeof *bti);
    if (tr == NULL || ti == NULL || btr == NULL || bti == NULL) {
	fprintf(stderr, "cconvolve: out of memory\n");
	exit(1);
    }

    /* Take FFT of B (zero-padded to dft_size), save for repeated use. */
    memcpy(btr, br, (size_t)blen * sizeof *btr);
    memcpy(bti, bi, (size_t)blen * sizeof *bti);
    memset(btr+blen, 0, (size_t)(dft_size-blen) * sizeof *btr);
    memset(bti+blen, 0, (size_t)(dft_size-blen) * sizeof *bti);
    bfp_scale_ct = 0;
    cfft_dif(log_n, btr, bti);
    scale_of_bt = bfp_scale_ct;		/* save & apply later */

    for (i=0; i<alen; i += nonoverlap_len) {
	/* Note that convolution is causal, so we need left context.  
	 * Because the output of the circular convolution is not timeshifted, 
	 * we have to throw away the output samples corresponding to the left
	 * context.
	 *
	 * Frame sample k holds a[i - overlap_len + k].  The outputs kept
	 * from this frame are k = overlap_len ... overlap_len+out_len-1,
	 * so the frame must contain the context AND every remaining input
	 * sample, including on the final, partial frame.
	 */
	int out_len = min(nonoverlap_len, alen - i);
	int start = i - overlap_len;		/* index in A of frame[0] */
	int src = (start > 0) ? start : 0;	/* first real sample used */
	int lead = src - start;			/* zeros standing in for a[<0] */
	int ncopy = min(dft_size - lead, alen - src);
	int tail = dft_size - lead - ncopy;	/* zeros past the end of A */

	/* Grab this section of input w/ left context.  Pad with zeroes. */
	memset(tr, 0, (size_t)lead * sizeof *tr);
	memset(ti, 0, (size_t)lead * sizeof *ti);
	memcpy(tr+lead, ar+src, (size_t)ncopy * sizeof *tr);
	memcpy(ti+lead, ai+src, (size_t)ncopy * sizeof *ti);
	memset(tr+lead+ncopy, 0, (size_t)tail * sizeof *tr);
	memset(ti+lead+ncopy, 0, (size_t)tail * sizeof *ti);

	/* Take its FFT, starting the scale count from the one B's FFT left. */
	bfp_scale_ct = scale_of_bt;
	cfft_dif(log_n, tr, ti);

	/* Multiply A and B in frequency domain; results to tr,ti. */
	mult_cint16(dft_size, btr, bti, tr, ti);

	/* Reverse FFT. */
	cifft_dif(log_n, tr, ti);

	bfp_normalize_result(dft_size, tr, ti);

	/* And copy results to output area, skipping the left context. */
	memcpy(cr+i, tr+overlap_len, (size_t)out_len * sizeof *cr);
	memcpy(ci+i, ti+overlap_len, (size_t)out_len * sizeof *ci);
    }

    free(tr);
    free(ti);
    free(btr);
    free(bti);
}

/*-----------------------------------------------------------------------------
 Run arrays thru cconvolve, return result.
 This should be larger than that.
 fftsize is log2 of size of fft to use when convolving; 0 -> direct convolution.
------------------------------------------------------------------------------*/
sig_t *
sig_cconvolve(this, that, fftsize)
    sig_t *this;
    sig_t *that;
    int fftsize;
{
    sig_t *sig;
    int nrec = this->nrec;
    int reclen = this->reclen;
    int offset;
    int n;

    /* 2nd (small) input must be one record long. */
    if (that->nrec != 1) {
	fprintf(stderr, "convolve: NOS must be a single record, but nrec=%d\n",
	    that->nrec);
	exit(1);
    }

    samekind("sig_cconvolve", this, that);

    /* Copy dimensions and allocate space.  We'll truncate convolution result
     * to same size as 1st (large) input.
     */
    sig = sig_new(this->nrec, this->reclen, this->kind, this->fname);

    switch (this->kind) {
    case hdr_CINT16:
	for (n=0, offset=0; n<nrec; n++, offset+=reclen) {
	    printf("."); fflush(stdout);
	    if (fftsize > 1) {
		cconvolve(ilog2(fftsize), 
		    this->re+offset, this->im+offset, this->reclen,
		    that->re, that->im, that->reclen, 
		    sig->re+offset, sig->im+offset);
	    } else {
		int shift = cconvolve_direct( 
		    this->re+offset, this->im+offset, this->reclen,
		    that->re, that->im, that->reclen, 
		    sig->re+offset, sig->im+offset);
		if (shift) {
		    fprintf(stderr, "sig_cconvolve: overflow in record %d.\n",n);
		    exit(1);
		}
	    }
	}
	break;

    case hdr_CBFP16:
	for (n=0, offset=0; n<nrec; n++, offset+=reclen) {
	    printf("."); fflush(stdout);
	    if (fftsize > 1) {
		cconvolve(ilog2(fftsize), 
		    this->re+offset, this->im+offset, this->reclen,
		    that->re, that->im, that->reclen, 
		    sig->re+offset, sig->im+offset);
		sig->exp[n] = this->exp[n] + that->exp[n];
	    } else {
		int shift = cconvolve_direct( 
		    this->re+offset, this->im+offset, this->reclen,
		    that->re, that->im, that->reclen, 
		    sig->re+offset, sig->im+offset);
		sig->exp[n] = this->exp[n] + that->exp[n] + shift;
	    }
	}
	break;
    default:
	badkind("sig_cconvolve", this->kind);
    }

    return sig;
}
