/*--------------------------------------------------------------------------
 Transposition operator.
--------------------------------------------------------------------------*/
#include <stdio.h>
#include <math.h>

#include "sig.h"
#include "arith.h"

/* SCCS-style identification string (file name, revision, date) embedded
 * in the object file; it is not referenced by the code in this file.
 */
static char sccsid[] = "@(#)transpose.c	1.1 7/15/91";

/*--------------------------------------------------------------------------
 Transpose a rectangular array.
 Input is nreci records of length recleni (i.e. pi[nrec, reclen])
 Output is recleni records of length nreci (i.e. po[reclen, nrec])
--------------------------------------------------------------------------*/
void
transpose_int16(nreci, recleni, pi, po)
    int nreci;
    int recleni;
    short *pi, *po;
{
    int row;

    /* The input is consumed strictly sequentially (row-major), while the
     * output is written column-wise: element `col' of input record `row'
     * lands at po[col * nreci + row].  The destination index is advanced
     * by nreci per element instead of being recomputed with a multiply.
     */
    for (row = 0; row < nreci; row++) {
	int dst = row;			/* first slot of output column `row' */
	int remaining = recleni;	/* elements left in this input record */

	while (remaining-- != 0) {
	    po[dst] = *pi++;
	    dst += nreci;
	}
    }
}

/* Single-precision float variant of the rectangular transpose:
 * pi holds nreci records of recleni floats, po receives recleni records
 * of nreci floats.
 */
void
transpose_float32(nreci, recleni, pi, po)
    int nreci;
    int recleni;
    float *pi, *po;
{
    int row;

    /* Read the input in storage order; scatter each record down one
     * output column, stepping the destination index by nreci each time
     * so that po[col * nreci + row] = pi[row * recleni + col].
     */
    for (row = 0; row < nreci; row++) {
	int dst = row;			/* first slot of output column `row' */
	int remaining = recleni;	/* elements left in this input record */

	while (remaining-- != 0) {
	    po[dst] = *pi++;
	    dst += nreci;
	}
    }
}

/*--------------------------------------------------------------------------
 Perform a running corner-turn on the signal.

 In a hardware corner-turn system, the input is processed as follows:
 allocate nframe_per_blk buffers
 bufnum=0
 repeat {
    bufnum = (bufnum + 1) mod nframe_per_blk
    input nrec_per_frame records into buffer bufnum
    concatenate buffers (bufnum+1, ..., bufnum; indices taken
    mod nframe_per_blk), transpose, and output.
 }
 To get naive transposition of a square array, set nframe_per_blk to 1
 and nrec_per_frame to reclen.

 Thus, a stream of nrec=n records with reclen=m gets turned into
 f = n/nrec_per_frame frames (integer division); this is interpreted as
 b = f-(nframe_per_blk-1) overlapping blocks.
 Each block is nrec_per_blk = nframe_per_blk * nrec_per_frame records
 long, and is transposed before being output.  Thus the output has b * m
 records and each record has length nframe_per_blk * nrec_per_frame.

 Partial output blocks at the end are discarded.

 Example: if TOS=
    1a 1b 1c 1d
    2a 2b 2c 2d
    3a 3b 3c 3d
    4a 4b 4c 4d
    5a 5b 5c 5d
    6a 6b 6c 6d

 transpose 4 1 would yield
    1a 2a 3a 4a
    1b 2b 3b 4b
    1c 2c 3c 4c
    1d 2d 3d 4d

 transpose 2 2 would yield
    1a 2a 3a 4a
    1b 2b 3b 4b
    1c 2c 3c 4c
    1d 2d 3d 4d

    3a 4a 5a 6a
    3b 4b 5b 6b
    3c 4c 5c 6c
    3d 4d 5d 6d

 transpose 2 1 would yield
    1a 2a
    1b 2b
    1c 2c
    1d 2d

    3a 4a
    3b 4b
    3c 4c
    3d 4d

    5a 6a
    5b 6b
    5c 6c
    5d 6d

 and transpose 1 1 would yield
    1a 
    1b
    1c
    1d
    2a 
    .
    .
    .
    6c
    6d

 This operator could be thought of as producing a 3-d output array.
--------------------------------------------------------------------------*/

/*
 * Replace the contents of `this' with its running corner-turn.
 *
 * this            signal transformed in place: its re buffer (and, for
 *                 complex kinds, its im buffer) is freed and replaced by
 *                 the transposed data, and nrec/reclen are updated to the
 *                 output geometry (nblks * reclen records, each of length
 *                 nrec_per_frame * nframe_per_blk).
 * nrec_per_frame  number of input records per frame; must be positive.
 * nframe_per_blk  number of consecutive frames per block; must be positive.
 *
 * Prints a message on stderr and calls exit(1) if either count is not
 * positive, or if the input holds too few frames for one complete block.
 */
void
sig_transpose(this, nrec_per_frame, nframe_per_blk)
    sig_t *this;
    int nrec_per_frame;
    int nframe_per_blk;
{
    int nrec = this->nrec;
    int reclen = this->reclen;
    char *thisre = (char *)this->re;
    char *thisim = (char *)this->im;

    int nframes;
    int framebytes;
    int nblks;
    int nrec_per_blk;
    int blkbytes;

    sig_t *sig;
    char *sigre;
    char *sigim;
    int blk;

    /* Validate before any arithmetic: nrec_per_frame is used as a divisor
     * below, and non-positive counts would yield negative byte strides or
     * zero-length output records.
     */
    if (nrec_per_frame <= 0 || nframe_per_blk <= 0) {
	fprintf(stderr,
	    "sig_transpose: nrec_per_frame and nframe_per_blk must be positive\n");
	exit(1);
    }

    /* Trailing records that do not fill a whole frame are ignored. */
    nframes = nrec / nrec_per_frame;
    framebytes = nrec_per_frame * reclen * sig_ELBYTES(this);
    /* Blocks overlap: consecutive blocks start one frame apart. */
    nblks = nframes - (nframe_per_blk - 1);
    nrec_per_blk = nrec_per_frame * nframe_per_blk;
    blkbytes = nrec_per_blk * reclen * sig_ELBYTES(this);

    if (nblks <= 0) {
	fprintf(stderr, "sig_transpose: no output blks\n");
	exit(1);
    }
    sig = sig_new(nblks * reclen, nrec_per_blk, this->kind, this->fname);
    sigre = (char *)sig->re;
    sigim = (char *)sig->im;

    /* Scan thru input array, stepping by framesize; scan thru
     * output array, stepping by blocksize.
     * Output is roughly nframe_per_blk times larger than input.
     * Complex kinds transpose the imaginary plane first and then fall
     * thru to share the real-plane loop with the corresponding real kind.
     */
    switch (this->kind) {
    case hdr_CINT16:
	for (blk=0; blk<nblks; blk++,thisim+=framebytes,sigim+=blkbytes)
	    transpose_int16(nrec_per_blk, reclen,
		(short *)thisim, (short *)sigim);
	/* fall thru */
    case hdr_INT16:
	for (blk=0; blk<nblks; blk++,thisre+=framebytes,sigre+=blkbytes)
	    transpose_int16(nrec_per_blk, reclen,
		(short *)thisre, (short *)sigre);
	break;

    case hdr_CFLOAT32:
	for (blk=0; blk<nblks; blk++,thisim+=framebytes,sigim+=blkbytes)
	    transpose_float32(nrec_per_blk, reclen,
		(float *)thisim, (float *)sigim);
	/* fall thru */
    case hdr_FLOAT32:
	for (blk=0; blk<nblks; blk++,thisre+=framebytes,sigre+=blkbytes)
	    transpose_float32(nrec_per_blk, reclen,
		(float *)thisre, (float *)sigre);
	break;
    default:
	/* NOTE(review): presumably badkind() does not return; if it does,
	 * the untransposed output buffers are still swapped in below.
	 */
	badkind("sig_transpose", this->kind);
    }

    /* Adopt the output buffers and geometry, then discard the temporary
     * sig_t shell.  Only re/im are taken over from sig here.
     */
    free(this->re);  this->re = sig->re;
    if (this->kind & hdr_KIND_COMPLEX) {
	free(this->im);  this->im = sig->im;
    }
    this->nrec = sig->nrec;
    this->reclen = sig->reclen;
    free(sig);
}
