/*****************************************************************************/
/* This software module was originally developed by                          */
/*   Naoki Iwakami (NTT)                                                     */
/* and edited by                                                             */
/*   Naoki Iwakami (NTT) on 1997-07-17,                                      */
/* in the course of development of the                                       */
/* MPEG-2 NBC/MPEG-4 Audio standard ISO/IEC 13818-7, 14496-1,2 and 3.        */
/* This software module is an implementation of a part of one or more        */
/* MPEG-2 NBC/MPEG-4 Audio tools as specified by the MPEG-2 NBC/MPEG-4 Audio */
/* standard. ISO/IEC  gives users of the MPEG-2 NBC/MPEG-4 Audio standards   */
/* free license to this software module or modifications thereof for use in  */
/* hardware or software products claiming conformance to the MPEG-2 NBC/     */
/* MPEG-4 Audio  standards. Those intending to use this software module in   */
/* hardware or software products are advised that this use may infringe      */
/* existing patents. The original developer of this software module and      */
/* his/her company, the subsequent editors and their companies, and ISO/IEC  */
/* have no liability for use of this software module or modifications        */
/* thereof in an implementation. Copyright is not released for non           */
/* MPEG-2 NBC/MPEG-4 Audio conforming products. The original developer       */
/* retains full right to use the code for his/her  own purpose, assign or    */
/* donate the code to a third party and to inhibit third party from using    */
/* the code for non MPEG-2 NBC/MPEG-4 Audio conforming products.             */
/* This copyright notice must be included in all copies or derivative works. */
/* Copyright (c)1996.                                                        */
/*****************************************************************************/

#include <stdio.h>  /* added by K.Mano */
#include <math.h>
#include "ntt_conf.h"
#include "ntt_tools.h"


/* --- ntt_alfcep ---

******************************************************************
*     LPC / PARCOR / CSM / LSP   subroutine  library     # 08    *
*               coded by S.Sagayama,                1/08/1976    *
******************************************************************
 ( C version coded by S. Sagayama,  6/19/1986 )

   description:
     * conversion of "alf" into "cep".
     * computation of cepstrum of AR-process specified by "alf".
     * computation of sum of m-th power of LPC poles, m=1,..,n,
       since:         1     IP                m
             cep(m)= --- * sum ( (LPC pole(i))  ).
                      m    i=1

   synopsis:
          -------------------------
          ntt_alfcep(ip,alf,cep,n)
          -------------------------
   IP      : input.        integer.
             the order of analysis; the number of poles in LPC;
             the degree of freedom of the model - 1.
   alf[.]  : input.        double array : dimension=IP.
             linear prediction coefficients; ar parameters.
             alf[0] is implicitly assumed to be 1.0.
   cep[.]  : output.       double array : dimension=n.
             LPC (all-pole modeled) cepstrum.
             cep[0] is implicitly assumed to be alog(resid/pi),
             where "resid" is residual power of LPC/PARCOR.
   n       : input.        integer.
             the number of required points of LPC-cepstrum.
             note that the degree of freedom remains IP.
*/

/*
 * ntt_alfcep: convert LPC coefficients alf[1..p] into the LPC cepstrum
 * cep[1..n].  Index 0 of both arrays is never touched.
 *
 * The recursion is carried out on m*cep[m] and the division by m is applied
 * in a final pass.
 *
 * Edge cases:
 *   - n < 1 : nothing is requested, so nothing is read or written.
 *   - p > n : only the first n coefficients can influence cep[1..n].
 *   - p < 1 : no predictor coefficients are used; cep[1..n] is set to 0.0
 *             and alf[] is not read.
 */
void ntt_alfcep(int p,        /* Input : the number of poles in LPC */
                double alf[], /* Input : linear prediction coefficients */
                double cep[], /* Output : LPC (all-pole modeled) cepstrum */
                int n)        /* Input : the number of required points of LPC-cepstrum */
{
  double ss;
  int i, m;

  if (n < 1) return;          /* no output points requested */
  if (p > n) p = n;
  if (p < 1) {                /* order-0 model: all requested points are zero */
    for (m = 1; m <= n; m++) cep[m] = 0.0;
    return;
  }

  /* cep[m] temporarily holds m * (m-th cepstral coefficient). */
  cep[1] = -alf[1];
  for (m = 2; m <= p; m++) {
    ss = -alf[m] * m;
    for (i = 1; i < m; i++) ss -= alf[i] * cep[m - i];
    cep[m] = ss;
  }
  /* Beyond the model order only the convolution term remains. */
  for (m = p + 1; m <= n; m++) {
    ss = 0.0;
    for (i = 1; i <= p; i++) ss -= alf[i] * cep[m - i];
    cep[m] = ss;
  }
  /* Undo the m* scaling (m = 1 needs no division). */
  for (m = 2; m <= n; m++) cep[m] /= m;
}


/* --- ntt_alflsp ---

******************************************************************
*     LPC / PARCOR / CSM / LSP   subroutine  library     # 40    *
*               coded by S.Sagayama,                 5/5/1982    *
******************************************************************
 ( C version coded by S.Sagayama )
 ( revised (convergence trial loop) by S.Sagayama, 6/24/1987 )

   description:
     * conversion of "alf" into "lsp".
     * computation of line spectrum pair frequencies from linear
       prediction coefficients.
     * computation of roots of p(x) and q(x):
                              p
           P(x) = z * a(z) - z   * a(1/z)
                              p
           Q(x) = z * a(z) + z   * a(1/z)  
       where
                   p           p-1
           A(z) = z   + a[1] * z     + .... + a[p].
       in case p=even, the roots of p(x) are cosine of: 
          0, freq[2], freq[4], ... , freq[p];
       and the roots of q(x) are cosine of:
          freq[1], freq[3], ... ,freq[p-1], pi.
     * the necessary and sufficient condition for existence of
       the solution is that all the roots of polynomial A(z) lie
       inside the unit circle.

   synopsis:
          ------------------------
          void ntt_alflsp(p,alf,freq)
          ------------------------
   p      : input.        integer.     2 =< p =< 14.
             the order of analysis; the number of poles in LPC;
   alf[.]  : input.        double array : dimension=p.
             linear prediction coefficients; ar parameters.
             alf[0] is implicitly assumed to be 1.0.
   freq[.] : output.       double array : dimension=p.
             LSP frequencies, ranging between 0 and 1;
             CSM frequencies under two different conditions,
               p=even:   freq(0)=0,order=n / freq(n+1)=1,order=n
               p=odd:    order=n / freq(0)=0,freq(n+1)=1,order=n-1,
             where n=[(p+1)/2].
             increasingly ordered.  freq(1)=<freq(2)=<.....

   note: (1) p must not be greater than 20. (limitation in "ntt_excheb")
         (2) subroutine call: "ntt_excheb", "ntt_nrstep".
*/

/*
 * ntt_alflsp: convert linear prediction coefficients alf[1..p] into
 * p values fq[1..p], each obtained as acos() of a polynomial root.
 *
 * Outline of what the code does:
 *   1. (Re)build the Chebyshev coefficient table when p exceeds the largest
 *      order seen so far; the table and that order live in static storage,
 *      so state is shared between calls.
 *   2. Form two coefficient sets opp[1..np] and opm[1..nm] from sums and
 *      differences of alf[i] and alf[p+1-i] (np = p/2, nm = p - np).
 *   3. Expand both sets through ntt_excheb().
 *   4. For p <= 2 take the values directly; otherwise call ntt_nrstep()
 *      alternately on the two sets, retrying with a halved eps (up to 6 tries)
 *      whenever the sequence of values is not strictly decreasing within (-1, 1].
 *   5. Replace every fq[i] by acos(fq[i]).
 *
 * Work arrays are fixed at 50 entries (tbl at 200); p is not range-checked here.
 */
void ntt_alflsp(/* Input */
                int    p,      /* LSP analysis order */
	        double alf[],  /* linear predictive coefficients */
                /* Output */
	        double fq[] )  /* LSP frequencies, 0 < fq[.] < pi */
{
  int i,j,k,km,kp,nm,np,flag;
  double b,x,y,eps,opm[50],opp[50],opm1[50],opp1[50];
  static int p0=0;
  static double tbl[200],eps0=0.00001;
  /* The table is only regenerated when a larger order than any previous one is requested. */
  if(p>p0) { p0=p; ntt_chetbl(tbl,(p+1)/2); } /* making Chebyshev coef table */

  /* np = floor(p/2), nm = ceil(p/2); they are equal exactly when p is even. */
  np=p/2; nm=p-np;
  if(nm==np) /* ---- in case of p=even ---- */
  { opp[1]=alf[1]-alf[p]+1.0; opm[1]=alf[1]+alf[p]-1.0;
    /* each entry is built from the previous one (running recursion) */
    for(i=2;i<=nm;i++)
    { b=alf[p+1-i]; opp[i]=alf[i]-b+opp[i-1]; opm[i]=alf[i]+b-opm[i-1]; } }
  else /* ---- in case of p=odd ---- */
  { opm[1]=alf[1]+alf[p];
    if(nm>1)
    { opp[1]=alf[1]-alf[p]; opm[2]=alf[2]+alf[p-1];
      if(nm>2)
      { opp[2]=alf[2]-alf[p-1]+1.0;
        for(i=3;i<=nm;i++) opm[i]=alf[i]+alf[p+1-i];
        /* opp recursion reaches back two entries here */
        for(i=3;i<=np;i++) opp[i]=alf[i]-alf[p+1-i]+opp[i-2]; } } }
  /* NOTE: only the first call is guarded by the if; the second ntt_excheb()
     call on this line is a separate statement and always executes.
     Both calls expand in place (input and output array are the same). */
  if(nm>1) ntt_excheb(np,opp,opp,tbl); ntt_excheb(nm,opm,opm,tbl);
  /* Orders 1 and 2: the values are read off directly, no iteration needed. */
  if(p==1) fq[p]= -opm[1];
  else if(p<=2) { fq[p-1]= -opm[1]; fq[p]= -opp[1]; }
  else /* ---- find roots of the polynomials ---- */
  { eps=eps0;
    for(k=0;k<6;k++) /* trying 6 times while LSP invalid */
    { /* work on copies so that every retry starts from the same coefficients */
      for(i=1;i<=nm;i++) opm1[i]=opm[i];
      for(i=1;i<=np;i++) opp1[i]=opp[i];
      kp=np; km=nm; j=0; x=1.0; y=1.0; flag=1;
      for(i=1;i<=p;i++)
      { /* j toggles 1,0,1,...: odd i uses opm1, even i uses opp1.  The order
           passed to ntt_nrstep() is decremented after each call; presumably
           ntt_nrstep() deflates the polynomial by the root it returns in x
           (defined elsewhere -- confirm). */
        if((j=1-j)!=0) { ntt_nrstep(opm1,km,eps,&x); km--; }
        else { ntt_nrstep(opp1,kp,eps,&x); kp--; }
        /* each value must be strictly below the previous one and above -1 */
        if(x>=y || x<= -1.0) { flag=0; break; }
        else { y=x; fq[i]=x; } }
      if(flag) break;
      else { eps*=0.5; fprintf(stderr,"ntt_alflsp(%d)",k); } /* 1/2 criterion */
  } }
  /* NOTE(review): if all 6 tries fail, the entries of fq[] from the failing
     index onward were not assigned during the last try, yet acos() is still
     applied to them below. */
  for(i=1;i<=p;i++) fq[i]=acos(fq[i]);
}


/* --- ntt_alfref ---

******************************************************************
*     LPC / PARCOR / CSM / LSP   subroutine  library     # 03    *
*               coded by S.Sagayama,              autumn/1975    *
******************************************************************
 ( C version coded by S.Sagayama, 6/19/1986 )

   description:
     * conversion of "alf" into "ref".
     * discrimination of stability of all-pole filter specified
       by "alf".  If every ref[.] satisfies -1<ref[.]<1.
     * discrimination if sequence "alf" is of minimum phase or not.

   synopsis:
          ------------------------
          ntt_alfref(p,alf,ref,&resid)
          ------------------------
   p       : input.        integer.
             the order of analysis; the number of poles in LPC;
             the degree of freedom of the model - 1.
   alf[.]  : input.        double array : dimension=p+1.
             linear prediction coefficients; AR parameters.
             alf[0] is implicitly assumed to be 1.0.
   ref[.]  : output.       double array : dimension=p+1.
             PARCOR coefficients; reflection coefficients.
             all of ref[.] range between -1 and 1.
   resid   : output.       double.
             linear prediction / PARCOR residual power;
             reciprocal of power gain of PARCOR/LPC/LSP all-pole
             filter.
*/

/*
 * ntt_alfref: step-down conversion of prediction coefficients alf[1..p]
 * into reflection coefficients ref[1..p].
 *
 * alf[] is first copied into ref[]; the order is then reduced from p down
 * to 1 in place.  *_resid receives the product of (1 - r)(1 + r) over all
 * reflection coefficients r (1.0 when p <= 0).  Index 0 is never accessed.
 */
void ntt_alfref(/* Input */
                int p,          /* the number of poles in LPC */
                double alf[],   /* linear prediction coefficients */
                /* Output */
                double ref[],   /* reflection coefficients */
                double *_resid) /* linear prediction / PARCOR residual power */
{
  int order, lo, hi;
  double refl, gain, saved;

  *_resid = 1.0;

  for (order = 1; order <= p; order++) {
    ref[order] = alf[order];
  }

  for (order = p; order > 0; order--) {
    /* The top coefficient of the current order, negated, is the reflection
       coefficient of that order. */
    refl = -ref[order];
    ref[order] = refl;
    gain = (1.0 - refl) * (1.0 + refl);

    /* Reduce the remaining coefficients pairwise from both ends inwards;
       the middle element (lo == hi) is updated only once. */
    for (lo = 1, hi = order - 1; lo <= hi; lo++, hi--) {
      saved = ref[lo];
      ref[lo] = (saved + refl * ref[hi]) / gain;
      if (lo < hi) {
        ref[hi] = (ref[hi] + refl * saved) / gain;
      }
    }

    *_resid *= gain;
  }
}


/* --- ntt_cep2alf ---
******************************************************************
*/
/* LPC cep to alf by solving normal equation */
/*
 mata' * mata * alf = - mata' * cep
 mata =  1         0         0       0
	 c[1]/2    1         0       0
         c[2]*2/3  c[1]/3    1       0
         c[3]*3/4  c[2]*2/4  c[1]/4  1
            ...........
         c[n]*(n-1)/n  .....         c[n-p]*(n-p)/n

 cep'  = c[0],c[1], c[2],      c[n]
 alf'  = alf[1],alf[1], alf[2] .. alf[p]


*/

#define LPC_MAX  (20+1)
#define MAT_MAX  (40+1)

void ntt_cholesky(/* In/Out */
                  double a[],
                  /* Output */
                  double b[], 
                  /* Input */
                  double c[],
                  int n)     
{ 
  int i,j,k;
  double t[LPC_MAX*LPC_MAX], invt[LPC_MAX];
  register double acc;
  static double eps=1.e-16;

  t[0] = sqrt(a[0]+eps);
  invt[0] = 1./t[0];
  for(k=1; k<n; k++) t[k*n] = a[k*n] * invt[0];
  for(i=1;i<n;i++) {
     acc = a[i*n+i]+eps;
     for(k=0; k<i; k++) acc -= t[i*n+k] * t[i*n+k];
     t[i*n+i] = sqrt(acc);
     invt[i] = 1./t[i*n+i] ;
     for(j=i+1;j<n;j++){
        acc = a[j*n+i]+eps;
        for(k=0; k<i; k++) acc -= t[j*n+k] * t[i*n+k];
	t[j*n+i] = acc * invt[i];
     }
  } 
  for(i=0;i<n;i++) {
     acc = c[i];
     for(k=0; k<i; k++) acc -= t[i*n+k] * a[k];
     a[i] = acc * invt[i];
  } 
  
  for(i=n-1;i>=0;i--) {
     acc = a[i];
     for(k=i+1; k<n; k++) acc -= t[k*n+i] * b[k];
     b[i] = acc * invt[i];
  } 
}


/*
 * ntt_cep2alf: estimate LPC coefficients from an LPC cepstrum by solving
 * the normal equations  (M' M) alf = -M' cep  with ntt_cholesky().
 *
 *   npc : number of cepstral coefficients used; cep[1..npc] are read.
 *   np  : LPC order; alf[1..np] receive the solution, alf[0] is set to 1.0.
 *
 * M is an npc-by-np matrix (row stride np) with an implicit unit diagonal;
 * only its part strictly below the diagonal is stored:
 *     M[row][col] = (row - col) / (row + 1) * cep[row - col],  col < row.
 * Only the diagonal and lower triangle of the normal matrix are filled in,
 * which is the part ntt_cholesky() reads.
 * The work arrays are sized by LPC_MAX / MAT_MAX; npc and np are not
 * checked against those limits.
 */
void ntt_cep2alf(/* Input */
              int npc, 		/* Cepstrum order */
	      int np, 		/* LPC order      */
	      double *cep, 	/* LPC cepstrum, cep[1..npc] are used */
              /* Output */
	      double *alf)     	/* LPC coefficients, alf[0] = 1.0, alf[1..np] */

{
    double mata[LPC_MAX*MAT_MAX];   /* M, strictly-lower part only */
    double matb[LPC_MAX*LPC_MAX];   /* M' M, lower triangle + diagonal */
    double matc[LPC_MAX];           /* -M' cep */
    double sum;
    int i, j, k;

    /* Build the stored part of M, one row at a time. */
    for (k = 0; k < npc; k++) {
        double scale = 1. / (double)(k + 1);
        int ncols = (k < np) ? k : np;
        for (j = 0; j < ncols; j++) {
            mata[k*np + j] = (double)(k - j) * scale * cep[k - j];
        }
    }

    for (i = 0; i < np; i++) {
        /* Off-diagonal entries of row i of M' M.  The first term comes from
           the unit diagonal of M in row i; the rest from rows below i. */
        for (j = 0; j < i; j++) {
            sum = mata[i*np + j];
            for (k = i + 1; k < npc; k++) {
                sum += mata[k*np + i] * mata[k*np + j];
            }
            matb[i*np + j] = sum;
        }

        /* Diagonal entry: 1 (unit diagonal squared) plus column energy. */
        sum = 1.;
        for (k = i + 1; k < npc; k++) {
            sum += mata[k*np + i] * mata[k*np + i];
        }
        matb[i*np + i] = sum;

        /* Right-hand side: negated inner product of column i with cep. */
        sum = cep[i + 1];
        for (k = i + 1; k < npc; k++) {
            sum += mata[k*np + i] * cep[k + 1];
        }
        matc[i] = -sum;
    }

    alf[0] = 1.0;
    ntt_cholesky(matb, alf + 1, matc, np);
}


/* --- ntt_chetbl ---

******************************************************************
*     LPC / PARCOR / CSM / LSP   subroutine  library     # 41    *
*               coded by S.Sagayama,                 5/5/1982    *
******************************************************************
 ( C version coded by S.Sagayama, 2/28/1987 )

   description:
     * makes a Tchebycheff (Chebyshev) polynomial coefficient
       table.  It is equivalent to the expansion of cos(nx)
       into polynomials of (cos x).  It is given by:
                   [k/2]     i  k-2i-1  n (k-i-1)!         k-2i
         cos k x =  sum  (-1)  2       ------------ (cos x)
                    i=0                 i! (k-2i)!
                         --------------------------
       (this function computes this (^ t[k,i]) for k=0..n; i=0..[k/2].)

       t[k,i+1] = - t[k,i] * (k-2i)(k-2i-1)/4(i+1)(k-i-1)

     * makes a table for
       expansion of a linear combination of Chebycheff polynomials
       into a polynomial of x:  suppose a linear combination of
       Tchebycheff(Chebyshev) polynomials:

           S(x) = T(x,n) + a[1] * T(x,n-1) + .... + a[n] * T(x,0)

       where T(x,k) denotes k-th Tchebycheff polynomial of x,
       then, expand each Chebycheff polynomial and get a polynomial
       of x:
                   n           n-1
           S(x) = x  + b[1] * x    + .... + b[n].

     * this problem is equivalent to the conversion of a linear
                         k        k
       combination of ( z  + 1 / z  ) into a polynomial of
       ( z + 1/z ).
     * this problem is equivalent to the conversion of a linear
       combination of cos(k*x), k=1,...,n, into a polynomial of cos(x).
     * table contents:
        0)    1/2
        1)          1  
        2)      2      -1
        3)          4     -3  
        4)      8      -8     1
        5)         16    -20     5
        6)     32     -48    18    -1
        7)         64   -112    56    -7
        8)    128    -256   160   -32     1
        9)        256   -576   432  -120     9
       10)    512   -1280  1120  -400    50    -1
                  ..................................

   synopsis:
          -------------
          ntt_chetbl(coef,n)
          -------------
   n       : input.        integer.   n =< 10.
   tbl[.]  : output.       double array : dimension=~= (n+1)(n+2)/4
             Chebyshev (Tchebycheff) polynomial coefficients.
*/

/*
 * ntt_chetbl: fill tbl[] with the non-zero coefficients of the Chebyshev
 * polynomials of order 0..n, stored back to back, highest power first.
 *
 * Order k contributes k/2 + 1 entries.  Its leading entry is 0.5 * 2^k
 * (so order 0 is stored as 0.5); each following entry is derived from the
 * previous one by the ratio applied in the inner loop.
 */
void ntt_chetbl(/* Output */
                double tbl[], /* Chebyshev (Tchebycheff) polynomial coefficients */
                /* Input */
                int n)

{
  int order, term, last, pos;
  double lead, coef;

  pos = 0;
  lead = 0.5;
  for (order = 0; order <= n; order++) {
    coef = lead;
    lead *= 2.0;          /* leading coefficient doubles with every order */
    last = order / 2;
    for (term = 0; term <= last; term++) {
      tbl[pos++] = coef;
      if (term < last) {
        /* step from term to term+1 (integer products, as in the recursion) */
        coef *= (2 * term - order) * (order - 2 * term - 1);
        coef /= (term + 1) * (order - term - 1) * 4;
      }
    }
  }
}


/*--- ntt_corref ---

******************************************************************
*     LPC / PARCOR / CSM / LSP   subroutine  library     # 01    *
*               coded by S.Sagayama,           september/1976    *
******************************************************************
 ( C version coded by S.Sagayama; revised,  6/20/1986, 2/4/1987 )

 - description:
   * conversion of "cor" into "ref".
     "alf" is simultaneously obtained.
   * computation of PARCOR coefficients "ref" of an arbitrary
     signal from its autocorrelation "cor".
   * computation of orthogonal polynomial coefficients from 
     autocorrelation function.
   * recursive algorithm for solving toeplitz matrix equation.
     example(p=3):  solve in respect to a1, a2, and a3.
         ( v0 v1 v2 )   ( a1 )     ( v1 )
         ( v1 v0 v1 ) * ( a2 ) = - ( v2 )
         ( v2 v1 v0 )   ( a3 )     ( v3 )
     where v0 = 1, vj = cor(j), aj = alf(j).
   * recursive computation of coefficients of a polynomial:(ex,p=4)
               | v0   v1   v2   v3   |    /      | v0   v1   v2 |
     A(z)= det | v1   v0   v1   v2   |   /   det | v1   v0   v1 |
               | v2   v1   v0   v1   |  /        | v2   v1   v0 |
               | 1    z   z**2 z**3  | /       
     where A(z) = z**p + alf(1) * z**(p-1) + ... + alf(p).
     note that the coefficient of z**3 is always equal to 1.
   * Gram-Schmidt orthogonalization of a sequence, ( 1, z, z**2,
     z**3, ... ,z**(2n-1) ), on the unit circle, giving their inner
     products:
                       k    l
          v(k-l) = ( z  , z   ),    0 =< k,l =< p.
     where v(j) = cor(j), v(0) = 1.
     coefficients of p-th order orthogonal polynomial are obtained
     through this subroutine. (alf(1),...,alf(p))
   * computation of reflection coefficients ref(i) at the boundary
     of the i-th section and (i+1)-th section in acoustic tube
     modeling of vocal tract.
   * the necessary and sufficient condition for existence of
     solution is that toeplitz matrix ( v(i-j) ), i,j=0,1,... 
     be positive definite.

 - synopsis:
          ----------------------------
          ntt_corref(p,cor,alf,ref,&resid)
          ----------------------------
   p       : input.        integer.
             the order of analysis; the number of poles in LPC;
             the degree of freedom of the model - 1.
   cor[.]  : input.        double array : dimension=p+1
             autocorrelation coefficients.
             cor[0] is implicitly assumed to be 1.0.
   alf[.]  : output.       double array : dimension=p+1
             linear prediction coefficients; AR parameters.
             alf[0] is implicitly assumed to be 1.0.
   ref[.]  : output.       double array : dimension=p+1
             PARCOR coefficients; reflection coefficients.
             all of ref[.] range between -1 and 1.
   resid   : output.       double.
             linear prediction / PARCOR residual power;
             reciprocal of power gain of PARCOR/LPC/LSP all-pole filter.

 - note: * if p<0, p is regarded as p=0. then, resid=1, and
           alf[.] and ref[.] are not obtained.
*/

/*
 * ntt_corref: recursive solution of the Toeplitz normal equations.
 *
 * From the normalised autocorrelation cor[1..p] it produces, order by
 * order, the prediction coefficients alf[1..p], the reflection
 * coefficients ref[1..p] (alf[i] = -ref[i] at the moment order i is
 * introduced) and the normalised residual power *resid_.
 * Index 0 of the arrays is never accessed.  For p <= 0 only *resid_ = 1.0
 * is written.
 */
void ntt_corref(int p,          /* Input : LPC analysis order */
	    double cor[],   /* Input : correlation coefficients */
	    double alf[],   /* Output : linear predictive coefficients */
	    double ref[],   /* Output : reflection coefficients */
	    double *resid_) /* Output : normalized residual power */
{
  int order, lo, hi;
  double err, refl, saved;

  if (p <= 0) {
    *resid_ = 1.0;
    return;
  }

  /* order 1 */
  ref[1] = cor[1];
  alf[1] = -ref[1];
  err = (1.0 - ref[1]) * (1.0 + ref[1]);

  for (order = 2; order <= p; order++) {
    /* prediction error correlation for the new order */
    refl = cor[order];
    for (lo = 1; lo < order; lo++) {
      refl += alf[lo] * cor[order - lo];
    }
    refl /= err;
    ref[order] = refl;
    alf[order] = -refl;

    /* update the lower-order coefficients pairwise from both ends;
       the middle element (lo == hi) is updated only once */
    for (lo = 1, hi = order - 1; lo <= hi; lo++, hi--) {
      saved = alf[lo];
      alf[lo] -= refl * alf[hi];
      if (lo < hi) {
        alf[hi] -= refl * saved;
      }
    }

    err *= (1.0 - refl) * (1.0 + refl);
  }

  *resid_ = err;
}


/* --- ntt_cutfr ---
******************************************************************
*/

/*
 * ntt_cutfr: copy len samples, starting at sample position st of channel
 * ich, from the blocked frame buffer frm[] into the contiguous buffer buf[].
 *
 * frm[] is addressed as blocks of ntt_N_FR samples.  The block holding
 * position st of channel ich is (st / ntt_N_FR) * ntt_N_SUP + ich, and moving
 * on to the next stretch of the same channel advances the block index by
 * ntt_N_SUP.
 */
void ntt_cutfr(int    st,      /* Input  --- Start point */
	   int    len,     /* Input  --- Block length */
	   int    ich,     /* Input  --- Channel number */
	   double frm[],   /* Input  --- Input frame */
	   double buf[])   /* Output --- Output data buffer */
{
    int first_blk, last_blk, crossings;
    int blk, smp, out, pass;

    first_blk = (st / ntt_N_FR) * ntt_N_SUP + ich;         /* block holding st */
    last_blk  = ((st + len) / ntt_N_FR) * ntt_N_SUP + ich; /* block holding st+len */
    crossings = (last_blk - first_blk) / ntt_N_SUP;        /* block boundaries crossed */

    out = 0;
    blk = first_blk;
    smp = st % ntt_N_FR;                                   /* offset inside the block */

    /* Drain every block that is left before the end position is reached. */
    for (pass = 0; pass < crossings; pass++) {
        for (; smp < ntt_N_FR; smp++) {
            buf[out++] = frm[smp + blk * ntt_N_FR];
        }
        smp = 0;
        blk += ntt_N_SUP;
    }

    /* Remaining samples come from the final block. */
    for (; out < len; out++, smp++) {
        buf[out] = frm[smp + blk * ntt_N_FR];
    }
}


/* --- ntt_difddd ---
******************************************************************
*/

/*
 * ntt_difddd: element-wise difference, zz[i] = xx[i] - yy[i] for i = 0..n-1.
 *
 * Nothing is read or written when n <= 0.  (The previous do/while form
 * always processed element 0, even for an empty request.)
 * zz may be the same array as xx or yy: each element is read before the
 * corresponding output element is stored.
 */
void ntt_difddd(/* Input */
                int n,
                double xx[],
                double yy[],
                /* Output */
                double zz[])
{
   int i;

   for (i = 0; i < n; i++)
   {
      zz[i] = xx[i] - yy[i];
   }
}


/* --- ntt_dotdd ---

******************************************************************
*     LPC / PARCOR / CSM / LSP   subroutine  library     # nn    *
*               coded by S.Sagayama,                3/10/1987    *
******************************************************************
 ( C version coded by S.Sagayama, 3/10/1987)

   description:
     * array arithmetic :  xx * yy
       i.e. sum of xx[i] * yy[i] for i=0,n-1

   synopsis:
          ---------------------
          double ntt_dotdd(n,xx,yy)
          ---------------------

    n      : dimension of data
    xx[.]  : input data array (double)
    yy[.]  : input data array (double)
*/

/*
 * ntt_dotdd: inner product of two double arrays.
 * Returns the sum of xx[i] * yy[i] for i = 0..n-1, accumulated in index
 * order; the result is 0.0 when n <= 0.
 */
double ntt_dotdd(/* Input */
                 int n,       /* dimension of data */
                 double xx[],
                 double yy[])
{
  const double *px = xx;
  const double *py = yy;
  double acc = 0.0;
  int left;

  for (left = n; left > 0; left--) {
    acc += (*px) * (*py);
    px++;
    py++;
  }
  return acc;
}


/* --- ntt_excheb ---

******************************************************************
*     LPC / PARCOR / CSM / LSP   subroutine  library     # 41    *
*               coded by S.Sagayama,                 5/5/1982    *
******************************************************************
 ( C version coded by S.Sagayama, 2/28/1987 )

   description:
     * expansion of a linear combination of Chebycheff polynomials
       into a polynomial of x:  suppose a linear combination of
       Tchebycheff(Chebyshev) polynomials:

           S(x) = T(x,n) + a[1] * T(x,n-1) + .... + a[n] * T(x,0)

       where T(x,k) denotes k-th Tchebycheff polynomial of x,
       then, expand each Chebycheff polynomial and get a polynomial
       of x:
                   n           n-1
           S(x) = x  + b[1] * x    + .... + b[n].

     * this problem is equivalent to the conversion of a linear
                         k        k
       combination of ( z  + 1 / z  ) into a polynomial of
       ( z + 1/z ).
     * this problem is equivalent to the conversion of a linear
       combination of cos(k*x), k=1,...,n, into a polynomial of cos(x).

   synopsis:
          -------------
          ntt_excheb(n,a,b)
          -------------
   n       : input.        integer.   n =< 10.
   a[.]    : input.        double array : dimension=n.
             implicitly, a[0]=1.0.
   b[.]    : output.       double array : dimension=n.
             implicitly, b[0]=1.0.
   coef[.] : input.        double array : dimension=~=(n+1)(n+2)/4
             A table of Chebyshev polynomial coefficients which looks like:
             .5,                                    0
             1.,                                    1
             2.,                                    2
             4.,-3.,                                3
             8.,-8.,1.,                             4
             16.,-20.,5.,                           5
             32.,-48.,18.,-1.,                      6
             64.,-112.,56.,-7.,                     7
             128.,-256.,160.,-32.,1.,               8
             256.,-576.,432.,-120.,9.,              9
             512.,-1280.,1120.,-400.,50.,-1.,      10
             ............                           n

   note: (1) arrays "a" and "b" can be identical.
*/

/*
 * ntt_excheb: expand a linear combination of Chebyshev polynomials with
 * weights a[1..n] (a[0] = 1 implied) into coefficients b[1..n] of a monic
 * polynomial (b[0] = 1 implied), using the packed table tbl[] as produced
 * by ntt_chetbl().
 *
 * The table is consumed sequentially from its start: weight a[i] uses the
 * entries of order n - i, and the entries of order n itself are read last.
 * Everything is finally divided by the leading entry of order n so that
 * the result is monic.  Nothing is done when n <= 0.
 * a and b may be the same array.
 */
void ntt_excheb(int n,        /* Input */
                double a[],   /* Input */
                double b[],   /* Output */
                double tbl[]) /* Input */
{
  int src, dst, pos;
  double weight, lead;

  if (n <= 0) {
    return;
  }

  pos = 0;
  for (src = n; src >= 1; src--) {
    /* fetch the weight before clearing b[src]: a and b may alias */
    weight = a[src];
    b[src] = 0.0;
    for (dst = src; dst <= n; dst += 2) {
      b[dst] += weight * tbl[pos];
      pos++;
    }
  }

  /* contribution of the order-n polynomial itself (implicit weight 1) */
  lead = tbl[pos];
  pos++;
  for (dst = 2; dst <= n; dst += 2) {
    b[dst] += tbl[pos];
    pos++;
  }

  /* normalise to a monic polynomial */
  for (dst = 1; dst <= n; dst++) {
    b[dst] /= lead;
  }
}


/* --- ntt_fft ---

******************************************************************
*     LPC / PARCOR / CSM / LSP   subroutine  library     # nn    *
*               coded by S.Sagayama,                5/03/1977    *
******************************************************************
 ( C version coded by S.Sagayama, 2/9/1987 )

  description:
  * complex number FFT. ( Fast Fourier Transform )
  * This FFT algorithm requires only N ( log2( N ) - 2 ) multiplications
    because the last 2 stages, i.e., 4 point DFT and 2 point DFT, are
    coded only by additions and subtractions.

  synopsis:
        ------------
        fft(xr,xi,m)
        ------------

    xr[.]   : input/output. double array : dimension=2^m.
              real part of input data/ output data.
    xi[.]   : input/output. double array : dimension=2^m.
              imaginary  part of input data/ output data.
    m       : input.        integer.
              exponent.  i.e., dimension = 2^m.  (nonnegative)

  note:
    This subroutine is a modification of "cfft".
    "cfft" is a modified copy of p.332 of
    "Digital Signal Processing" by A.V.Oppenheim and R.W.Schafer.
    The modification is suggested in the book.
    The first modification is done by S.Sagayama,  12.nov.,1976.
    "fft" is the secondary modification. (S.Sagayama,  3,may,1977)
*/


void ntt_fft(/* In/Out */
             double xr[], /* real part of input data/ output data */
             double xi[], /* imaginary  part of input data/ output data */
             /* Input */
             int m)       /* exponent */
{ int h,ih,i,j,k,l,n; 
  register double a,b,d,tr,ti,ur,ui,wr,wi;
  static double pi=3.1415926535897932384626433832795;

 if(m<=0) return;
  n=1<<m; /* n=2^m; power */
  if(m>=3)             /** radix-2 FFT **/
  { h=n;
    for(l=m;l>2;l--)
    { k=h; h=k/2; ur=1.0; ui=0.0; d=pi/h;
      wr=cos(d); wi= -sin(d);
      for(j=0;j<h;j++)
      { for(i=j;i<n;i+=k)
        { ih=i+h; tr=xr[ih]; ti=xi[ih]; a=xr[i]-tr; b=xi[i]-ti;
          xr[ih]=a*ur-b*ui; xi[ih]=a*ui+b*ur; xr[i]+=tr; xi[i]+=ti; }
        a=ur*wr-ui*wi; ui=ur*wi+ui*wr; ur=a;
      }
  } }
  if(m>=2)             /** 4-point DFT **/
  { for(j=0;j<n;j+=4)
    { tr=xr[j]; ti=xi[j]; xr[j]=tr+xr[j+2]; xi[j]=ti+xi[j+2];
      xr[j+2]=tr-xr[j+2]; xi[j+2]=ti-xi[j+2];
      tr=xr[j+1]; ti=xi[j+1]; xr[j+1]=tr+xr[j+3]; xi[j+1]=ti+xi[j+3];
      tr=tr-xr[j+3]; ti=ti-xi[j+3]; xr[j+3]=ti; xi[j+3]= -tr;
  } }
  for(j=0;j<n;j+=2)    /** 2-point DFT **/
  { tr=xr[j]; xr[j]=tr+xr[j+1]; xr[j+1]=tr-xr[j+1];
    ti=xi[j]; xi[j]=ti+xi[j+1]; xi[j+1]=ti-xi[j+1];
  }
  h=n/2; j=0;          /** bit reversal **/
  for(i=0;i<n-1;i++)
  { if(i<j)
    { tr=xr[j]; ti=xi[j]; xr[j]=xr[i]; xi[j]=xi[i]; xr[i]=tr; xi[i]=ti; }
    k=h; while(k<=j) { j-=k; k/=2; } j+=k;
} }


/*--------------------------------------------------------------------*
 * Function  fft842                                                   *
 *    Fast Fourier Transform for N=2**M                               *
 *    Complex Input                                                   *
 *--------------------------------------------------------------------*/
/*                                                                    *
 *   This program replaces the vector z=x+iy by its  finite           *
 *   discrete, complex fourier transform if in=0.  The inverse        *
 *   transform is calculated for in=1.  It performs as many base      *
 *   8 iterations as possible and then finishes with a base           *
 *   4 iteration or a base 2 iteration if needed.                     *
 *                                                                    *
 *--------------------------------------------------------------------*
 *   The function is called as                                        *
 *        fft842_m (n,x,y);                                           *
 *        int     n;            / * n must be a power of 2 * /        *
 *        double  *x, *y;                                             *
 *--------------------------------------------------------------------*
 *                                                                    */

#define  P7   0.707106781186548

/*
 * ntt_fft842_m: in-place complex transform of length n = 2^m on (x[], y[])
 * using as many radix-8 passes as possible (m / 3 of them), followed by one
 * radix-4 or radix-2 pass when m is not a multiple of 3, and finally a
 * bit-reversal permutation of the result.
 *
 * If the leftover exponent is anything other than 0, 1 or 2, an error is
 * printed to stderr and the process exits with status 1.
 */
void ntt_fft842_m(/* Input */
                  int m,
                  /* In/Out */
                  double x[],
                  double y[])
{
    int n, n8pow, pass, leftover;
    int nxtlt, lengt;
    int pos, rev, bit;
    double tr, ti;

    n = 1 << m;

    /* radix-8 passes; the sub-block spacing shrinks by 8 with every pass */
    n8pow = m / 3;
    for (pass = 1; pass <= n8pow; pass++) {
	nxtlt = 1 << (m - 3*pass);
	lengt = nxtlt << 3;
	ntt_r8tx(nxtlt, n, lengt,
		 x,           x + nxtlt,   x + 2*nxtlt, x + 3*nxtlt,
		 x + 4*nxtlt, x + 5*nxtlt, x + 6*nxtlt, x + 7*nxtlt,
		 y,           y + nxtlt,   y + 2*nxtlt, y + 3*nxtlt,
		 y + 4*nxtlt, y + 5*nxtlt, y + 6*nxtlt, y + 7*nxtlt);
    }

    /* one trailing radix-2 or radix-4 pass for the remaining factor */
    leftover = m - 3*n8pow;
    if (leftover == 1) {
	ntt_r2tx(n, x, x + 1, y, y + 1);
    }
    else if (leftover == 2) {
	ntt_r4tx(n, x, x + 1, x + 2, x + 3,
		 y, y + 1, y + 2, y + 3);
    }
    else if (leftover != 0) {
	fprintf( stderr,"-- Argorithm Error fft842\n");
	exit(1);
    }

    /** bit reversal **/
    rev = 0;
    for (pos = 0; pos < n - 1; pos++) {
	if (pos < rev) {
	    tr = x[rev];
	    ti = y[rev];
	    x[rev] = x[pos];
	    y[rev] = y[pos];
	    x[pos] = tr;
	    y[pos] = ti;
	}
	/* advance rev to the bit-reversed image of pos + 1 */
	for (bit = n / 2; bit <= rev; bit /= 2) {
	    rev -= bit;
	}
	rev += bit;
    }
}

/*--------------------------------------------------------------------*
 * Function:  ntt_r2tx                                                *
 *      RADIX 2 ITERATION SUBROUTINE                                  *
 *--------------------------------------------------------------------*/
/*
 * ntt_r2tx: radix-2 butterfly pass.
 * For k = 0, 2, 4, ... < nthpo the pair (cr0[k], cr1[k]) is replaced by its
 * sum and difference, and likewise for the imaginary pair (ci0[k], ci1[k]).
 */
void ntt_r2tx(/* Input */
	      int    nthpo,
	      /* In/Out */
	      double *cr0, double *cr1,
	      double *ci0, double *ci1)
{
    int k;
    double re_a, re_b, im_a, im_b;

    for (k = 0; k < nthpo; k += 2) {
	/* read all four inputs before storing any result */
	re_a = cr0[k];
	re_b = cr1[k];
	im_a = ci0[k];
	im_b = ci1[k];

	cr0[k] = re_a + re_b;
	cr1[k] = re_a - re_b;
	ci0[k] = im_a + im_b;
	ci1[k] = im_a - im_b;
    }
}

/*--------------------------------------------------------------------*
 * Function:  ntt_r4tx                                                *
 *      RADIX 4 ITERATION SUBROUTINE                                  *
 *--------------------------------------------------------------------*/
/*
 * ntt_r4tx: radix-4 butterfly pass.
 * For k = 0, 4, 8, ... < nthpo the four complex values
 * (cr0..cr3[k], ci0..ci3[k]) are replaced in place by their 4-point
 * butterfly outputs, written in the order given below.
 */
void ntt_r4tx(/* Input */
	      int    nthpo,
	      /* Output */
	      double *cr0, double *cr1, double *cr2, double *cr3,
	      double *ci0, double *ci1, double *ci2, double *ci3 )
{
    int k;
    double re_sum02, re_dif02, re_sum13, re_dif13;
    double im_sum02, im_dif02, im_sum13, im_dif13;

    for (k = 0; k < nthpo; k += 4) {
	/* first layer: combine elements 0/2 and 1/3 */
	re_sum02 = cr0[k] + cr2[k];
	re_dif02 = cr0[k] - cr2[k];
	re_sum13 = cr1[k] + cr3[k];
	re_dif13 = cr1[k] - cr3[k];
	im_sum02 = ci0[k] + ci2[k];
	im_dif02 = ci0[k] - ci2[k];
	im_sum13 = ci1[k] + ci3[k];
	im_dif13 = ci1[k] - ci3[k];

	/* second layer: sums feed outputs 0/1, differences feed outputs 2/3 */
	cr0[k] = re_sum02 + re_sum13;
	ci0[k] = im_sum02 + im_sum13;
	cr1[k] = re_sum02 - re_sum13;
	ci1[k] = im_sum02 - im_sum13;
	cr2[k] = re_dif02 - im_dif13;
	ci2[k] = im_dif02 + re_dif13;
	cr3[k] = re_dif02 + im_dif13;
	ci3[k] = im_dif02 - re_dif13;
    }
}

/*--------------------------------------------------------------------*
 * Function:  ntt_r8tx                                                *
 *      RADIX 8 ITERATION SUBROUTINE                                  *
 *                                                                    *
 *      In-place radix-8 butterfly pass over eight complex legs       *
 *      (crN[] = real parts, ciN[] = imaginary parts, N = 0..7).      *
 *                                                                    *
 *      nxtlt : number of offsets j (0 .. nxtlt-1) processed          *
 *      nthpo : exclusive upper bound of the element index k          *
 *      lengt : stride between successive butterflies of one offset   *
 *                                                                    *
 *      Offset j = 0 needs no per-offset twiddle factors and is       *
 *      handled by the first loop.  For j >= 1 the factors are taken  *
 *      from the file-level table ntt_cos_TT (indexed through         *
 *      ntt_N_FR); three table positions get hand-simplified code,    *
 *      everything else goes through the general branch.              *
 *      P7, ntt_N_FR and ntt_cos_TT are defined outside this          *
 *      function.                                                     *
 *--------------------------------------------------------------------*/
void ntt_r8tx (/* Input */
	       int nxtlt, 
	       int nthpo,
	       int lengt,
	       /* In/Out */
	       double *cr0, double *cr1, double *cr2, double *cr3,
	       double *cr4, double *cr5, double *cr6, double *cr7,
	       double *ci0, double *ci1, double *ci2, double *ci3,
	       double *ci4, double *ci5, double *ci6, double *ci7)
{
    double  c1,  c2,  c3,  c4,  c5,  c6,  c7;
    double  s1,  s2,  s3,  s4,  s5,  s6,  s7;
    double  ar0, ar1, ar2, ar3, ar4, ar5, ar6, ar7;
    double  ai0, ai1, ai2, ai3, ai4, ai5, ai6, ai7;
    double  br0, br1, br2, br3, br4, br5, br6, br7;
    double  bi0, bi1, bi2, bi3, bi4, bi5, bi6, bi7;
    double  tr,  ti;
    int    j, k;
    int    jpoint, unit;

     /* ---- offset j = 0: butterflies without per-offset twiddles ---- */
     for ( k = 0; k < nthpo; k += lengt ) {
	    /* Disabled alternative that read legs 0..3 through a buf[]
	       pointer array:
	    ar0 = *(buf[0]++) + cr4[k];
	    ar1 = *(buf[1]++) + cr5[k];
	    ar2 = *(buf[2]++) + cr6[k];
	    ar3 = *(buf[3]++) + cr7[k];
	    */
	    /* stage 1: sums and differences of legs four apart */
	    ar0 = cr0[k] + cr4[k];
	    ar1 = cr1[k] + cr5[k];
	    ar2 = cr2[k] + cr6[k];
	    ar3 = cr3[k] + cr7[k];
	    ar4 = cr0[k] - cr4[k];
	    ar5 = cr1[k] - cr5[k];
	    ar6 = cr2[k] - cr6[k];
	    ar7 = cr3[k] - cr7[k];
	    ai0 = ci0[k] + ci4[k];
	    ai1 = ci1[k] + ci5[k];
	    ai2 = ci2[k] + ci6[k];
	    ai3 = ci3[k] + ci7[k];
	    ai4 = ci0[k] - ci4[k];
	    ai5 = ci1[k] - ci5[k];
	    ai6 = ci2[k] - ci6[k];
	    ai7 = ci3[k] - ci7[k];
	    /* stage 2: combine stage-1 terms two apart; for terms 4..7 the
	       second operand is multiplied by +/- i (real and imaginary
	       parts swapped, one sign flipped) */
	    br0 = ar0 + ar2;
	    br1 = ar1 + ar3;
	    br2 = ar0 - ar2;
	    br3 = ar1 - ar3;
	    br4 = ar4 - ai6;
	    br5 = ar5 - ai7;
	    br6 = ar4 + ai6;
	    br7 = ar5 + ai7;
	    bi0 = ai0 + ai2;
	    bi1 = ai1 + ai3;
	    bi2 = ai0 - ai2;
	    bi3 = ai1 - ai3;
	    bi4 = ai4 + ar6;
	    bi5 = ai5 + ar7;
	    bi6 = ai4 - ar6;
	    bi7 = ai5 - ar7;
	    /* stage 3: final sums/differences written back in place */
	    cr0[k] = br0 + br1;
	    ci0[k] = bi0 + bi1;
		cr1[k] = br0 - br1;
		ci1[k] = bi0 - bi1;
		cr2[k] = br2 - bi3;
		ci2[k] = bi2 + br3;
		cr3[k] = br2 + bi3;
		ci3[k] = bi2 - br3;
		/* (tr,ti) = P7*(1+i)*(br5 + i*bi5); P7 is defined elsewhere,
		   presumably 1/sqrt(2) -- TODO confirm */
		tr = P7*(br5-bi5);
		ti = P7*(br5+bi5);
		cr4[k] = br4 + tr;
		ci4[k] = bi4 + ti;
		cr5[k] = br4 - tr;
		ci5[k] = bi4 - ti;
		/* (tr,ti) = P7*(-1+i)*(br7 + i*bi7) */
		tr = -P7*(br7+bi7);
		ti = P7*(br7-bi7);
		cr6[k] = br6 + tr;
		ci6[k] = bi6 + ti;
		cr7[k] = br6 - tr;
		ci7[k] = bi6 - ti;
	}
    /* ---- offsets j >= 1: butterflies followed by twiddle factors ---- */
    /* jpoint is the table step per offset; unit is the table index for
       offset j.  c1 is read at unit and s1 at ntt_N_FR*2-unit --
       presumably cosine and sine of the same angle taken from a mirrored
       cosine table; confirm against the initialisation of ntt_cos_TT. */
    jpoint = ntt_N_FR*8/lengt;
    for ( j = 1; j < nxtlt; j++ ) {
     unit = jpoint*j;
    /* Special case 1: the factors for outputs 1..3 are written out with
       P7 and sign flips instead of being looked up; outputs 5, 6 and 7
       reuse (c1,s1) with swapped and/or negated components. */
    if(unit == ntt_N_FR/2){
     c1 = ntt_cos_TT[unit];
     s1 = ntt_cos_TT[ntt_N_FR*2-unit];

   for ( k = j; k < nthpo; k += lengt ) {
	    /* stages 1 and 2: identical to the j = 0 loop */
	    ar0 = cr0[k] + cr4[k];
	    ar1 = cr1[k] + cr5[k];
	    ar2 = cr2[k] + cr6[k];
	    ar3 = cr3[k] + cr7[k];
	    ar4 = cr0[k] - cr4[k];
	    ar5 = cr1[k] - cr5[k];
	    ar6 = cr2[k] - cr6[k];
	    ar7 = cr3[k] - cr7[k];
	    ai0 = ci0[k] + ci4[k];
	    ai1 = ci1[k] + ci5[k];
	    ai2 = ci2[k] + ci6[k];
	    ai3 = ci3[k] + ci7[k];
	    ai4 = ci0[k] - ci4[k];
	    ai5 = ci1[k] - ci5[k];
	    ai6 = ci2[k] - ci6[k];
	    ai7 = ci3[k] - ci7[k];
	    br0 = ar0 + ar2;
	    br1 = ar1 + ar3;
	    br2 = ar0 - ar2;
	    br3 = ar1 - ar3;
	    br4 = ar4 - ai6;
	    br5 = ar5 - ai7;
	    br6 = ar4 + ai6;
	    br7 = ar5 + ai7;
	    bi0 = ai0 + ai2;
	    bi1 = ai1 + ai3;
	    bi2 = ai0 - ai2;
	    bi3 = ai1 - ai3;
	    bi4 = ai4 + ar6;
	    bi5 = ai5 + ar7;
	    bi6 = ai4 - ar6;
	    bi7 = ai5 - ar7;
	    cr0[k] = br0 + br1;
	    ci0[k] = bi0 + bi1;
		/* output 1: multiplied by i */
		cr1[k] = -(bi0-bi1);
		ci1[k] =  (br0-br1);
		/* output 2: multiplied by P7*(1+i) */
		cr2[k] = P7*(br2-bi3 - bi2-br3);
		ci2[k] = P7*(bi2+br3 + br2-bi3);
		/* output 3: multiplied by P7*(-1+i) */
		cr3[k] = -P7*(br2+bi3 +bi2-br3);
		ci3[k] = -P7*(bi2-br3 -br2-bi3);
		tr = P7*(br5-bi5);
		ti = P7*(br5+bi5);
		/* output 4: (c1 + i*s1);  output 5: (-s1 + i*c1) */
		cr4[k] = c1*(br4+tr) - s1*(bi4+ti);
		ci4[k] = c1*(bi4+ti) + s1*(br4+tr);
		cr5[k] = -s1*(br4-tr) - c1*(bi4-ti);
		ci5[k] = -s1*(bi4-ti) + c1*(br4-tr);
		tr = -P7*(br7+bi7);
		ti = P7*(br7-bi7);
		/* output 6: (s1 + i*c1);  output 7: (-c1 + i*s1) */
		cr6[k] = s1*(br6+tr) - c1*(bi6+ti);
		ci6[k] = s1*(bi6+ti) + c1*(br6+tr);
		cr7[k] = -c1*(br6-tr) - s1*(bi6-ti);
		ci7[k] = -c1*(bi6-ti) + s1*(br6-tr);
	}
    }
    /* Special case 2: (c2,s2) and (c3,s3) come straight from the table at
       2*unit and 3*unit; output 1 uses P7*(1+i); outputs 3, 5 and 7 reuse
       (c2,s2), (c3,s3), (c1,s1) with cosine and sine roles swapped. */
    else if(unit == ntt_N_FR/4){
     c1 = ntt_cos_TT[unit];
     s1 = ntt_cos_TT[ntt_N_FR*2-unit];
/* Disabled: the same factors derived from (c1,s1) by the angle-addition
   recurrence; the table look-ups below are used instead.
	c2 = c1*c1 - s1*s1;
	s2 = c1*s1 + c1*s1;
	c3 = c1*c2 - s1*s2;
	s3 = c2*s1 + s2*c1;
*/
	c2 = ntt_cos_TT[2*unit];
        s2 = ntt_cos_TT[ntt_N_FR*2-unit*2];
	c3 = ntt_cos_TT[3*unit];
        s3 = ntt_cos_TT[ntt_N_FR*2-unit*3];
   for ( k = j; k < nthpo; k += lengt ) {
	    /* stages 1 and 2: identical to the j = 0 loop */
	    ar0 = cr0[k] + cr4[k];
	    ar1 = cr1[k] + cr5[k];
	    ar2 = cr2[k] + cr6[k];
	    ar3 = cr3[k] + cr7[k];
	    ar4 = cr0[k] - cr4[k];
	    ar5 = cr1[k] - cr5[k];
	    ar6 = cr2[k] - cr6[k];
	    ar7 = cr3[k] - cr7[k];
	    ai0 = ci0[k] + ci4[k];
	    ai1 = ci1[k] + ci5[k];
	    ai2 = ci2[k] + ci6[k];
	    ai3 = ci3[k] + ci7[k];
	    ai4 = ci0[k] - ci4[k];
	    ai5 = ci1[k] - ci5[k];
	    ai6 = ci2[k] - ci6[k];
	    ai7 = ci3[k] - ci7[k];
	    br0 = ar0 + ar2;
	    br1 = ar1 + ar3;
	    br2 = ar0 - ar2;
	    br3 = ar1 - ar3;
	    br4 = ar4 - ai6;
	    br5 = ar5 - ai7;
	    br6 = ar4 + ai6;
	    br7 = ar5 + ai7;
	    bi0 = ai0 + ai2;
	    bi1 = ai1 + ai3;
	    bi2 = ai0 - ai2;
	    bi3 = ai1 - ai3;
	    bi4 = ai4 + ar6;
	    bi5 = ai5 + ar7;
	    bi6 = ai4 - ar6;
	    bi7 = ai5 - ar7;
	    cr0[k] = br0 + br1;
	    ci0[k] = bi0 + bi1;
		/* output 1: multiplied by P7*(1+i) */
		tr = P7*(br0-br1);
		ti = P7*(bi0-bi1);
		cr1[k] = tr-ti;
		ci1[k] = tr+ti;
		/* output 2: (c2 + i*s2);  output 3: (s2 + i*c2) */
		cr2[k] = c2*(br2-bi3) - s2*(bi2+br3);
		ci2[k] = c2*(bi2+br3) + s2*(br2-bi3);
		cr3[k] = s2*(br2+bi3) - c2*(bi2-br3);
		ci3[k] = s2*(bi2-br3) + c2*(br2+bi3);
		tr = P7*(br5-bi5);
		ti = P7*(br5+bi5);
		/* output 4: (c1 + i*s1);  output 5: (s3 + i*c3) */
		cr4[k] = c1*(br4+tr) - s1*(bi4+ti);
		ci4[k] = c1*(bi4+ti) + s1*(br4+tr);
		cr5[k] = s3*(br4-tr) - c3*(bi4-ti);
		ci5[k] = s3*(bi4-ti) + c3*(br4-tr);
		tr = -P7*(br7+bi7);
		ti = P7*(br7-bi7);
		/* output 6: (c3 + i*s3);  output 7: (s1 + i*c1) */
		cr6[k] = c3*(br6+tr) - s3*(bi6+ti);
		ci6[k] = c3*(bi6+ti) + s3*(br6+tr);
		cr7[k] = s1*(br6-tr) - c1*(bi6-ti);
		ci7[k] = s1*(bi6-ti) + c1*(br6-tr);
	}
    } /* else */
    /* Special case 3: (c2,s2) and (c3,s3) are derived from (c1,s1) by the
       angle-addition recurrence; output 1 uses P7*(-1+i); outputs 3, 5
       and 7 use the swapped and negated pairs. */
    else if(unit == (ntt_N_FR/4)*3){
     c1 = ntt_cos_TT[unit];
     s1 = ntt_cos_TT[ntt_N_FR*2-unit];
	c2 = c1*c1 - s1*s1;
	s2 = c1*s1 + c1*s1;
	c3 = c1*c2 - s1*s2;
	s3 = c2*s1 + s2*c1;
   for ( k = j; k < nthpo; k += lengt ) {
	    /* stages 1 and 2: identical to the j = 0 loop */
	    ar0 = cr0[k] + cr4[k];
	    ar1 = cr1[k] + cr5[k];
	    ar2 = cr2[k] + cr6[k];
	    ar3 = cr3[k] + cr7[k];
	    ar4 = cr0[k] - cr4[k];
	    ar5 = cr1[k] - cr5[k];
	    ar6 = cr2[k] - cr6[k];
	    ar7 = cr3[k] - cr7[k];
	    ai0 = ci0[k] + ci4[k];
	    ai1 = ci1[k] + ci5[k];
	    ai2 = ci2[k] + ci6[k];
	    ai3 = ci3[k] + ci7[k];
	    ai4 = ci0[k] - ci4[k];
	    ai5 = ci1[k] - ci5[k];
	    ai6 = ci2[k] - ci6[k];
	    ai7 = ci3[k] - ci7[k];
	    br0 = ar0 + ar2;
	    br1 = ar1 + ar3;
	    br2 = ar0 - ar2;
	    br3 = ar1 - ar3;
	    br4 = ar4 - ai6;
	    br5 = ar5 - ai7;
	    br6 = ar4 + ai6;
	    br7 = ar5 + ai7;
	    bi0 = ai0 + ai2;
	    bi1 = ai1 + ai3;
	    bi2 = ai0 - ai2;
	    bi3 = ai1 - ai3;
	    bi4 = ai4 + ar6;
	    bi5 = ai5 + ar7;
	    bi6 = ai4 - ar6;
	    bi7 = ai5 - ar7;
	    cr0[k] = br0 + br1;
	    ci0[k] = bi0 + bi1;
		/* output 1: multiplied by P7*(-1+i) */
		cr1[k] = -P7*(br0-br1) - P7*(bi0-bi1);
		ci1[k] = -P7*(bi0-bi1) + P7*(br0-br1);
		/* output 2: (c2 + i*s2);  output 3: (-s2 - i*c2) */
		cr2[k] = c2*(br2-bi3) - s2*(bi2+br3);
		ci2[k] = c2*(bi2+br3) + s2*(br2-bi3);
		cr3[k] = -s2*(br2+bi3) + c2*(bi2-br3);
		ci3[k] = -s2*(bi2-br3) - c2*(br2+bi3);
		tr = P7*(br5-bi5);
		ti = P7*(br5+bi5);
		/* output 4: (c1 + i*s1);  output 5: (-s3 - i*c3) */
		cr4[k] = c1*(br4+tr) - s1*(bi4+ti);
		ci4[k] = c1*(bi4+ti) + s1*(br4+tr);
		cr5[k] = -s3*(br4-tr) + c3*(bi4-ti);
		ci5[k] = -s3*(bi4-ti) - c3*(br4-tr);
		tr = -P7*(br7+bi7);
		ti = P7*(br7-bi7);
		/* output 6: (c3 + i*s3);  output 7: (-s1 - i*c1) */
		cr6[k] = c3*(br6+tr) - s3*(bi6+ti);
		ci6[k] = c3*(bi6+ti) + s3*(br6+tr);
		cr7[k] = -s1*(br6-tr) + c1*(bi6-ti);
		ci7[k] = -s1*(bi6-ti) - c1*(br6-tr);
	}
    } /* else */
    /* General case: (c2,s2) .. (c7,s7) are built from (c1,s1) with the
       angle-addition identities (cm,sm) = (ca,sa)*(cb,sb), m = a+b, and
       every output 1..7 is multiplied by its own (cm + i*sm). */
    else {
     c1 = ntt_cos_TT[unit];
     s1 = ntt_cos_TT[ntt_N_FR*2-unit];

	c2 = c1*c1 - s1*s1;
	s2 = c1*s1 + c1*s1;
	c3 = c1*c2 - s1*s2;
	s3 = c2*s1 + s2*c1;
	c4 = c2*c2 - s2*s2;
	s4 = c2*s2 + c2*s2;
	c5 = c2*c3 - s2*s3;
	s5 = c3*s2 + s3*c2;
	c6 = c3*c3 - s3*s3;
	s6 = c3*s3 + c3*s3;
	c7 = c3*c4 - s3*s4;
	s7 = c4*s3 + s4*c3;
   for ( k = j; k < nthpo; k += lengt ) {
	    /* stages 1 and 2: identical to the j = 0 loop */
	    ar0 = cr0[k] + cr4[k];
	    ar1 = cr1[k] + cr5[k];
	    ar2 = cr2[k] + cr6[k];
	    ar3 = cr3[k] + cr7[k];
	    ar4 = cr0[k] - cr4[k];
	    ar5 = cr1[k] - cr5[k];
	    ar6 = cr2[k] - cr6[k];
	    ar7 = cr3[k] - cr7[k];
	    ai0 = ci0[k] + ci4[k];
	    ai1 = ci1[k] + ci5[k];
	    ai2 = ci2[k] + ci6[k];
	    ai3 = ci3[k] + ci7[k];
	    ai4 = ci0[k] - ci4[k];
	    ai5 = ci1[k] - ci5[k];
	    ai6 = ci2[k] - ci6[k];
	    ai7 = ci3[k] - ci7[k];
	    br0 = ar0 + ar2;
	    br1 = ar1 + ar3;
	    br2 = ar0 - ar2;
	    br3 = ar1 - ar3;
	    br4 = ar4 - ai6;
	    br5 = ar5 - ai7;
	    br6 = ar4 + ai6;
	    br7 = ar5 + ai7;
	    bi0 = ai0 + ai2;
	    bi1 = ai1 + ai3;
	    bi2 = ai0 - ai2;
	    bi3 = ai1 - ai3;
	    bi4 = ai4 + ar6;
	    bi5 = ai5 + ar7;
	    bi6 = ai4 - ar6;
	    bi7 = ai5 - ar7;
	    cr0[k] = br0 + br1;
	    ci0[k] = bi0 + bi1;
		/* outputs 1, 2, 3 use factors 4, 2, 6 respectively */
		cr1[k] = c4*(br0-br1) - s4*(bi0-bi1);
		ci1[k] = c4*(bi0-bi1) + s4*(br0-br1);
		cr2[k] = c2*(br2-bi3) - s2*(bi2+br3);
		ci2[k] = c2*(bi2+br3) + s2*(br2-bi3);
		cr3[k] = c6*(br2+bi3) - s6*(bi2-br3);
		ci3[k] = c6*(bi2-br3) + s6*(br2+bi3);
		tr = P7*(br5-bi5);
		ti = P7*(br5+bi5);
		/* outputs 4, 5 use factors 1, 5 */
		cr4[k] = c1*(br4+tr) - s1*(bi4+ti);
		ci4[k] = c1*(bi4+ti) + s1*(br4+tr);
		cr5[k] = c5*(br4-tr) - s5*(bi4-ti);
		ci5[k] = c5*(bi4-ti) + s5*(br4-tr);
		tr = -P7*(br7+bi7);
		ti = P7*(br7-bi7);
		/* outputs 6, 7 use factors 3, 7 */
		cr6[k] = c3*(br6+tr) - s3*(bi6+ti);
		ci6[k] = c3*(bi6+ti) + s3*(br6+tr);
		cr7[k] = c7*(br6-tr) - s7*(bi6-ti);
		ci7[k] = c7*(bi6-ti) + s7*(br6-tr);
	}
    } /* else */
    } /*loop*/
}


/* --- ntt_hamwdw ---

******************************************************************
*     LPC / PARCOR / CSM / LSP   subroutine  library     # nn    *
*               coded by s.sagayama,                4/23/1981    *
******************************************************************
 ( C version coded by S. Sagayama,  6/20/1986 )

   description:
     * Hamming window generation.

   synopsis:
          ------------------
          void ntt_hamwdw(wdw,n)
          ------------------
   n        : input.        integer.
              the dimension of data; data length; window length.
   wdw[.]   : output.       double array : dimension=n.
              Hamming window data.
*/

void ntt_hamwdw(/* Output */
                double wdw[],  /* Hamming window data */
                int n)         /* window length */
{ int i;
  double d,pi=3.14159265358979323846264338327950288419716939;
  if(n>0)
  { d=2.0*pi/n;
    for(i=0;i<n;i++) wdw[i]=0.54-0.46*cos(d*i); } }


/* --- ntt_lagwdw ---

******************************************************************
*     LPC / PARCOR / CSM / LSP   subroutine  library     # nn    *
*               coded by S.Sagayama,                7/27/1978    *
******************************************************************
 ( C version coded by S. Sagayama,  6/21/1986 )

   description:
     * lag window (pascal) data generation.

   synopsis:
          ---------------
          ntt_lagwdw(wdw,n,h)
          ---------------

   wdw[.]  : output.       double array : dimension=n+1.
             lag window data. wdw[0] is always 1.
   n       : input.        integer.
             highest index of wdw[.], i.e. wdw[0..n] are written.
             (the order of LPC analysis.)
   h       : input.        double.
              0.0 < h < 1.0 .... if h=0, wdw(i)=1.0 for all i.
             ratio of window half value band width to sampling frequency.
             example: If lag window half value band width = 100 Hz and
             sampling frequency = 8 kHz, then h = 100/8k = 1/80 =0.0125

   revised by s.sagayama,       1982.7.19
*/

void ntt_lagwdw(/* Output */
                double wdw[],  /* lag window data */
                /* Input */
                int n,         /* dimension of wdw[.] */
                double h)      /* ratio of window half value band width to sampling frequency */
{ int i;
  double pi=3.14159265358979323846264338327959288419716939;
  double a,b,w;
  if(h<=0.0) for(i=0;i<=n;i++) wdw[i]=1.0;
  else
  { a=log(0.5)*0.5/log(cos(0.5*pi*h));
    a=(double)((int)a);
    w=1.0; b=a; wdw[0]=1.0;
    for(i=1;i<=n;i++)
    { b+=1.0; w*=a/b; wdw[i]=w; a-=1.0; } } }


/* --- ntt_mulcdd ---

******************************************************************
*     LPC / PARCOR / CSM / LSP   subroutine  library     # nn    *
*               coded by N.Iwakami,                 m/dd/19yy    *
******************************************************************
 ( C version coded by N.Iwakami, 4/4/1994)

   description:
     * array arithmetic :  c * xx = zz
       i.e. zz[i]=c*xx[i] for i=0,n-1

   synopsis:
          ------------------
          ntt_mulcdd(n,c,xx,zz)
          ------------------

    n      : dimension of data
    c      : input data (double)
    xx[.]  : input data array (double)
    zz[.]  : output data array (double)
*/

void ntt_mulcdd(/* Input */
                int n,        /* dimension of data */
		double c,     
		double xx[],
		/* Output */
		double zz[])
{
    /* Scale the first n elements of xx by c into zz, walking both
       arrays front to back; n <= 0 writes nothing. */
    double *src = xx;
    double *dst = zz;
    int remaining;

    for (remaining = n; remaining > 0; remaining--) {
	*dst++ = c * *src++;
    }
}


/* --- ntt_mulddd ---

******************************************************************
*     LPC / PARCOR / CSM / LSP   subroutine  library     # nn    *
*               coded by S.Sagayama,                m/dd/19yy    *
******************************************************************
 ( C version coded by S.Sagayama, 2/5/1987)

   description:
     * array arithmetic :  xx * yy = zz
       i.e. zz[i]=xx[i]*yy[i] for i=0,n-1

   synopsis:
          ------------------
          ntt_mulddd(n,xx,yy,zz)
          ------------------

    n      : dimension of data
    xx[.]  : input data array (double)
    yy[.]  : input data array (double)
    zz[.]  : output data array (double)
*/

void ntt_mulddd(/* Input */
		int n,        /* dimension of data */
		double xx[],
		double yy[],
		/* Output */
		double zz[])
{
    /* Element-wise product of the first n entries, computed in
       ascending index order; n <= 0 writes nothing. */
    int pos = 0;

    while (pos < n) {
	zz[pos] = xx[pos]*yy[pos];
	pos++;
    }
}


/* --- ntt_nrstep ---

******************************************************************
*     LPC / PARCOR / CSM / LSP   subroutine  library     # 42    *
*               coded by S.Sagayama,               11/14/1980    *
******************************************************************
 ( C version coded by S.Sagayama, 2/25/1987 )
 ( revised (argument 'eps' has been added) by S.Sagayama, 6/24/1987 )

   description:
     * extract one root of an algebraic equation by newton-raphson
       iteration (repeated while the correction exceeds eps) and
       divide the polynomial by ( x - root ).
     * the necessary and sufficient condition for existence of
       the solution is that all the roots of polynomial a(z) lie
       inside the unit circle.

   synopsis:
          -----------------
          ntt_nrstep(coef,n,eps,&x)
          -----------------
   coef[.] : input/output. double array : dimension=n+1.
             coefficients of the n-th order polynomial (input).
             coefficients of new (n-1)-th order polynomial (output).
             new polynomial(x) = given polynomial(x)/(x-root).
             coef[0] is implicitly assumed to be 1.0.
   n       : input.        integer.
             the order of the polynomial (n>0).
   x       : input/output. double.
             initial value of the iteration (input),
             the root (output).

   note: * originally coded by F.Itakura (original name was "newton"),
           renamed and revised by S.Sagayama,  1981.11.14. 
         * effective dimension of "coef" changes into (n-1) after
           calling this subroutine.  coef(n) is meaningless.
*/

void ntt_nrstep(double coef[], /* In/Out : coefficients of the n-th order polynomial */
		int n,      /* Input : the order of the polynomial */
		double eps, /* Input */
		double *_x) /* In/Out : initial value of the iteration */
{
  double root, step, value, slope;
  int i;

  /* Order below 2: the root of x + coef[1] is returned directly and
     coef[] is left untouched. */
  if (n < 2) {
    *_x = -coef[1];
    return;
  }

  root = *_x;   /* initial value */

  /* Newton-Raphson iteration (standard setting: eps=0.0000001). */
  for (;;) {
    /* Horner evaluation of the monic polynomial (value) and of its
       derivative (slope) at the current estimate. */
    slope = 1.0;
    value = root + coef[1];
    for (i = 2; i <= n; i++) {
      slope = slope*root + value;
      value = value*root + coef[i];
    }
    step = value/slope;
    root -= step;
    /* Signed test: the loop also ends when the correction is negative. */
    if (!(step > eps)) break;
  }

  /* Synthetic division by (x - root): coef[1..n-1] become the
     coefficients of the deflated polynomial of order n-1. */
  coef[1] = coef[1] + root;
  for (i = 2; i < n; i++) coef[i] += root*coef[i-1];

  *_x = root;   /* the solution */
}


/* --- ntt_setd ---

******************************************************************
*     LPC / PARCOR / CSM / LSP   subroutine  library     # nn    *
*    originally coded by S.Sagayama,               12/28/1987    *
******************************************************************
 ( C version coded by S.Sagayama, 12/28/1987)
 ( Modified by N.Iwakami, 27/8/1996:
   changed from vector-to-vector-copy to constant-to-vector-copy )

   description:
     * copy an array :  const => xx
       i.e. xx[i]=const for i=0,n-1

   synopsis:
          --------------
          ntt_setd(n,c,xx)
          --------------

    n      : dimension of data
    c      : input data (double)
    xx[.]  : output data array (double)
*/

void ntt_setd(/* Input */
	      int n,         /* dimension of data */
	      double c,
	      /* Output */
	      double xx[])
{
    /* Fill xx[0..n-1] with the constant c; n <= 0 writes nothing. */
    double *dst = xx;
    int remaining;

    for (remaining = n; remaining > 0; remaining--) {
	*dst++ = c;
    }
}


/* --- ntt_sigcor ---

******************************************************************
*     LPC / PARCOR / CSM / LSP   subroutine  library     # 50    *
*               coded by S.Sagayama,                7/25/1979    *
*               modified by K. Mano                 4/24/1990    *
******************************************************************
 ( C version coded by S.Sagayama, 2/05/1987 )

   description:
     * conversion of "sig" into "cor".
     * computation of autocorrelation coefficients "cor" from
       signal samples.

   synopsis:
          ------------------------
          ntt_sigcor(n,sig,&pow,cor,p)    : old
          ntt_sigcor(sig,n,&pow,cor,p)    : modified 
          ------------------------
   n       : input.        integer.
             length of sample sequence.
   sig[.]  : input.        double array : dimension=n. 
             signal sample sequence.
   pow     : output.       double.
             power. (average energy per sample).
   cor[.]  : output.       double array : dimension=p+1
             autocorrelation coefficients; cor[1..p] are written.
             cor[0] is not written and is implicitly assumed to be 1.0.
   p       : input.        integer.
             the number of autocorrelation points required.
*/

void ntt_sigcor(double *sig, /* Input : signal sample sequence */
		int n,       /* Input : length of sample sequence*/
		double *_pow,/* Output : power */
		double cor[],/* Output : autocorrelation coefficients */
		int p)       /* Input : the number of autocorrelation points required */
{
   /* Small bias added to the energy so the normalisation below never
      divides by zero; it is subtracted again before reporting the power. */
   const double energy_bias = 1.e-35;
   double sqsum, dsqsum;
   int k;

   /* Empty input: there is no energy to measure.  Report zero power and
      leave cor[] untouched instead of reading an uninitialised sum and
      dividing by n (which the previous code did). */
   if (n <= 0) {
      *_pow = 0.0;
      return;
   }

   sqsum = ntt_dotdd(n, sig, sig) + energy_bias;
   dsqsum = 1./sqsum;

   /* Normalised autocorrelation for lags p down to 1; cor[0] is not
      written.  A non-positive p requests no lags at all (the former
      do/while ran with k = 0 and then with negative k). */
   for (k = p; k > 0; k--) {
      cor[k] = ntt_dotdd(n-k, sig, sig+k) * dsqsum;
   }

   /* Average energy per sample. */
   *_pow = (sqsum - energy_bias)/(double)n;
}


/* --- ntt_sinwinl ---
******************************************************************
*/

void ntt_sinwinl(double in[],  /* Input  --- Input frame */
	     double out[],     /* Output --- Windowed input frame */
	     int    id)        /* Input  --- Window ID (0: start,  1: stop) */
{
    /* Any non-zero id selects the "stop" window: the table ntt_sinTwdl
       is applied in reverse, starting from its element ntt_N_FR_WDW-1. */
    if (id != 0) {
	ntt_muldddre(ntt_N_FR_WDW, in, ntt_sinTwdl+ntt_N_FR_WDW-1, out); 
	return;
    }

    /* id == 0, "start" window: the table is applied in forward order. */
    ntt_mulddd(ntt_N_FR_WDW, in, ntt_sinTwdl, out); 
}


/* --- ntt_muldddre ---
******************************************************************
*/

/* Element-wise product of x[] with y[] read backwards:
 *   z[i] = x[i] * y[-i]   for i = 0 .. n-1
 * so y must point at the LAST element to be used and n-1 further
 * elements must exist in front of it.
 *
 * n <= 0 writes nothing.  (The previous do/while form executed the body
 * once and then kept running for n <= 0, and it also stepped the y
 * pointer one element past the front of the data after the last read.)
 */
void ntt_muldddre(/* Input */
                  int n,
                  double x[],
                  double y[],
                  /* Output */
                  double z[])
{
    int i;

    for (i = 0; i < n; i++) {
        z[i] = x[i] * y[-i];
    }
}


/* --- ntt_sinwinm ---
******************************************************************
*/

void ntt_sinwinm(double in[],  /* Input  --- Input frame */
	     double out[], /* Output --- Windowed input frame */
	     int    id)    /* Input  --- Window ID (0: start,  1: stop) */
{
    /* Any non-zero id selects the "stop" window: the table ntt_sinTwdm
       is applied in reverse, starting from its element ntt_N_FR_M_WDW-1. */
    if (id != 0) {
	ntt_muldddre(ntt_N_FR_M_WDW, in, ntt_sinTwdm+ntt_N_FR_M_WDW-1, out); 
	return;
    }

    /* id == 0, "start" window: the table is applied in forward order. */
    ntt_mulddd(ntt_N_FR_M_WDW, in, ntt_sinTwdm, out); 
}


/* --- ntt_sinwins ---
******************************************************************
*/

void ntt_sinwins(double in[],  /* Input  --- Input frame */
	     double out[], /* Output --- Windowed input frame */
	     int    id)    /* Input  --- Window ID (0: start,  1: stop) */
{
    /* Any non-zero id selects the "stop" window: the table ntt_sinTwds
       is applied in reverse, starting from its element ntt_N_FR_S_WDW-1. */
    if (id != 0) {
	ntt_muldddre(ntt_N_FR_S_WDW, in, ntt_sinTwds+ntt_N_FR_S_WDW-1, out); 
	return;
    }

    /* id == 0, "start" window: the table is applied in forward order. */
    ntt_mulddd(ntt_N_FR_S_WDW, in, ntt_sinTwds, out); 
}


