//
// LiDIA - a library for computational number theory
//   Copyright (c) 1994, 1995 by the LiDIA Group
//
// File        : fft_rep.c
// Author      : Victor Shoup, Thomas Pfahler (TPf)
// Last change : TPf, Feb 29, 1996, initial version
//


#if defined(HAVE_MAC_DIRS) || defined(__MWERKS__)

#include <LiDIA:Fp_polynomial_fft.h>
#include <LiDIA:crt.h>
#include <LiDIA:udigit.h>

#else

#include <LiDIA/Fp_polynomial_fft.h>
#include <LiDIA/crt.h>
#include <LiDIA/udigit.h>

#endif



/***************************************************************

					 class fft_rep

****************************************************************/

// Default constructor: creates an empty representation.
// max_k == -1 is the marker for "no tables allocated yet"; the destructor
// and set_length() both test for this value.
fft_rep::fft_rep()
    : k(0), max_k(-1), tbl(0)
{
    debug_handler("fft_rep", "fft_rep ( void )");
}

// Constructs an empty representation (no tables, max_k == -1) and then
// hands both arguments on to init(InitK, m).
fft_rep::fft_rep(lidia_size_t InitK, const mcp& m)
    : k(0), max_k(-1), tbl(0)
{
    debug_handler("fft_rep", "fft_rep ( lidia_size_t, mcp& )");

    init(InitK, m);
}

// Copy constructor.
// Copies R's FFT data object F, initializes the CRT object C from
// F.crttable() and lets set_length() allocate tables for length R.k.
// NOTE(review): the entries of R.tbl are not copied here, only tables of
// matching length are allocated -- confirm that callers rely on nothing
// more than the configuration being copied.
fft_rep::fft_rep(const fft_rep& R)
    : k(0), max_k(-1), tbl(0), F(R.F)
{
    debug_handler("fft_rep", "fft_rep( fft_rep& )");

    C.init(F.crttable());
    set_length(R.k);
}
	
// Destructor: releases one table row per prime of C and then the row
// array itself.  Nothing is released while max_k still has its initial
// value -1, i.e. while set_length() has not allocated anything.
fft_rep::~fft_rep()
{
    debug_handler("fft_rep", "destructor()");

    if (max_k != -1)
    {
        const lidia_size_t rows = C.number_of_primes();
        for (lidia_size_t row = 0; row < rows; row++)
            delete[] tbl[row];

        delete[] tbl;
    }
}

// Sets the current length to k = NewK (tables of 2^NewK entries).
// If NewK does not exceed the largest length allocated so far (max_k),
// only k is updated and the CRT object C is reset.  Otherwise one table
// with 2^NewK entries is (re)allocated for every prime of C; the old
// tables are released, so their contents are lost.
// A negative NewK is reported through lidia_error_handler().
void fft_rep::set_length(lidia_size_t NewK)
{
    debug_handler("fft_rep", "set_length( lidia_size_t )");

    if (NewK < 0)
        lidia_error_handler("fft_rep", "set_length( lidia_size_t )::bad arg");

    if (NewK > max_k)
    {
        lidia_size_t row;

        if (max_k != -1)
        {
            // rows exist but are too short: drop them, keep the row array
            for (row = C.number_of_primes() - 1; row >= 0; row--)
                delete[] tbl[row];
        }
        else
        {
            // very first allocation: create the array of row pointers
            tbl = new sdigit*[C.number_of_primes()];
            if (!tbl)
                lidia_error_handler("fft_rep", "set_length( lidia_size_t )::out of space");
        }

        const lidia_size_t entries = 1 << NewK;

        for (row = C.number_of_primes() - 1; row >= 0; row--)
        {
            tbl[row] = new sdigit[entries];
            if (!tbl[row])
                lidia_error_handler("fft_rep", "set_length( lidia_size_t )::out of space");
        }

        k = max_k = NewK;
    }
    else
    {
        // enough space is already available
        k = NewK;
        C.reset();
    }
}

// Like set_length(l), but additionally asks F for a new length whenever
// l exceeds the largest length that had been allocated before the call.
// If F.set_new_length(l) reports true, C is initialized again from
// F.crttable(); otherwise C is merely reset.
// NOTE(review): set_length() sizes tbl by C.number_of_primes() before C
// may be re-initialized below -- confirm that the number of primes cannot
// change at this point.
void fft_rep::set_size(lidia_size_t l)
{
    debug_handler("fft_rep", "set_size( lidia_size_t )");

    const lidia_size_t previous_max_k = max_k;
    set_length(l);

    if (l > previous_max_k)
    {
        // the tables have grown, so F has to be consulted as well
        if (F.set_new_length(l) == true)
            C.init(F.crttable());    // F changed: C needs the new crt table
        else
            C.reset();
    }
    // otherwise: we do not need a new F
}


// (Re)initializes this representation for length l and modulus
// information m: F is initialized from (l, m), C from F.crttable(), and
// finally set_length(l) provides the tables.
void fft_rep::init(lidia_size_t l, const mcp& m)
{
    debug_handler("fft_rep", "init( lidia_size_t, mcp& )");

    F.init(l, m);            // FFT data first ...
    C.init(F.crttable());    // ... because C depends on F's crt table
    set_length(l);
}

// (Re)initializes this representation for length l from an existing
// fft_data object FD: F is initialized from FD, C from F.crttable(), and
// set_length(l) provides the tables.
// If l exceeds FD.maxroot(), lidia_error_handler() is called.
// The handler calls name the class "fft_rep" (this is a member of fft_rep,
// and all other members of the class report themselves that way).
void fft_rep::init(lidia_size_t l, const fft_data& FD)
{
    debug_handler("fft_rep", "init( lidia_size_t, const fft_data& )");

    if (l > FD.maxroot())
        lidia_error_handler("fft_rep", "init( lidia_size_t, const fft_data& )::fft_data too small");

    F.init(FD);
    C.init(F.crttable());    // C depends on F's crt table
    set_length(l);
}

void fft_rep::to_fft_rep(const Fp_polynomial& x, lidia_size_t lo, lidia_size_t hi)
// computes an n = 2^k point convolution.
// if deg(x) >= 2^k, then x is first reduced modulo X^n-1.
//
// Only the coefficients lo..hi of x are used (hi is clipped to x.degree()).
// For every prime index of C, tbl[index] receives the result of
// F.evaluate() applied to the residues of these coefficients.
// A negative lo is reported through lidia_error_handler().
{
    debug_handler( "fft_rep", "to_fft_rep( Fp_polynomial, lidia_size_t, lidia_size_t )" );

    lidia_size_t K, index, j, m, j1, num_primes = C.number_of_primes();
    bigint accum; //static
    const bigint &p = x.modulus();

    if (lo < 0)
	lidia_error_handler( "fft_rep", "to_fft_rep( Fp_polynomial, lidia_size_t, lidia_size_t )::bad arg" );

    hi = comparator<lidia_size_t>::min(hi, x.degree());

    // K = number of points, m = number of coefficients actually used
    K = 1 << k;
    m = comparator<lidia_size_t>::max(hi-lo + 1, 0);

    const bigint *xx = &x.coeff[lo];

    // scratch buffer for the residues modulo a single prime
    sdigit* sp = new sdigit[K];
    sdigit *up;
    if (!sp)
        lidia_error_handler( "fft_rep", "to_fft_rep( Fp_polynomial, lidia_size_t, lidia_size_t )::out of space" );

    if (m <= K)
    {
        // no wrap-around: reduce the m coefficients, pad with zeros up to
        // K entries and evaluate, one prime at a time
        for (index = 0; index < num_primes; index++)
        {
            C.reduce(sp, xx, m, index);
            for (up=&sp[m], j = m; j < K; j++, up++)
                *up = 0;
            F.evaluate(tbl[index], sp, k, index);
        }
    }
    else
    {
        if (m <= (K<<1))
        {
            // K < m <= 2K: only the first m2 positions receive a second
            // (wrapped) coefficient
            lidia_size_t m2 = m - K;
            //compute residues :
            //sp = residues mod C.get_prime(0)
            //tbl[i-1] = residues mod C.get_prime(i)     i=1,..,num_primes-1
            for (j = 0; j < m2; j++, xx++)
            {
                add(accum, *xx, xx[K]);
                C.reduce(sp[j], accum, 0);
                for (index = 0; index < num_primes-1; index++)
                    C.reduce(tbl[index][j], accum, index+1);
            }
            //no addition needed for the following coefficients
	    xx = &x.coeff[lo+m2];
	    lidia_size_t diff = K-m2;
            C.reduce(&sp[m2], xx, diff, 0);
            for (index = 0; index < num_primes-1; index++)
                C.reduce(&tbl[index][m2], xx, diff, index+1);


            //evaluate : tbl[i] = evaluation mod C.get_prime(i)    i=0,..,num_primes-1
            // the rows are processed from the highest index downwards, so
            // that the scratch row tbl[index-1] is read before the next
            // iteration overwrites it
            for (index = num_primes-1; index > 0; index--)
                F.evaluate(tbl[index], tbl[index-1], k, index);
            F.evaluate(tbl[0], sp, k, 0);
        }

	else	// I doubt that this will ever happen ... (TPf)
        {
            // m > 2K: every position j collects the coefficients
            // j, j+K, j+2K, ... (added modulo p)
            //compute residues modulo all primes:
            //sp = residues mod C.get_prime(0)
            //tbl[i-1] = residues mod C.get_prime(i)     i=1,..,num_primes-1
            for (j=0; j < K; j++)
            {
                accum.assign( xx[j] );
                for (j1 = j + K; j1 < m; j1 += K)
                    AddMod(accum, accum, xx[j1], p);

                C.reduce(sp[j], accum, 0);
                for (index = 0; index < num_primes-1; index++)
                    C.reduce(tbl[index][j], accum, index+1);
            }

            //evaluate : tbl[i] = evaluation mod C.get_prime(i)    i=0,..,num_primes-1
            // same top-down order as in the previous case
            for (index = num_primes-1; index > 0; index--)
                F.evaluate(tbl[index], tbl[index-1], k, index);
            F.evaluate(tbl[0], sp, k, 0);
        }

    }//end if (m <= K):else
    delete[] sp;
}


void fft_rep::from_fft_rep(Fp_polynomial& x, lidia_size_t lo, lidia_size_t hi)
   // converts from FFT-representation to coefficient representation
   // only the coefficients lo..hi are computed (hi is clipped to 2^k - 1)
   // NOTE: this version does not destroy the data in (*this)
   //
   // x receives l = max(hi-lo+1, 0) coefficients, takes its modulus from
   // F.CT->mod and is stripped of leading zeros.
   // A negative lo is reported through lidia_error_handler().
{
    debug_handler( "fft_rep", "from_fft_rep( Fp_polynomial, lidia_size_t, lidia_size_t )" );

    // lo is used as an offset into a buffer of 2^k entries below; a
    // negative value would address memory in front of that buffer
    // (to_fft_rep rejects a negative lo in the same way)
    if (lo < 0)
        lidia_error_handler( "fft_rep", "from_fft_rep( Fp_polynomial, lidia_size_t, lidia_size_t )::bad arg" );

    lidia_size_t K, i, l;

    K = (1 << k);

    hi = comparator<lidia_size_t>::min(hi, K-1);
    l = comparator<lidia_size_t>::max(hi-lo+1, 0);

    // scratch buffer receiving the interpolated values for one prime
    sdigit* sdigit_array = new sdigit[K];
    if (!sdigit_array)
        lidia_error_handler( "fft_rep", "from_fft_rep( Fp_polynomial, lidia_size_t, lidia_size_t )::out of space" );

    // start of the requested window lo..hi inside the scratch buffer
    sdigit *sp = &sdigit_array[lo];
    sdigit *yp;

    for (i = C.number_of_primes()-1; i >= 0; i--)
    {
        yp = tbl[i];

        // interpolate into the scratch buffer, so tbl[i] stays untouched
        F.interpolate(sdigit_array, yp, k, i);
        F.divide_by_power_of_two(sp, l, k, i);

        // hand the l residues for prime i over to the CRT object
        C.combine(sp, l, i);
    }

    delete[] sdigit_array;

    x.set_degree(l-1);
    x.MOD = F.CT->mod;
    C.get_result(x.coeff, l);

    // reduce the combined values modulo the modulus of x
    const bigint &p = x.modulus();
    bigint *xp = x.coeff;
    for (i = l; i != 0; i--, xp++)
        Remainder(*xp, *xp, p);

    x.remove_leading_zeros();
}


// z = x * y, computed pointwise for every prime of x.C by
// z.F.pointwise_multiply().
// x and y must have the same length k, and x, y and z must refer to the
// same F.FT; otherwise lidia_error_handler() is called.
// z is resized to length k via set_size() before the products are stored.
void multiply(fft_rep& z, const fft_rep& x, const fft_rep& y)
{
    debug_handler( "fft_rep", "multiply( fft_rep&, fft_rep&, fft_rep& )" );
    lidia_size_t k, i;

    if (x.k != y.k || z.F.FT != x.F.FT || z.F.FT != y.F.FT)
        lidia_error_handler( "fft_rep", "multiply( fft_rep&, fft_rep&, fft_rep& )::FFT rep mismatch" );

    k = x.k;

    z.set_size(k);

    for (i = x.C.number_of_primes()-1; i >= 0; i--)
    {
        sdigit *zp = &z.tbl[i][0];
        const sdigit *xp = &x.tbl[i][0];
        const sdigit *yp = &y.tbl[i][0];

        z.F.pointwise_multiply(zp, xp, yp, k, i);
    }
}


void reduce(fft_rep& x, const fft_rep& a, lidia_size_t k)
  // reduces a 2^l point FFT-rep (l = a.k) to a 2^k point FFT-rep by
  // keeping every 2^(l-k)-th entry of each table of a
  // input may alias output
  // k > a.k or differing F.FT are reported through lidia_error_handler()
{
    debug_handler("fft_rep", "reduce( fft_rep&, fft_rep&, lidia_size_t )");

    // a.k has to be read before x is resized, because x may be a
    const lidia_size_t shift = a.k - k;
    const lidia_size_t points = 1 << k;

    if (shift < 0)
        lidia_error_handler("fft_rep", "reduce( fft_rep&, fft_rep&, lidia_size_t )::bad operands");
    if (x.F.FT != a.F.FT)
        lidia_error_handler("fft_rep", "reduce( fft_rep&, fft_rep&, lidia_size_t )::Reps do not match");

    x.set_size(k);

    for (lidia_size_t prime = a.C.number_of_primes() - 1; prime >= 0; prime--)
    {
        const sdigit* src = a.tbl[prime];
        sdigit* dst = x.tbl[prime];

        // the source index is never smaller than the target index
        for (lidia_size_t pos = 0; pos < points; pos++)
            dst[pos] = src[pos << shift];
    }
}


void reduce(fft_rep &x, const modular_fft_rep &a, lidia_size_t l, lidia_size_t index)
{
// reduces a 2^k point ModularFFT-rep (k = a.k) to a 2^l point FFT-rep;
// only the table x.tbl[index] is filled, with every 2^(k-l)-th entry of a.s
// l > a.k or differing F.FT are reported through lidia_error_handler()
    debug_handler("fft_rep", "reduce( fft_rep&, modular_fft_rep&, lidia_size_t, lidia_size_t )");

    const lidia_size_t shift = a.k - l;
    if (shift < 0)
        lidia_error_handler("fft_rep", "reduce( fft_rep&, modular_fft_rep&, lidia_size_t, lidia_size_t ):: bad operand");
    if (x.F.FT != a.F.FT)
        lidia_error_handler("fft_rep", "reduce( fft_rep&, modular_fft_rep&, lidia_size_t, lidia_size_t )::Reps do not match");

    const lidia_size_t points = 1 << l;
    x.set_size(l);

    const sdigit *src = &a.s[0];
    sdigit *dst = &x.tbl[index][0];
    for (lidia_size_t pos = 0; pos < points; pos++)
        dst[pos] = src[pos << shift];
}



/////////////////////////////////////////////////////////////////////////
// three special purpose functions, used only in
// void update_map(base_vector<bigint>& x, const base_vector<bigint>& a,
//					   const poly_multiplier& B, const poly_modulus& F)
//


void fft_rep::rev_to_fft_rep(const base_vector<bigint>& x, lidia_size_t lo,
			lidia_size_t hi, lidia_size_t offset)
// computes an n = 2^k point convolution of X^offset*x[lo..hi]
// using "inverted" evaluation points.
// if deg(x) >= 2^k, then x is first reduced modulo X^n-1.
{
    debug_handler( "fft_rep", "rev_to_fft_rep( base_vector<bigint>& x, lidia_size_t, lidia_size_t, lidia_size_t)" );
    lidia_size_t n, i, j, m, j1;

    bigint accum;	//static

    if (lo < 0)
	lidia_error_handler( "fft_rep", "rev_to_fft_rep( base_vector<bigint>& x, lidia_size_t, lidia_size_t, lidia_size_t)::bad arg" );

    hi = comparator<lidia_size_t>::min(hi, x.size()-1);

    n = 1 << k;
    m = comparator<lidia_size_t>::max(hi-lo + 1, 0);

    const bigint *xx;
    if (x.size()==0)
	xx = 0;
    else
	xx = &x[0];

    sdigit *sp = new sdigit[n];
    if (!sp)
	lidia_error_handler( "fft_rep", "rev_to_fft_rep( base_vector<bigint>& x, lidia_size_t, lidia_size_t, lidia_size_t)::out of space" );

    if (m+offset <= n)
    {
	sdigit *up;
	for (i = C.number_of_primes()-1; i >= 0; i--)
	{
	    for (up=sp, j = 0; j < offset; j++, up++)
		*up =0;
	    C.reduce(&sp[offset], &xx[lo], m, i);
	    for (up=&sp[m+offset], j = m+offset; j < n; j++, up++)
		*up =0;
	    F.interpolate(tbl[i], sp, k, i);
	    F.divide_by_power_of_two(tbl[i], n, k, i);
	}
    }
    else
    {
	for (j = 0; j < n; j++)
	{
	    if (j < offset || j >= m+offset)
	    {
		for (i = C.number_of_primes()-1; i >= 0; i--)
		    tbl[i][j] = 0;
	    }
	    else
	    {
		accum = xx[j+lo-offset];
		for (j1 = j + n; j1 < m+offset; j1 += n)
		    add(accum, accum, xx[j1+lo-offset]);

		for (i = C.number_of_primes()-1; i >= 0; i--)
		    C.reduce(tbl[i][j], accum, i);
	    }
	}

	for (i = C.number_of_primes()-1; i >= 0; i--)
	{
	    sdigit *yp = &tbl[i][0];

	    F.interpolate(sp, yp, k, i);
	    for (j = 0; j < n; j++)
		yp[j] = sp[j];
	    F.divide_by_power_of_two(yp, n, k, i);
	}
    }
    delete[] sp;
}



void fft_rep::rev_from_fft_rep(base_vector<bigint>& x, lidia_size_t lo, lidia_size_t hi)
// converts from FFT-representation to coefficient representation
// using "inverted" evaluation points.
// only the coefficients lo..hi are computed (hi is clipped to 2^k - 1)
//
// NOTE: the tables tbl[i] of (*this) are overwritten with the evaluated
// values.  x is resized to l = max(hi-lo+1, 0) entries, each reduced
// modulo F.CT->mod.mod().
// A negative lo is reported through lidia_error_handler().
{
    debug_handler( "fft_rep", "rev_from_fft_rep( lidia_size_t, lidia_size_t )" );

    // lo is used as an offset into the tables below; a negative value
    // would address memory in front of them.  Checked before any table
    // is modified.
    if (lo < 0)
        lidia_error_handler( "fft_rep", "rev_from_fft_rep( lidia_size_t, lidia_size_t )::bad arg" );

    lidia_size_t n, i, j, l;
    n = (1 << k);

    // scratch buffer for one prime
    sdigit *sp = new sdigit[n];
    if (!sp)
        lidia_error_handler( "fft_rep", "rev_from_fft_rep( lidia_size_t, lidia_size_t )::out of space" );

    for (i = C.number_of_primes()-1; i >= 0; i--)
    {
        sdigit *yp = &tbl[i][0], *SP = sp;
        F.evaluate(sp, yp, k, i);

        // copy the n evaluated values back into tbl[i]
        for (j = n-1; j >= 0; j--,yp++,SP++)
            *yp = *SP;
    }
    delete[] sp;

    hi = comparator<lidia_size_t>::min(hi, n-1);
    l = comparator<lidia_size_t>::max(hi-lo+1, 0);

    if (x.capacity() < l)
        x.set_capacity(l);
    x.set_size(l);

    // hand the window lo..hi of every table over to the CRT object
    C.reset();
    for (i = C.number_of_primes()-1; i >= 0; i--)
        C.combine(&tbl[i][lo], l, i);

    if (l != 0)
    {
        bigint* xx = &x[0];
        C.get_result(xx, l);

        // get_result() is expected to write into the existing storage of x
        if (xx!=&x[0])
            lidia_error_handler( "Fp_polynomial, fft_rep", "error in class crt: vector has unnecessarily been reallocated in function get_result(base_vector<bigint>,lidia_size_t)" );

        const bigint &p = F.CT->mod.mod();
        for (i = l; i > 0; i--, xx++)
            Remainder(*xx,*xx,p);
    }
}



void fft_rep::add_expand(const fft_rep& a)
//  x = x + (an "expanded" version of a)
{
    debug_handler( "fft_rep", "add_expand( fft_rep&)" );
    lidia_size_t j, j1, index;
    lidia_size_t a_k = a.k;
    lidia_size_t n = 1 << a_k;

    if (k < a_k)
	lidia_error_handler( "fft_rep", "add_expand( fft_rep& )::bad args" );
    if (F.FT != a.F.FT)
	lidia_error_handler( "fft_rep", "add_expand( fft_rep& )::Reps do not match" );

    for (index = C.number_of_primes()-1; index >= 0; index--)
    {
	sdigit q = C.get_prime(index);
	const sdigit *ap = &a.tbl[index][0];
	sdigit *xp = &tbl[index][0];
	for (j = n-1; j >= 0; j--)
	{
	    j1 = j << (k-a_k);
	    xp[j1] = udigit_add_mod(xp[j1], ap[j], q);
	}
    }
}
