#include "LLL.h"
#include "vec_long.h"
#include "tools.h"

#include "math.h"


static void ExactDiv(ZZ& qq, const ZZ& a, const ZZ& b)

// qq = a/b, where the division is required to leave no remainder.
// If the remainder is nonzero, both operands are printed on cerr
// and Error is called.

{
   static ZZ quo, rmd;

   DivRem(quo, rmd, a, b);

   const long exact = IsZero(rmd);
   if (!exact) {
      cerr << "a = " << a << "\n";
      cerr << "b = " << b << "\n";
      Error("ExactDiv: nonzero remainder");
   }

   // the result is copied out last, so a and b are still untouched
   // when the diagnostic above prints them
   qq = quo;
}


static void BalDiv(ZZ& q, const ZZ& a, const ZZ& d)

//  q = a/d rounded to the nearest integer; an exact tie is resolved
//    by rounding towards zero.  Assumes d > 0.

{
   static ZZ twice_r;

   // start from the quotient/remainder pair delivered by DivRem,
   // then compare 2*remainder against d to decide on rounding up
   DivRem(q, twice_r, a, d);
   add(twice_r, twice_r, twice_r);

   const long cmp = compare(twice_r, d);

   long round_up = (cmp > 0);
   if (cmp == 0 && q < 0) round_up = 1;   // tie with a negative quotient

   if (round_up)
      add(q, q, 1);
}



static void MulAddDiv(ZZ& c, const ZZ& c1, const ZZ& c2, 
                      const ZZ& x, const ZZ& y, const ZZ& z)

// Sets c = (x*c1 + y*c2)/z.  The quotient is taken with ExactDiv,
// so a nonzero remainder is reported as an error there.

{
   static ZZ num, term;

   mul(term, y, c2);
   mul(num, x, c1);
   add(num, num, term);

   ExactDiv(c, num, z);
}


static void MulSubDiv(ZZ& c, const ZZ& c1, const ZZ& c2, 
                      const ZZ& x, const ZZ& y, const ZZ& z)

// Sets c = (x*c1 - y*c2)/z.  The quotient is taken with ExactDiv,
// so a nonzero remainder is reported as an error there.

{
   static ZZ num, term;

   mul(term, y, c2);
   mul(num, x, c1);
   sub(num, num, term);

   ExactDiv(c, num, z);
}
   




static void MulSubDiv(vector(ZZ)& c, const vector(ZZ)& c1, const vector(ZZ)& c2,
                      const ZZ& x, const ZZ& y, const ZZ& z)

// Coordinate-wise c = (x*c1 - y*c2)/z.
// c1 and c2 must have the same length (Error otherwise); c is
// resized to that length before the entries are written.

{
   const long len = c1.length();
   if (c2.length() != len) Error("MulSubDiv: length mismatch");
   c.SetLength(len);

   long pos = 1;
   while (pos <= len) {
      MulSubDiv(c(pos), c1(pos), c2(pos), x, y, z);
      pos++;
   }
}

static void RowTransform(vector(ZZ)& c1, vector(ZZ)& c2,
                         const ZZ& x, const ZZ& y, const ZZ& u, const ZZ& v)

// (c1, c2) = (x*c1 + y*c2, u*c1 + v*c2), applied coordinate by coordinate.
// c1 and c2 must have the same length; otherwise Error is called.

{
   long n = c1.length();
   if (c2.length() != n) Error("RowTransform: length mismatch");
   static ZZ t1, t2, t3, t4;

   long i;
   for (i = 1; i <= n; i++) {
      // both new entries are computed from the old pair before
      // either one is overwritten
      mul(t1, x, c1(i));
      mul(t2, y, c2(i));
      add(t1, t1, t2);

      mul(t3, u, c1(i));
      mul(t4, v, c2(i));
      add(t3, t3, t4);

      c1(i) = t1;
      c2(i) = t3;
   }
}

static void RowTransform(ZZ& c1, ZZ& c2,
                         const ZZ& x, const ZZ& y, const ZZ& u, const ZZ& v)

// Scalar version:  (c1, c2) = (x*c1 + y*c2, u*c1 + v*c2).

{
   static ZZ first, second, tmp;

   // new value for c1
   mul(first, x, c1);
   mul(tmp, y, c2);
   add(first, first, tmp);

   // new value for c2, still computed from the old c1 and c2
   mul(second, u, c1);
   mul(tmp, v, c2);
   add(second, second, tmp);

   c1 = first;
   c2 = second;
}



static void MulSub(ZZ& c, const ZZ& c1, const ZZ& c2, const ZZ& x)

// Sets c = c1 - x*c2.

{
   static ZZ prod;

   mul(prod, x, c2);
   sub(c, c1, prod);
}


static void MulSub(vector(ZZ)& c, const vector(ZZ)& c1, const vector(ZZ)& c2,
                   const ZZ& x)

// Coordinate-wise c = c1 - x*c2.
// c1 and c2 must have the same length (Error otherwise); c is
// resized to that length before the entries are written.

{
   const long len = c1.length();
   if (c2.length() != len) Error("MulSub: length mismatch");
   c.SetLength(len);

   long pos = 1;
   while (pos <= len) {
      MulSub(c(pos), c1(pos), c2(pos), x);
      pos++;
   }
}

      
      
   
 

static long LLLInit(matrix(ZZ)& B, vector(long)& P, vector(ZZ)& D,
             vector(vector(ZZ))& lam)

// Builds the integer Gram-Schmidt data for the rows of B.
//
// On return:
//   P(i)   = 0 if the reduced copy of row i has zero inner product
//            with itself, otherwise the 1-based position assigned
//            to row i among the rows that did not vanish;
//   D[0]   = 1, and D[p] for each assigned position p;
//   lam(i) holds one entry per position assigned before row i.
//
// The return value is the number of positions assigned.

{
   const long m = B.NumRows();
   const long n = B.NumCols();
   long rank = 0;
   long row, prev;
   ZZ norm2;
   matrix(ZZ) C;
   vector(ZZ) cur;

   P.SetLength(m);
   D.SetLength(m+1);
   C.SetDims(m, n);
   lam.SetLength(m);

   set(D[0]);

   for (row = 1; row <= m; row++) {
      lam(row).SetLength(rank);
      cur = B(row);

      // eliminate the contribution of every earlier row that was kept
      for (prev = 1; prev < row; prev++) {
         const long p = P(prev);
         if (p == 0) continue;

         InnerProduct(lam(row)(p), B(row), C(p));
         MulSubDiv(cur, cur, C(p), D[p], lam(row)(p), D[p-1]);
      }

      InnerProduct(norm2, cur, cur);
      if (IsZero(norm2)) {
         P(row) = 0;
         continue;
      }

      rank++;
      P(row) = rank;
      C(rank) = cur;
      ExactDiv(D[rank], norm2, D[rank-1]);
   }

   return rank;
}

   



static long SwapTest(const ZZ& d0, const ZZ& d1, const ZZ& d2, const ZZ& lam,
                     long a, long b)

// test if a*d1^2 > b*(d0*d2 + lam^2)

{
   static ZZ t1, t2;

   mul(t1, d0, d2);
   sqr(t2, lam);
   add(t1, t1, t2);
   mul(t1, t1, b);

   sqr(t2, d1);
   mul(t2, t2, a);

   return t2 > t1;
}






static
void reduce(long k, long l, 
            matrix(ZZ)& B, vector(long)& P, vector(ZZ)& D, 
            vector(vector(ZZ))& lam, matrix(ZZ)* U)
{
   static ZZ t1;
   static ZZ r;

   if (P(l) == 0) return;
   add(t1, lam(k)(P(l)), lam(k)(P(l)));
   abs(t1, t1);
   if (t1 <= D[P(l)]) return;

   long j;

   BalDiv(r, lam(k)(P(l)), D[P(l)]);
   MulSub(B(k), B(k), B(l), r);

   if (U) MulSub((*U)(k), (*U)(k), (*U)(l), r);

   for (j = 1; j <= l-1; j++)
      if (P(j) != 0)
         MulSub(lam(k)(P(j)), lam(k)(P(j)), lam(l)(P(j)), r);

   MulSub(lam(k)(P(l)), lam(k)(P(l)), D[P(l)], r);
}

static
void swap(long k, matrix(ZZ)& B, vector(long)& P, vector(ZZ)& D, 
          vector(vector(ZZ))& lam, matrix(ZZ)* U, long verbose)

// swaps vectors k-1 and k;  assumes P(k-1) != 0
//
// B, P, D and lam are updated together so that they stay consistent
// with each other; when U is non-null the same row operation is
// applied to *U.  If verbose is nonzero, the case taken is reported
// on cerr.  Three cases are distinguished:
//   case 1:  P(k) != 0
//   case 2:  P(k) == 0 and lam(k)(P(k-1)) != 0
//   case 3:  P(k) == 0 and lam(k)(P(k-1)) == 0

{
   long i, j;
   long m = B.NumRows();
   static ZZ t1, t2, t3, e, x, y;


   if (P(k) != 0) {
      if (verbose) cerr << "swap case 1: " << k << "\n";

      // plain exchange of the two rows and of their lam entries
      // for all earlier positions
      swap(B(k-1), B(k));
      if (U) swap((*U)(k-1), (*U)(k));
   
      for (j = 1; j <= k-2; j++)
         if (P(j) != 0)
            swap(lam(k-1)(P(j)), lam(k)(P(j)));

      // for every later row i, recompute the two entries at positions
      // P(k)-1 and P(k); both are computed from the old values (into
      // t1, t2) before either is stored
      for (i = k+1; i <= m; i++) {
         MulAddDiv(t1, lam(i)(P(k)-1), lam(i)(P(k)), 
                   lam(k)(P(k)-1), D[P(k)-2], D[P(k)-1]); 
         MulSubDiv(t2, lam(i)(P(k)-1), lam(i)(P(k)), 
                   D[P(k)], lam(k)(P(k)-1), D[P(k)-1]);
         lam(i)(P(k)-1) = t1;
         lam(i)(P(k)) = t2;
      }

      // D[P(k)-1] = (D[P(k)-2]*D[P(k)] + lam(k)(P(k)-1)^2) / D[P(k)-1]
      MulAddDiv(D[P(k)-1], D[P(k)], lam(k)(P(k)-1),
                D[P(k)-2], lam(k)(P(k)-1), D[P(k)-1]);
   }
   else if (!IsZero(lam(k)(P(k-1)))) {
      if (verbose) cerr << "swap case 2: " << k << "\n";
      // presumably e = gcd and x, y are the cofactors returned by
      // XGCD for (lam(k)(P(k-1)), D[P(k-1)]) -- confirm against XGCD
      XGCD(e, x, y, lam(k)(P(k-1)), D[P(k-1)]);

      ExactDiv(t1, lam(k)(P(k-1)), e);
      ExactDiv(t2, D[P(k-1)], e);

      // keep the positive quotient D[P(k-1)]/e in t3; t2 becomes its negative
      t3 = t2;
      negate(t2, t2);
      // (row k-1, row k) = (t1*row(k-1) + t2*row(k), y*row(k-1) + x*row(k))
      RowTransform(B(k-1), B(k), t1, t2, y, x);
      if (U) RowTransform((*U)(k-1), (*U)(k), t1, t2, y, x);
      for (j = 1; j <= k-2; j++)
         if (P(j) != 0)
            RowTransform(lam(k-1)(P(j)), lam(k)(P(j)), t1, t2, y, x);

      // t2 = (D[P(k-1)]/e)^2;  this factor is divided out of D[P(k-1)]
      // and of the D and lam entries belonging to later positions
      sqr(t2, t2);
      ExactDiv(D[P(k-1)], D[P(k-1)], t2);

      for (i = k+1; i <= m; i++)
         if (P(i) != 0) {
            ExactDiv(D[P(i)], D[P(i)], t2);
            for (j = i+1; j <= m; j++) {
               ExactDiv(lam(j)(P(i)), lam(j)(P(i)), t2);
            }
         }

      // entries of later rows at position P(k-1) are divided by t3
      for (i = k+1; i <= m; i++) {
         ExactDiv(lam(i)(P(k-1)), lam(i)(P(k-1)), t3);
      }

      swap(P(k-1), P(k));
   }
   else {
      if (verbose) cerr << "swap case 3: " << k << "\n";

      // only the rows, their earlier lam entries and the two
      // P values are exchanged; D is left untouched
      swap(B(k-1), B(k));
      if (U) swap((*U)(k-1), (*U)(k));
   
      for (j = 1; j <= k-2; j++)
         if (P(j) != 0)
            swap(lam(k-1)(P(j)), lam(k)(P(j)));

      swap(P(k-1), P(k));
   }
}

   


static
long LLL(ZZ& det, matrix(ZZ)& B, matrix(ZZ)* U, long a, long b, long verbose)

// Exact-arithmetic reduction driver shared by the public LLL overloads.
//
// B is reduced in place.  When U is non-null, *U is first set to the
// m x m identity and then receives every row operation applied to B.
// a and b are passed to SwapTest.  Progress is traced on cerr when
// verbose is nonzero.
//
// Returns the value s computed by LLLInit (printed as "rank" in the
// trace) and stores D[s] in det.

{
   vector(long) P;
   vector(ZZ) D;
   vector(vector(ZZ)) lam;

   if (verbose) cerr << "Gramm-Schmidt...";
   const long s = LLLInit(B, P, D, lam);
   if (verbose) cerr << "\n";

   const long m = B.NumRows();
   long k, j;

   if (U) ident(*U, m);

   if (s < m) {
      // There are linear dependencies; let's get rid of them first...
      if (verbose) cerr << "rank=" << s << "\n";

      for (k = 2; k <= m; ) {
         if (verbose) cerr << "-";

         reduce(k, k-1, B, P, D, lam, U);

         if (P(k-1) == 0 || P(k) != 0) {
            // no swap needed here:  finish size-reducing row k and advance
            if (verbose) cerr << "reduce: " << k << "\n";
            for (j = k-2; j >= 1; j--)
               reduce(k, j, B, P, D, lam, U);
            k++;
            continue;
         }

         swap(k, B, P, D, lam, U, verbose);
         if (k > 2) k--;
      }
   }

   if (verbose) cerr << "independent part\n";

   // the second pass starts at row (m-s)+2 and never steps back before it
   const long first = (m-s)+2;

   k = first;
   while (k <= m) {
      if (verbose) cerr << "-";

      reduce(k, k-1, B, P, D, lam, U);

      long do_swap = 0;
      if (P(k-1) != 0) {
         if (P(k) == 0)
            do_swap = 1;
         else if (SwapTest(D[P(k)], D[P(k)-1], D[P(k)-2],
                           lam(k)(P(k)-1), a, b))
            do_swap = 1;
      }

      if (do_swap) {
         swap(k, B, P, D, lam, U, verbose);
         if (k > first) k--;
      }
      else {
         if (verbose) cerr << "reduce: " << k << "\n";
         for (j = k-2; j >= 1; j--)
            reduce(k, j, B, P, D, lam, U);
         k++;
      }
   }

   det = D[s];
   return s;
}

long LLL(ZZ& det, matrix(ZZ)& B, matrix(ZZ)& U, long verbose)

// Reduces B with the fixed parameters a = 3, b = 4 and records the
// transformation in U.  Forwards to the static LLL driver and returns
// its result.

{
   return LLL(det, B, &U, 3, 4, verbose);
}

long LLL(ZZ& det, matrix(ZZ)& B, long verbose)

// Reduces B with the fixed parameters a = 3, b = 4; no transformation
// matrix is tracked (a null U pointer is passed to the static driver).

{
   return LLL(det, B, 0, 3, 4, verbose);
}

long LLL(ZZ& det, matrix(ZZ)& B, matrix(ZZ)& U, long a, long b, long verbose)

// Reduces B with caller-supplied parameters a, b and records the
// transformation in U.  Error is called when a <= 0, b <= 0, a > b,
// or b/4 >= a (integer division).

{
   const long bad_args = (a <= 0 || b <= 0 || a > b || b/4 >= a);
   if (bad_args) Error("LLL: bad args");

   return LLL(det, B, &U, a, b, verbose);
}

long LLL(ZZ& det, matrix(ZZ)& B, long a, long b, long verbose)

// Reduces B with caller-supplied parameters a, b; no transformation
// matrix is tracked.  Error is called when a <= 0, b <= 0, a > b,
// or b/4 >= a (integer division).

{
   const long bad_args = (a <= 0 || b <= 0 || a > b || b/4 >= a);
   if (bad_args) Error("LLL: bad args");

   return LLL(det, B, 0, a, b, verbose);
}




   



static
long image(ZZ& det, matrix(ZZ)& B, matrix(ZZ)* U, long verbose)

// Driver shared by the public image overloads.
//
// Runs LLLInit and then a single pass over rows 2..m in which row k is
// size-reduced against the earlier rows, and rows k-1 and k are swapped
// whenever P(k-1) != 0 and P(k) == 0.  No SwapTest is applied.
// When U is non-null, *U starts as the m x m identity and receives the
// same row operations as B.  Returns the value s computed by LLLInit
// and stores D[s] in det.

{
   vector(long) P;
   vector(ZZ) D;
   vector(vector(ZZ)) lam;

   if (verbose) cerr << "Gramm-Schmidt...";
   const long s = LLLInit(B, P, D, lam);
   if (verbose) cerr << "\n";

   const long m = B.NumRows();
   long row = 2;
   long col;

   if (U) ident(*U, m);

   while (row <= m) {
      if (verbose) cerr << "-";

      reduce(row, row-1, B, P, D, lam, U);

      if (P(row-1) == 0 || P(row) != 0) {
         // no swap needed here:  finish size-reducing this row and advance
         if (verbose) cerr << "reduce: " << row << "\n";
         for (col = row-2; col >= 1; col--)
            reduce(row, col, B, P, D, lam, U);
         row++;
         continue;
      }

      swap(row, B, P, D, lam, U, verbose);
      if (row > 2) row--;
   }

   det = D[s];
   return s;
}

long image(ZZ& det, matrix(ZZ)& B, matrix(ZZ)& U, long verbose)

// Public overload that also records the transformation in U;
// forwards to the static image driver and returns its result.

{
   return image(det, B, &U, verbose);
}

long image(ZZ& det, matrix(ZZ)& B, long verbose)

// Public overload without a transformation matrix (a null U pointer
// is passed to the static image driver).

{
   return image(det, B, 0, verbose);
}

static double InnerProduct(double *a, double *b, long n)

// Returns the sum of a[i]*b[i] for i = 1..n (index 0 is not used),
// accumulated with Kahan (compensated) summation.

{
   double total = 0;
   double comp = 0;      // running compensation term
   long idx;

   for (idx = 1; idx <= n; idx++) {
      double term = a[idx]*b[idx] - comp;
      double next = total + term;

      // recover what was lost when term was added to total
      comp = next - total;
      comp = comp - term;

      total = next;
   }

   return total;
}

static void RowTransform(vector(ZZ)& A, vector(ZZ)& B, const ZZ& MU1)
// A = A - B*MU1, coordinate by coordinate over the length of A
{
   static ZZ term, scal;

   scal = MU1;
   if (scal == 0) return;

   // When the lowest digit of the multiplier is zero, MakeOdd is applied
   // and its return value is re-applied as a left shift to each product.
   long shift = 0;
   if (digit(scal, 0) == 0)
      shift = MakeOdd(scal);

   const long len = A.length();
   long pos;

   if (scal.size() > 1) {
      // multiplier does not fit the size() <= 1 case:  ZZ * ZZ products
      for (pos = 1; pos <= len; pos++) {
         mul(term, B(pos), scal);
         if (shift > 0) LeftShift(term, term, shift);
         sub(A(pos), A(pos), term);
      }
      return;
   }

   // size() <= 1:  convert the multiplier to a long and use ZZ * long
   long scal_long;
   scal_long << scal;

   for (pos = 1; pos <= len; pos++) {
      mul(term, B(pos), scal_long);
      if (shift > 0) LeftShift(term, term, shift);
      sub(A(pos), A(pos), term);
   }
}

static void RowTransform2(vector(ZZ)& A, vector(ZZ)& B, const ZZ& MU1)
// A = A + B*MU1, coordinate by coordinate over the length of A
{
   static ZZ term, scal;

   scal = MU1;
   if (scal == 0) return;

   // When the lowest digit of the multiplier is zero, MakeOdd is applied
   // and its return value is re-applied as a left shift to each product.
   long shift = 0;
   if (digit(scal, 0) == 0)
      shift = MakeOdd(scal);

   const long len = A.length();
   long pos;

   if (scal.size() > 1) {
      // multiplier does not fit the size() <= 1 case:  ZZ * ZZ products
      for (pos = 1; pos <= len; pos++) {
         mul(term, B(pos), scal);
         if (shift > 0) LeftShift(term, term, shift);
         add(A(pos), A(pos), term);
      }
      return;
   }

   // size() <= 1:  convert the multiplier to a long and use ZZ * long
   long scal_long;
   scal_long << scal;

   for (pos = 1; pos <= len; pos++) {
      mul(term, B(pos), scal_long);
      if (shift > 0) LeftShift(term, term, shift);
      add(A(pos), A(pos), term);
   }
}


// Computes row k of the floating-point Gram-Schmidt data:
// mu[k][j] for j = 1..k-1 and c[k].
//
//   B      exact integer rows (used only when an inner product has
//          to be recomputed exactly)
//   B1     double approximations of the rows of B (1-based)
//   mu, c  Gram-Schmidt coefficients / values; rows and entries below
//          k are read, row k and c[k] are written
//   b      b[i] is read as the inner product of row i with itself
//   bound  threshold used in the precision test below
void ComputeGS(matrix(ZZ)& B, double **B1, double **mu, double *b, 
               double *c, long k, double bound)
{
   long n = B.NumCols();
   long i, j;
   double s, t1, y, c1, t;
   ZZ T1;
   long test;

   for (j = 1; j <= k-1; j++) {
      // approximate inner product of rows k and j
      s = InnerProduct(B1[k], B1[j], n);

      // test = s^2 <= b[k]*b[j]/bound,
      // but we compute it in a strange way to avoid overflow

      y = fabs(s);
      if (y == 0)
         test = (b[k] != 0);
      else {
         t = y/b[j];
         t1 = b[k]/y;
         if (t <= 1)
            test = (t*bound <= t1);
         else if (t1 >= 1)
            test = (t <= t1/bound);
         else
            test = 0;
      }

      // when the test fires, the floating-point value is replaced by
      // the exact integer inner product converted to double
      if (test) {
         InnerProduct(T1, B(k), B(j));
         s << T1;
      }

      // Kahan summation
      // t1 = sum over i < j of mu[j][i]*mu[k][i]*c[i]
      t1 = c1 = 0;
      for (i = 1; i <= j-1; i++) {
         y = mu[j][i]*mu[k][i]*c[i] - c1;
         t = t1+y;
         c1 = t-t1;
         c1 = c1-y;
         t1 = t;
      }
 
      mu[k][j] = (s - t1)/c[j];
   }

   // Kahan summation
   // s = sum over j < k of mu[k][j]^2 * c[j]
   s = c1 = 0;
   for (j = 1; j <= k-1; j++) {
      y = mu[k][j]*mu[k][j]*c[j] - c1;
      t = s+y;
      c1 = t-s;
      c1 = c1-y;
      s = t;
   }

   c[k] = b[k] - s;
}

static
long ll_LLL_FP(matrix(ZZ)& B, matrix(ZZ)* U, double delta, long deep, 
           LLLCheckFct check, double **B1, double **mu, double *b, double *c,
           long m, long init_k, long &quit)

// Low-level floating-point reduction loop shared by LLL_FP and BKZ_FP.
//
//   B         integer rows, transformed in place
//   U         if non-null, receives the same row operations as B
//   delta     parameter of the swap condition
//   deep      0 disables deep insertions; > 0 limits where they may occur
//   check     optional callback applied to row k after size reduction;
//             a nonzero result sets quit
//   B1, b     double approximations of the rows of B and of their
//             inner products with themselves (kept in step with B)
//   mu, c     Gram-Schmidt data, filled in by ComputeGS
//   m         number of rows to process (rows 1..m)
//   init_k    row at which processing starts
//   quit      output:  set to 1 if check returned nonzero, else 0
//
// A row whose b[k] is exactly 0 after size reduction is moved to
// position m and m is decremented.  Returns the final value of m.

{
   long n = B.NumCols();

   long i, j, k, Fc, Fc1;
   ZZ MU;
   double mu1;

   double t1;
   ZZ T1;
   double *tp;

   static double bound = 0;

   // bound = 2^(2*long(0.15*ZZ_DOUBLE_PRECISION)), computed once
   if (bound == 0) {
      // we tolerate a 15% loss of precision in computing
      // inner products in ComputeGS.

      bound = 1;
      for (i = 2*long(0.15*ZZ_DOUBLE_PRECISION); i > 0; i--)
         bound = bound * 2;
   }

   static double bound1 = 0;

   // bound1 = 2^(long(0.15*ZZ_DOUBLE_PRECISION)), computed once;
   // a reduction multiplier larger than this forces another
   // size-reduction pass (see Fc below)
   if (bound1 == 0) {
      bound1 = 1;
      // for (i = ZZ_DOUBLE_PRECISION/2; i > 0; i--)
      for (i = long(0.15*ZZ_DOUBLE_PRECISION); i > 0; i--)
         bound1 = bound1 * 2;
   }

   quit = 0;
   k = init_k;

   while (k <= m) {
      ComputeGS(B, B1, mu, b, c, k, bound);


      do {
         // size reduction

         // Fc1: some mu[k][j] was reduced in this pass
         // Fc:  some multiplier exceeded bound1, so the pass is repeated
         Fc = Fc1 = 0;
   
         for (j = k-1; j >= 1; j--) {
            t1 = fabs(mu[k][j]);
            if (t1 > 0.5) {
               Fc1 = 1;
   
               // round mu[k][j] to the nearest integer
               mu1 = mu[k][j];
               if (mu1 >= 0)
                  mu1 = ceil(mu1-0.5);
               else
                  mu1 = floor(mu1+0.5);
   
               if (fabs(mu1) > bound1)
                  Fc = 1;
   
               for (i = 1; i <= j-1; i++)
                  mu[k][i] -= mu1*mu[j][i];
   
               mu[k][j] -= mu1;
   
               MU << mu1;
   
               // row k -= MU * row j, on the exact integer data
               RowTransform(B(k), B(j), MU);
               if (U) RowTransform((*U)(k), (*U)(j), MU);
            }
         }

         if (Fc1) {
            // row k changed:  refresh its double approximation, its
            // squared length and its Gram-Schmidt data
            for (i = 1; i <= n; i++)
               B1[k][i] << B(k, i);
   
            b[k] = InnerProduct(B1[k], B1[k], n);
            ComputeGS(B, B1, mu, b, c, k, bound);
         }
      } while (Fc);

      if (check && (*check)(B(k))) 
         quit = 1;

      if (b[k] == 0) {
         // row k is treated as zero:  rotate it to position m and
         // shrink the range of rows being processed
         for (i = k; i < m; i++) {
            // swap i, i+1
            swap(B(i), B(i+1));
            tp = B1[i]; B1[i] = B1[i+1]; B1[i+1] = tp;
            t1 = b[i]; b[i] = b[i+1]; b[i+1] = t1;
            if (U) swap((*U)(i), (*U)(i+1));
         }

         m--;
         if (quit) break;
         continue;
      }

      if (quit) break;

      if (deep > 0) {
         // deep insertions
   
         // find the first position l at which delta*c[l] exceeds the
         // running value cc (b[k] minus the components already passed)
         double cc = b[k];
         long l = 1;
         while (l <= k-1 && delta*c[l] <= cc) {
            cc = cc - mu[k][l]*mu[k][l]*c[l];
            l++;
         }
   
         if (l <= k-1 && (l <= deep || k-l <= deep)) {
            // deep insertion at position l
   
            for (i = k; i > l; i--) {
               // swap rows i, i-1
               swap(B(i), B(i-1));
               tp = B1[i]; B1[i] = B1[i-1]; B1[i-1] = tp;
               t1 = b[i]; b[i] = b[i-1]; b[i-1] = t1;
               if (U) swap((*U)(i), (*U)(i-1));
            }
   
            k = l;
            continue;
         }
      } // end deep insertions

      // test LLL reduction condition

      if (k > 1 && delta*c[k-1] > c[k] + mu[k][k-1]*mu[k][k-1]*c[k-1]) {
         // swap rows k, k-1
         swap(B(k), B(k-1));
         tp = B1[k]; B1[k] = B1[k-1]; B1[k-1] = tp;
         t1 = b[k]; b[k] = b[k-1]; b[k-1] = t1;
         if (U) swap((*U)(k), (*U)(k-1));

         k--;
      }
      else
         k++;
   }

   return m;
}

static
long LLL_FP(matrix(ZZ)& B, matrix(ZZ)* U, double delta, long deep, 
           LLLCheckFct check)

// Floating-point reduction driver shared by the public LLL_FP overloads.
//
// Allocates the double-precision work arrays, runs ll_LLL_FP over all
// rows of B, and then, if some rows were removed as zero, moves those
// rows to the front of B (and of *U).  When U is non-null, *U is first
// set to the m x m identity.  delta, deep and check are passed through
// to ll_LLL_FP unchanged.
//
// Returns the row count reported by ll_LLL_FP, i.e. the number of rows
// that were not removed as zero.

{
   long m = B.NumRows();
   long n = B.NumCols();

   // number of rows actually allocated in B1 and mu; m itself is
   // overwritten below with the (possibly smaller) count returned
   // by ll_LLL_FP, so the clean-up must not rely on it
   const long m_alloc = m;

   long i, j;
   long new_m, dep, quit;

   if (U) ident(*U, m);

   double **B1;  // approximates B

   typedef double *doubleptr;

   B1 = new doubleptr[m+1];
   if (!B1) Error("LLL_FP: out of memory");

   for (i = 1; i <= m; i++) {
      B1[i] = new double[n+1];
      if (!B1[i]) Error("LLL_FP: out of memory");
   }

   double **mu;
   mu = new doubleptr[m+1];
   if (!mu) Error("LLL_FP: out of memory");

   for (i = 1; i <= m; i++) {
      mu[i] = new double[m+1];
      if (!mu[i]) Error("LLL_FP: out of memory");
   }

   double *c; // squared lengths of Gramm-Schmidt basis vectors

   c = new double[m+1];
   if (!c) Error("LLL_FP: out of memory");

   double *b; // squared lengths of basis vectors

   b = new double[m+1];
   if (!b) Error("LLL_FP: out of memory");


   for (i = 1; i <=m; i++)
      for (j = 1; j <= n; j++) 
         B1[i][j] << B(i, j);

         
   for (i = 1; i <= m; i++) {
      b[i] = InnerProduct(B1[i], B1[i], n);
   }

   new_m = ll_LLL_FP(B, U, delta, deep, check, B1, mu, b, c, m, 1, quit);
   dep = m - new_m;
   m = new_m;

   if (dep > 0) {
      // for consistency, we move all of the zero rows to the front

      for (i = 0; i < m; i++) {
         swap(B(m+dep-i), B(m-i));
         if (U) swap((*U)(m+dep-i), (*U)(m-i));
      }
   }


   // clean-up:  release every row that was allocated, including the
   // ones belonging to rows that ll_LLL_FP removed

   for (i = 1; i <= m_alloc; i++) {
      delete [] B1[i];
   }

   delete [] B1;

   for (i = 1; i <= m_alloc; i++) {
      delete [] mu[i];
   }

   delete [] mu;

   delete [] c;

   delete [] b;

   return m;
}

         

long LLL_FP(matrix(ZZ)& B, double delta, long deep, LLLCheckFct check)

// Public entry point without a transformation matrix.
// Error is called when delta <= 0.25 or delta > 1, or when deep < 0;
// otherwise the call is forwarded to the static LLL_FP driver with a
// null U pointer and its result is returned.

{
   if (delta <= 0.25 || delta > 1) Error("LLL_FP: bad delta");
   if (deep < 0) Error("LLL_FP: bad deep");
   return LLL_FP(B, 0, delta, deep, check);
}

long LLL_FP(matrix(ZZ)& B, matrix(ZZ)& U, double delta, long deep, 
           LLLCheckFct check)

// Public entry point that also records the transformation in U.
// Error is called when delta <= 0.25 or delta > 1, or when deep < 0;
// otherwise the call is forwarded to the static LLL_FP driver and its
// result is returned.

{
   if (delta <= 0.25 || delta > 1) Error("LLL_FP: bad delta");
   if (deep < 0) Error("LLL_FP: bad deep");
   return LLL_FP(B, &U, delta, deep, check);
}



static
long BKZ_FP(matrix(ZZ)& BB, matrix(ZZ)* UU, double delta, 
         long beta, long prune, LLLCheckFct check)

// Floating-point block reduction driver shared by the public BKZ_FP
// overloads.
//
//   BB     input rows; overwritten with the result
//   UU     if non-null, receives the accumulated row transformation
//   delta  parameter passed to ll_LLL_FP and used in the test that
//          decides whether an enumeration result is inserted
//   beta   block size (reduced to the row count after the first pass
//          if it is larger)
//   prune  nonzero enables the pruning factor alpha in the enumeration
//   check  optional callback; a nonzero result stops the computation
//
// The work is done on a local copy of BB with one extra scratch row
// (index m+1), which holds a newly built vector in the general
// insertion case.  Returns the number of rows left after ll_LLL_FP
// removed rows it found to be zero; those rows are moved to the front.

{
   long m = BB.NumRows();
   long n = BB.NumCols();
   long m_orig = m;
   
   long i, j;
   ZZ MU;

   double t1;
   double *tp;

   matrix(ZZ) B;
   B = BB;

   B.SetDims(m+1, n);


   double **B1;  // approximates B

   typedef double *doubleptr;

   B1 = new doubleptr[m+2];
   if (!B1) Error("BKZ_FP: out of memory");

   for (i = 1; i <= m+1; i++) {
      B1[i] = new double[n+1];
      if (!B1[i]) Error("BKZ_FP: out of memory");
   }

   double **mu;
   mu = new doubleptr[m+2];
   if (!mu) Error("BKZ_FP: out of memory");

   for (i = 1; i <= m+1; i++) {
      mu[i] = new double[m+1];
      if (!mu[i]) Error("BKZ_FP: out of memory");
   }

   double *c; // squared lengths of Gramm-Schmidt basis vectors

   c = new double[m+2];
   if (!c) Error("BKZ_FP: out of memory");

   double *b; // squared lengths of basis vectors

   b = new double[m+2];
   if (!b) Error("BKZ_FP: out of memory");

   double cbar;

   // work arrays of the enumeration step (all indexed 1..m+1)

   double *ctilda;
   ctilda = new double[m+2];
   if (!ctilda) Error("BKZ_FP: out of memory");

   double *vvec;
   vvec = new double[m+2];
   if (!vvec) Error("BKZ_FP: out of memory");

   double *yvec;
   yvec = new double[m+2];
   if (!yvec) Error("BKZ_FP: out of memory");

   double *uvec;
   uvec = new double[m+2];
   if (!uvec) Error("BKZ_FP: out of memory");

   double *utildavec;
   utildavec = new double[m+2];
   if (!utildavec) Error("BKZ_FP: out of memory");


   double *Deltavec;
   Deltavec = new double[m+2];
   if (!Deltavec) Error("BKZ_FP: out of memory");

   double *deltavec;
   deltavec = new double[m+2];
   if (!deltavec) Error("BKZ_FP: out of memory");

   matrix(ZZ) Ulocal;
   matrix(ZZ) *U;

   if (UU) {
      // local transformation matrix:  identity plus one scratch row
      Ulocal.SetDims(m+1, m);
      for (i = 1; i <= m; i++)
         Ulocal(i, i) << 1;
      U = &Ulocal;
   }
   else
      U = 0;

   long quit;
   long new_m;
   long z, jj, kk;
   long s, t;
   long h;
   double alpha;


   for (i = 1; i <=m; i++)
      for (j = 1; j <= n; j++) 
         B1[i][j] << B(i, j);

         
   for (i = 1; i <= m; i++) {
      b[i] = InnerProduct(B1[i], B1[i], n);
   }

   // cerr << "\n";
   // cerr << "first LLL\n";

   m = ll_LLL_FP(B, U, delta, 0, check, B1, mu, b, c, m, 1, quit);

   if (m < m_orig) {
      // bring the scratch row down to position m+1, directly after
      // the rows that are still active
      for (i = m_orig+1; i >= m+2; i--) {
         // swap i, i-1

         swap(B(i), B(i-1));
         if (U) swap((*U)(i), (*U)(i-1));
      }
   }

   if (!quit && m > 1) {
      // cerr << "continuing\n";
      if (beta > m) beta = m;

      // z counts consecutive blocks that produced no insertion;
      // the loop ends once m-1 blocks in a row made no change
      z = 0;
      jj = 0;
   
      while (z < m-1) {
         jj++;
         kk = min(jj+beta-1, m);
   
         if (jj == m) {
            jj = 1;
            kk = beta;
         }
   
         // ENUM
   
         cbar = c[jj];
         utildavec[jj] = uvec[jj] = 1;
   
         yvec[jj] = Deltavec[jj] = vvec[jj] = 0;
   
   
         s = t = jj;
         deltavec[jj] = 1;
   
         for (i = jj+1; i <= kk+1; i++) {
            ctilda[i] = uvec[i] = utildavec[i] = yvec[i] = Deltavec[i] = 0;
            vvec[i] = 0;
            deltavec[i] = 1;
         }
   
         while (t <= kk) {
            ctilda[t] = ctilda[t+1] + 
               (yvec[t]+utildavec[t])*(yvec[t]+utildavec[t])*c[t];
   
            if (prune) {
               alpha = 1.05*double(kk-t+1)/double(kk-jj);
               if (alpha > 1) alpha = 1;
            }
            else
               alpha = 1;
   
            if (ctilda[t] < alpha*cbar) {
               if (t > jj) {
                  // descend one level and centre the next coefficient
                  t--;
                  t1 = 0;
                  for (i = t+1; i <= s; i++)
                     t1 += utildavec[i]*mu[i][t];
                  yvec[t] = t1;
                  t1 = -t1;
                  if (t1 >= 0)
                     t1 = ceil(t1-0.5);
                  else
                     t1 = floor(t1+0.5);
                  utildavec[t] = vvec[t] = t1;
                  Deltavec[t] = 0;
                  if (utildavec[t] > -yvec[t]) 
                     deltavec[t] = -1;
                  else
                     deltavec[t] = 1;
               }
               else {
                  // new best combination found for this block
                  cbar = ctilda[jj];
                  for (i = jj; i <= kk; i++) {
                     uvec[i] = utildavec[i];
                  }
               }
            }
            else {
               // go up one level and step to the next candidate there
               t++;
               s = max(s, t);
               if (t < s) Deltavec[t] = -Deltavec[t];
               if (Deltavec[t]*deltavec[t] >= 0) Deltavec[t] += deltavec[t];
               utildavec[t] = vvec[t] + Deltavec[t];
            }
         }
         
   
         h = min(kk+1, m);
   
         if (delta*c[jj] > cbar) {
            // we treat the case that the new vector is b_s (jj < s <= kk)
            // as a special case that appears to occur most of the time.
   
            // s = index of the single nonzero uvec entry above jj,
            // 0 if there is none, -1 if there is more than one
            s = 0;
            for (i = jj+1; i <= kk; i++) {
               if (uvec[i] != 0) {
                  if (s == 0)
                     s = i;
                  else
                     s = -1;
               }
            }
   
            if (s == 0) Error("BKZ_FP: internal error");
   
            if (s > 0) {
               // special case
   
               for (i = s; i > jj; i--) {
                  // swap i, i-1
                  swap(B(i-1), B(i));
                  if (U) swap((*U)(i-1), (*U)(i));
                  tp = B1[i-1]; B1[i-1] = B1[i]; B1[i] = tp;
                  t1 = b[i-1]; b[i-1] = b[i]; b[i] = t1;
               }
   
               // cerr << "special case\n";
               new_m = ll_LLL_FP(B, U, delta, 0, check, 
                                B1, mu, b, c, h, jj, quit);
               if (new_m != h) Error("BKZ_FP: internal error");
               if (quit) break;
            }
            else {
               // the general case
   
               // build the new vector in the scratch row m+1 as the
               // combination of rows jj..kk given by uvec
               for (i = 1; i <= n; i++) B(m+1, i) << 0;

               if (U) {
                  for (i = 1; i <= m_orig; i++)
                     (*U)(m+1, i) << 0;
               }

               for (i = jj; i <= kk; i++) {
                  if (uvec[i] == 0) continue;
                  MU << uvec[i];
                  RowTransform2(B(m+1), B(i), MU);
                  if (U) RowTransform2((*U)(m+1), (*U)(i), MU);
               }
      
               // move the new vector from position m+1 down to position jj
               for (i = m+1; i >= jj+1; i--) {
                  // swap i, i-1
                  swap(B(i-1), B(i));
                  if (U) swap((*U)(i-1), (*U)(i));
                  tp = B1[i-1]; B1[i-1] = B1[i]; B1[i] = tp;
                  t1 = b[i-1]; b[i-1] = b[i]; b[i] = t1;
               }
      
               for (i = 1; i <= n; i++)
                  B1[jj][i] << B(jj, i);
      
               b[jj] = InnerProduct(B1[jj], B1[jj], n);
      
               if (b[jj] == 0) Error("BKZ_FP: internal error"); 
      
               // remove linear dependencies
   
               // cerr << "general case\n";
               new_m = ll_LLL_FP(B, U, delta, 0, 0, B1, mu, b, c, kk+1, jj, quit);
              
               if (new_m != kk) Error("BKZ_FP: internal error"); 

               // remove zero vector
      
               for (i = kk+2; i <= m+1; i++) {
                  // swap i, i-1
                  swap(B(i-1), B(i));
                  if (U) swap((*U)(i-1), (*U)(i));
                  tp = B1[i-1]; B1[i-1] = B1[i]; B1[i] = tp;
                  t1 = b[i-1]; b[i-1] = b[i]; b[i] = t1;
               }
      
               // the call above ran without the callback, so apply
               // it here to rows 1..kk
               quit = 0;
               if (check) {
                  for (i = 1; i <= kk; i++)
                     if ((*check)(B(i))) {
                        quit = 1;
                        break;
                     }
               }

               if (quit) break;
   
               if (h > kk) {
                  // extend reduced basis
   
                  new_m = ll_LLL_FP(B, U, delta, 0, check, 
                                   B1, mu, b, c, h, h, quit);
   
                  if (new_m != h) Error("BKZ_FP: internal error");
                  if (quit) break;
               }
            }
   
            z = 0;
         }
         else {
            // LLL_FP
            // cerr << "progress\n";
            new_m = ll_LLL_FP(B, U, delta, 0, check, B1, mu, b, c, h, h, quit);
   
   
            if (new_m != h) Error("BKZ_FP: internal error");
            if (quit) break;
   
            z++;
         }
      }
   }

   // clean up

   if (m_orig > m) {
      // for consistency, we move zero vectors to the front

      // first push the scratch row back to the last position ...
      for (i = m+1; i <= m_orig; i++) {
         swap(B(i), B(i+1));
         if (U) swap((*U)(i), (*U)(i+1));
      }

      // ... then exchange the nonzero rows with the zero rows
      for (i = 0; i < m; i++) {
         swap(B(m_orig-i), B(m-i));
         if (U) swap((*U)(m_orig-i), (*U)(m-i));
      }
   }

   B.SetDims(m_orig, n);
   BB = B;

   if (U) {
      U->SetDims(m_orig, m_orig);
      *UU = *U;
   }

   // B1 and mu were allocated with m_orig+1 rows; m may have shrunk
   // since then, so the original count must be used to free them all

   for (i = 1; i <= m_orig+1; i++) {
      delete [] B1[i];
   }

   delete [] B1;

   for (i = 1; i <= m_orig+1; i++) {
      delete [] mu[i];
   }

   delete [] mu;

   delete [] c;
   delete [] b;
   delete [] ctilda;
   delete [] vvec;
   delete [] yvec;
   delete [] uvec;
   delete [] utildavec;
   delete [] Deltavec;
   delete [] deltavec;

   return m;
}

long BKZ_FP(matrix(ZZ)& BB, matrix(ZZ)& UU, double delta, 
         long beta, long prune, LLLCheckFct check)

// Public entry point that also records the transformation in UU.
// Error is called when delta <= 0.25 or delta > 1, or when beta < 2;
// otherwise the call is forwarded to the static BKZ_FP driver and its
// result is returned.

{
   if (delta <= 0.25 || delta > 1) Error("BKZ_FP: bad delta");
   if (beta < 2) Error("BKZ_FP: bad block size");

   return BKZ_FP(BB, &UU, delta, beta, prune, check);
}

long BKZ_FP(matrix(ZZ)& BB, double delta, 
         long beta, long prune, LLLCheckFct check)

// Public entry point without a transformation matrix.
// Error is called when delta <= 0.25 or delta > 1, or when beta < 2;
// otherwise the call is forwarded to the static BKZ_FP driver with a
// null UU pointer and its result is returned.

{
   if (delta <= 0.25 || delta > 1) Error("BKZ_FP: bad delta");
   if (beta < 2) Error("BKZ_FP: bad block size");

   return BKZ_FP(BB, 0, delta, beta, prune, check);
}
