/*
 *----------------------------------------------------------------------
 *
 * Copyright 1991, University of New Mexico.  All rights reserved.
 * Permission to copy and modify this software and its documen-
 * tation only for internal use in your organization is hereby
 * granted, provided that this notice is retained thereon and
 * on all copies.  UNM makes no representations as to the sui-
 * tability and operability of this software for any purpose.
 * It is provided "as is" without express or implied warranty.
 * 
 * UNM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FIT-
 * NESS.  IN NO EVENT SHALL UNM BE LIABLE FOR ANY SPECIAL,
 * INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY OTHER DAMAGES WHAT-
 * SOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
 * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PER-
 * FORMANCE OF THIS SOFTWARE.
 * 
 * No other rights, including, for example, the right to redis-
 * tribute this software and its documentation or the right to
 * prepare derivative works, are granted unless specifically
 * provided in a separate license agreement.
 *---------------------------------------------------------------------
 */

#include "unmcopyright.h"        /* Copyright 1991 by UNM */

/*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>  <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 >>>>
 >>>>         File Name: ldct2d.c
 >>>>
 >>>>      Program Name: dct2d
 >>>>
 >>>> Date Last Updated: Mon Apr 22 21:55:31 1991 
 >>>>
 >>>>          Routines: ldct2d - the library call for dct2d
 >>>>
 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>   <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/


#include "vinclude.h"


/* -library_includes */
#define PI 3.1415927

/*
 * DCT factor tables.  They are written by init_factors() and read by
 * vec_dct_8() and vec_dct_16().
 *
 * Naming scheme, as assigned in init_factors():
 *   con         = sqrt(2 / block_size)
 *   pNN         = PI / NN
 *   c_XX_pi_NN  = cos(XX * PI / NN)          (unscaled)
 *   s_XX_pi_NN  = sin(XX * PI / NN)          (unscaled)
 *   c_XX_p_NN   = con * cos(XX * PI / NN)    (scaled)
 *   s_XX_p_NN   = con * sin(XX * PI / NN)    (scaled)
 */
     float  con, p04, p08, p16, p32;

     /* unscaled factors */
     float  c_01_pi_04, c_01_pi_08, c_03_pi_08, s_01_pi_08, s_03_pi_08;

     /* scaled factors, multiples of PI/4 and PI/8 */
     float  c_01_p_04;
     float  c_01_p_08, c_03_p_08, s_01_p_08, s_03_p_08;

     /* scaled factors, odd multiples of PI/16 */
     float  c_01_p_16, c_03_p_16, c_05_p_16, c_07_p_16;
     float  s_01_p_16, s_03_p_16, s_05_p_16, s_07_p_16;

     /* scaled factors, odd multiples of PI/32 (16-point transform only) */
     float  c_01_p_32, c_03_p_32, c_05_p_32, c_07_p_32;
     float  c_09_p_32, c_11_p_32, c_13_p_32, c_15_p_32;
     float  s_01_p_32, s_03_p_32, s_05_p_32, s_07_p_32;
     float  s_09_p_32, s_11_p_32, s_13_p_32, s_15_p_32;

/*
 * Forward declarations.  Every helper below is called before its
 * definition; without these the calls rely on implicit declarations,
 * and init_factors() would be implicitly extern before being defined
 * static.  Old-style (parameterless) declarators are used so that they
 * agree with the K&R definitions further down.
 */
static int init_factors();
int dct_1d();
int vec_dct_8();
int vec_dct_16();
/* -library_includes_end */


/****************************************************************
*
* Routine Name: ldct2d - library call for dct2d
*
* Purpose:
*    
*    Compute blockwise two-dimensional Discrete Cosine  Transform
*    (DCT) of an image
*    
*    
* Input:
*    
*    image          pointer to xvimage structure to be processed
*    
*    block_size     block-size to be used for  the  transform,  either
*                   8x8 or 16x16
*    
*    
* Output:
*    
*    image          holds the result of the Discrete Cosine  Transform
*                   operation.  The  output  data  is  of  type float,
*                   regardless of the input data type.
*    
*    Return Value:  1 on success, 0 on failure.
*    
*    
*
* Written By: Michael Gilge, gilge@icsi.berkeley.edu

        Copyright 1991, Michael Gilge. All rights reserved.
        Distribution and use permitted as provided in the 
        KHOROS copyright as long as this notice is attached
        to the code and its derivatives.
****************************************************************/


/* -library_def */
int ldct2d(image, block_size)
struct xvimage *image;
int block_size;
/* -library_def_end */

/* -library_code */
{
    /*
     * Replaces the image data by its blockwise 2-D DCT: the pixels are
     * copied into a newly allocated float buffer, transformed row-wise
     * and then column-wise in place, and the buffer is installed as the
     * new image->imagedata (the old data is freed).  On any failure the
     * image is left untouched and 0 is returned.
     *
     * NOTE(review): exactly row_size * col_size samples are converted and
     * kept.  If struct xvimage can describe more data than that (e.g.
     * several bands), the remainder is dropped here -- confirm.
     */
    int dimx, dimy;            /* Number of columns / rows of the image */
    int blockx, blocky;        /* Number of blocks per row / per column */
    size_t npix, n;            /* Total pixel count and running index */
    float *in, *in_ptr;        /* Float working buffer and write cursor */
    unsigned char *cptr;       /* Typed views of the original pixel data */
    short         *sptr;
    int           *iptr;
    float         *fptr;

    if ((image == NULL) || (image->imagedata == NULL))
    {  fprintf(stderr,"ldct2d: No input image data\n");
       return(0);
    }

    /*
     * Only 8- and 16-point transforms are implemented.  Checking here also
     * keeps a zero block size out of the modulo/division below and keeps
     * larger sizes away from the 16-element work vector used by dct_1d().
     */
    if ((block_size != 8) && (block_size != 16))
    {  fprintf(stderr,"ldct2d: Block size must be 8 or 16\n");
       return(0);
    }

    dimx = image->row_size;            /* Number of columns */
    dimy = image->col_size;            /* Number of rows */

    /* Make sure size is legal for a DCT */
    if ((dimx <= 0) || (dimy <= 0) ||
        ((dimx % block_size) != 0) || ((dimy % block_size) != 0))
    {  fprintf(stderr,"ldct2d: Input image size must be integer multiple of block-size\n");
       return(0);
    }

    blockx = dimx / block_size;
    blocky = dimy / block_size;

    /* Count pixels in size_t so that large images cannot overflow an int */
    npix = (size_t)dimx * (size_t)dimy;

    /* Get space for the input float array */
    in = NULL;
    if (npix <= ((size_t)-1) / sizeof(float))
       in = (float *)malloc(npix * sizeof(float));
    if (in == NULL)
    {
       fprintf(stderr,"ldct2d: Could not allocate enough memory!\n");
       return(0);
    }

    init_factors(block_size);

    in_ptr = in;
    cptr = (unsigned char *)(image->imagedata);
    sptr = (short *)(image->imagedata);
    iptr = (int *)(image->imagedata);
    fptr = (float *)(image->imagedata);

    /* Transfer the pixels to the float array, whatever their storage type */
    switch (image->data_storage_type)
    {
       case VFF_TYP_1_BYTE :
          for (n = 0; n < npix; n++)
              *in_ptr++ = (float)(*cptr++);
          break;
       case VFF_TYP_2_BYTE :
          for (n = 0; n < npix; n++)
              *in_ptr++ = (float)(*sptr++);
          break;
       case VFF_TYP_4_BYTE :
          for (n = 0; n < npix; n++)
              *in_ptr++ = (float)(*iptr++);
          break;
       case VFF_TYP_FLOAT :
          for (n = 0; n < npix; n++)
              *in_ptr++ = (*fptr++);
          break;
       default :
          fprintf(stderr,"ldct2d: Unsupported data storage type\n");
          free(in);
          return(0);
    }

    free(image->imagedata);      /* Give back input image space */

    dct_1d(in, block_size, blockx, dimy, 0); /* forward DCT over rows */
    dct_1d(in, block_size, blocky, dimx, 1); /* forward DCT over columns */

    /* The output is always float, regardless of the input type */
    image->imagedata = (char * ) in;
    image->data_storage_type = VFF_TYP_FLOAT;

    return(1);
}

/*------------------------------------------------------------*/
/* Function init_factors (block_size)                         */
/*                                                            */
/* Fills the file-level factor tables used by vec_dct_8 and   */
/* vec_dct_16.  con = sqrt(2 / block_size) scales every       */
/* c_XX_p_NN / s_XX_p_NN entry; the c_XX_pi_NN / s_XX_pi_NN   */
/* entries are plain cos/sin(XX * PI / NN).  The pi/32 and    */
/* unscaled pi/8 entries are only set for block_size == 16.   */
/*                                                            */
/* block_size : transform length (8 or 16 in this file)       */
/*                                                            */
/* Returns 1, or 0 (tables untouched) if block_size <= 0.     */
/*------------------------------------------------------------*/
static int
init_factors (block_size)
  int block_size;
{
   /* A non-positive size would give an infinite or NaN scale factor */
   if (block_size <= 0)
      return(0);

   con = sqrt(2.0 / block_size);

   /* Angle steps; kept as floats because the factors are derived from them */
   p04 = PI / 4;
   p08 = PI / 8;
   p16 = PI / 16;
   p32 = PI / 32;

   /* PI/4: one unscaled and one scaled cosine */
   c_01_pi_04 = cos(p04);
   c_01_p_04  = con*cos(p04);

   /* Scaled PI/8 pair */
   c_01_p_08  = con*cos(p08);
   s_01_p_08  = con*sin(p08);
   c_03_p_08  = con*cos(3*p08);
   s_03_p_08  = con*sin(3*p08);

   /* Scaled odd multiples of PI/16 */
   c_01_p_16  = con*cos(p16);
   s_01_p_16  = con*sin(p16);
   c_03_p_16  = con*cos(3*p16);
   s_03_p_16  = con*sin(3*p16);
   c_05_p_16  = con*cos(5*p16);
   s_05_p_16  = con*sin(5*p16);
   c_07_p_16  = con*cos(7*p16);
   s_07_p_16  = con*sin(7*p16);

   /* The remaining entries are only read by the 16-point transform */
   if (block_size == 16)
   {
      c_01_pi_08 = cos(p08);
      s_01_pi_08 = sin(p08);
      c_03_pi_08 = cos(3*p08);
      s_03_pi_08 = sin(3*p08);

      c_01_p_32  = con*cos(p32);
      s_01_p_32  = con*sin(p32);
      c_03_p_32  = con*cos(3*p32);
      s_03_p_32  = con*sin(3*p32);
      c_05_p_32  = con*cos(5*p32);
      s_05_p_32  = con*sin(5*p32);
      c_07_p_32  = con*cos(7*p32);
      s_07_p_32  = con*sin(7*p32);
      c_09_p_32  = con*cos(9*p32);
      s_09_p_32  = con*sin(9*p32);
      c_11_p_32  = con*cos(11*p32);
      s_11_p_32  = con*sin(11*p32);
      c_13_p_32  = con*cos(13*p32);
      s_13_p_32  = con*sin(13*p32);
      c_15_p_32  = con*cos(15*p32);
      s_15_p_32  = con*sin(15*p32);
   }

   return(1);
}


/*------------------------------------------------------------*/
/* Function dct_1d (in, blsz, block_nr, dim_in, dir)          */
/*                                                            */
/* One-dimensional DCT over the complete image.  Depending on */
/* dir, vectors of blsz samples are taken along the rows or   */
/* down the columns, transformed, and written back in place.  */
/*                                                            */
/* in       : pointer to the float image array                */
/* blsz     : transform length, 8 or 16                       */
/* block_nr : number of blocks along one row (dir=0) or       */
/*            one column (dir!=0)                             */
/* dim_in   : dir=0: number of rows;                          */
/*            dir!=0: number of columns (= row length)        */
/* dir      : 0 = transform rows, otherwise columns           */
/*                                                            */
/* Returns 1 on success, 0 if in is NULL or blsz is not 8/16  */
/* (the 16-element work vector cannot hold longer blocks).    */
/*------------------------------------------------------------*/

int dct_1d (in, blsz, block_nr, dim_in, dir)
 float *in;
 int blsz, block_nr, dim_in, dir;
{
   int    vec_dct_8(), vec_dct_16();  /* defined further down in this file */
   float  vec[16];                    /* work vector for one block */
   float  *blk;                       /* first sample of the current block */
   size_t stride;                     /* distance between samples of a block */
   int    i, k, m;

   if ((in == NULL) || ((blsz != 8) && (blsz != 16)))
      return(0);

   /* Row samples are adjacent; column samples are one row length apart */
   stride = (dir == 0) ? 1 : (size_t)dim_in;

   for (i = 0; i < dim_in; i++)
   {
       for (k = 0; k < block_nr; k++)
       {
          /*
           * Locate the block by index instead of advancing a running
           * pointer, so no pointer beyond the buffer is ever formed.
           */
          if (dir == 0)          /* block k of row i */
             blk = in + ((size_t)i * block_nr + k) * blsz;
          else                   /* block k of column i */
             blk = in + i + (size_t)k * blsz * dim_in;

          for (m = 0; m < blsz; m++) vec[m] = blk[m * stride];

          if (blsz == 8) vec_dct_8 (vec);   /* transform block */
          else vec_dct_16 (vec);            /* transform block */

          for (m = 0; m < blsz; m++) blk[m * stride] = vec[m];
       }
   }

   return(1);
}


/*------------------------------------------------------------*/
/* Function vec_dct_8 (vector)                                */
/*                                                            */
/* One-dimensional DCT of a vector of 8 samples, computed in  */
/* place: the transform coefficients overwrite the input.     */
/* The c_.. / s_.. factors are file-level variables that      */
/* init_factors() must have filled in beforehand.             */
/*                                                            */
/* vector  : array of 8 floats, used for input and output     */
/*                                                            */
/* Always returns 1; the explicit int replaces the implicit   */
/* return type, which C99 and later no longer allow.          */
/*------------------------------------------------------------*/

int vec_dct_8 (vector)
float *vector;
{
   float fold[8];   /* [0..3] sums, [4..7] differences of mirrored pairs */
   int   k;

/*---- Input butterfly: pair sample k with sample 7-k ----------*/
   for (k = 0; k < 4; k++)
   {
      fold[k]     = vector[k] + vector[7 - k];
      fold[7 - k] = vector[k] - vector[7 - k];
   }

/*---- Transformation -----------*/
   /*
    * All inputs have been folded above, so each coefficient can be
    * written straight to its final position in vector.
    */
   vector[0] = c_01_p_04 * (fold[0] + fold[1] + fold[2] + fold[3]);
   vector[4] = c_01_p_04 * (fold[0] - fold[1] - fold[2] + fold[3]);
   vector[2] = s_01_p_08 * (fold[1] - fold[2]) + c_01_p_08 * (fold[0] - fold[3]);
   vector[6] = c_03_p_08 * (fold[0] - fold[3]) + s_03_p_08 * (fold[2] - fold[1]);
   vector[1] = s_01_p_16 * (fold[4] + c_01_pi_04 * (fold[6] - fold[5]))
             + c_01_p_16 * (fold[7] + c_01_pi_04 * (fold[5] + fold[6]));
   vector[5] = s_05_p_16 * (fold[4] - c_01_pi_04 * (fold[6] - fold[5]))
             + c_05_p_16 * (fold[7] - c_01_pi_04 * (fold[5] + fold[6]));
   vector[3] = c_03_p_16 * (fold[7] - c_01_pi_04 * (fold[5] + fold[6]))
             - s_03_p_16 * (fold[4] - c_01_pi_04 * (fold[6] - fold[5]));
   vector[7] = c_07_p_16 * (fold[7] + c_01_pi_04 * (fold[5] + fold[6]))
             - s_07_p_16 * (fold[4] + c_01_pi_04 * (fold[6] - fold[5]));

   return(1);
}


/*------------------------------------------------------------*/
/* Function vec_dct_16 (vector)                               */
/*                                                            */
/* One-dimensional DCT of a vector of 16 samples, computed in */
/* place: the transform coefficients overwrite the input.     */
/* Coefficient k is stored in vector[k]: the even ones come   */
/* from an 8-point stage on the folded sums, the odd ones     */
/* from the folded differences.                               */
/* The c_.. / s_.. factors are file-level variables that      */
/* init_factors(16) must have filled in beforehand.           */
/*                                                            */
/* vector  : array of 16 floats, used for input and output    */
/*                                                            */
/* Always returns 1.                                          */
/*------------------------------------------------------------*/

int vec_dct_16 (vector)
  float *vector;
{
   float fold[16];    /* [0..7] sums, [8..15] differences of mirrored pairs */
   float even[8];     /* second fold of fold[0..7] */
   float *odd;        /* fold[8..15], input of the odd coefficients */
   float y[4], z[4];  /* intermediate terms of the odd half */
   int   k;

/*---- Input butterfly: pair sample k with sample 15-k ----------*/
   for (k = 0; k < 8; k++)
   {
      fold[k]      = vector[k] + vector[15 - k];
      fold[15 - k] = vector[k] - vector[15 - k];
   }

/*---------------------------------------------------------*/
/*---- Even coefficients: 8-point stage on fold[0..7]  ----*/
/*---------------------------------------------------------*/
   for (k = 0; k < 4; k++)
   {
      even[k]     = fold[k] + fold[7 - k];
      even[7 - k] = fold[k] - fold[7 - k];
   }

   /*
    * Each term goes directly to the slot of the coefficient it
    * represents (the factor's angle XX*PI/NN equals k*PI/32 for
    * vector[k]).  Reordering the 8-point result once and then running
    * it through the interleaving permutation a second time would put
    * coefficients 2/8 and 6/12 into each other's slots.
    * NOTE(review): an inverse transform written against such an
    * exchanged ordering has to be checked against this one.
    */
   vector[ 0] = c_01_p_04 * (even[0] + even[1] + even[2] + even[3]);
   vector[ 8] = c_01_p_04 * (even[0] - even[1] - even[2] + even[3]);
   vector[ 4] = s_01_p_08 * (even[1] - even[2]) + c_01_p_08 * (even[0] - even[3]);
   vector[12] = c_03_p_08 * (even[0] - even[3]) + s_03_p_08 * (even[2] - even[1]);
   vector[ 2] = s_01_p_16 * (even[4] + c_01_pi_04 * (even[6] - even[5]))
              + c_01_p_16 * (even[7] + c_01_pi_04 * (even[5] + even[6]));
   vector[10] = s_05_p_16 * (even[4] - c_01_pi_04 * (even[6] - even[5]))
              + c_05_p_16 * (even[7] - c_01_pi_04 * (even[5] + even[6]));
   vector[ 6] = c_03_p_16 * (even[7] - c_01_pi_04 * (even[5] + even[6]))
              - s_03_p_16 * (even[4] - c_01_pi_04 * (even[6] - even[5]));
   vector[14] = c_07_p_16 * (even[7] + c_01_pi_04 * (even[5] + even[6]))
              - s_07_p_16 * (even[4] + c_01_pi_04 * (even[6] - even[5]));

/*----------------------------------------------------------*/
/*---- Odd coefficients: computed from fold[8..15]      ----*/
/*----------------------------------------------------------*/
   odd = fold + 8;

   y[0] = c_01_pi_04 * (odd[5] - odd[2]);
   y[1] = c_01_pi_04 * (odd[4] - odd[3]);
   y[2] = c_01_pi_04 * (odd[3] + odd[4]);
   y[3] = c_01_pi_04 * (odd[5] + odd[2]);

   z[0] = s_01_pi_08 * (odd[6] + y[3]) - c_01_pi_08 * (odd[1] + y[0]);
   z[1] = s_01_pi_08 * (odd[1] - y[0]) + c_01_pi_08 * (odd[6] - y[3]);
   z[2] = c_03_pi_08 * (odd[6] - y[3]) - s_03_pi_08 * (odd[1] - y[0]);
   z[3] = c_03_pi_08 * (odd[1] + y[0]) + s_03_pi_08 * (odd[6] + y[3]);

   vector[ 1] = s_01_p_32 * (odd[0] + y[1] + z[0])
              + c_01_p_32 * (odd[7] + y[2] + z[3]);
   vector[ 9] = s_09_p_32 * (odd[0] + y[1] - z[0])
              + c_09_p_32 * (odd[7] + y[2] - z[3]);
   vector[ 5] = s_05_p_32 * (odd[0] - y[1] + z[1])
              + c_05_p_32 * (odd[7] - y[2] - z[2]);
   vector[13] = s_13_p_32 * (odd[0] - y[1] - z[1])
              + c_13_p_32 * (odd[7] - y[2] + z[2]);
   vector[ 3] = c_03_p_32 * (odd[7] - y[2] + z[2])
              + s_03_p_32 * (y[1] + z[1] - odd[0]);
   vector[11] = c_11_p_32 * (odd[7] - y[2] - z[2])
              - s_11_p_32 * (odd[0] - y[1] + z[1]);
   vector[ 7] = c_07_p_32 * (odd[7] + y[2] - z[3])
              - s_07_p_32 * (odd[0] + y[1] - z[0]);
   vector[15] = c_15_p_32 * (odd[7] + y[2] + z[3])
              - s_15_p_32 * (odd[0] + y[1] + z[0]);

   return(1);
}
/* -library_code_end */
