/*
 *----------------------------------------------------------------------
 *
 * Copyright 1991, University of New Mexico.  All rights reserved.
 * Permission to copy and modify this software and its documen-
 * tation only for internal use in your organization is hereby
 * granted, provided that this notice is retained thereon and
 * on all copies.  UNM makes no representations as to the sui-
 * tability and operability of this software for any purpose.
 * It is provided "as is" without express or implied warranty.
 * 
 * UNM DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FIT-
 * NESS.  IN NO EVENT SHALL UNM BE LIABLE FOR ANY SPECIAL,
 * INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY OTHER DAMAGES WHAT-
 * SOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
 * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PER-
 * FORMANCE OF THIS SOFTWARE.
 * 
 * No other rights, including, for example, the right to redis-
 * tribute this software and its documentation or the right to
 * prepare derivative works, are granted unless specifically
 * provided in a separate license agreement.
 *---------------------------------------------------------------------
 */

#include "unmcopyright.h"        /* Copyright 1991 by UNM */

/*>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>  <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 >>>>
 >>>>         File Name: lidct2d.c
 >>>>
 >>>>      Program Name: idct2d
 >>>>
 >>>> Date Last Updated: Mon Apr 22 21:53:34 1991 
 >>>>
 >>>>          Routines: lidct2d - the library call for idct2d
 >>>>
 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>   <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<*/


#include "vinclude.h"


/* -library_includes */
#define PI 3.1415927

/*
 * Forward declarations of the helper routines defined further down.
 * They are called before their definitions; without these the calls
 * implicitly declare external "int f()" functions, which clashes with
 * the later "static" definition of init_factors and is not valid C
 * from C99 on.  The old-style (empty parameter list) form is used on
 * purpose so that the declarations stay compatible with the K&R
 * definitions, in particular round_byte's float parameter.
 */
static int init_factors();
int round_byte();
int idct_1d();
int vec_idct_8();
int vec_idct_16();

/*
 * Transform constants, all filled in by init_factors():
 *
 *   con          normalisation factor sqrt(2 / block_size)
 *   pDD          the angle PI / DD
 *   c_NN_pi_DD   cos(NN * PI / DD)          (not scaled)
 *   s_NN_pi_DD   sin(NN * PI / DD)          (not scaled)
 *   c_NN_p_DD    con * cos(NN * PI / DD)    (scaled)
 *   s_NN_p_DD    con * sin(NN * PI / DD)    (scaled)
 *
 * The .._pi_08 and .._p_32 entries are only set for block size 16.
 */
     float  con, p04, p08, p16, p32;

     /* unscaled factors */
     float  c_01_pi_04;
     float  c_01_pi_08, c_03_pi_08, s_01_pi_08, s_03_pi_08;

     /* scaled factors, angles in units of PI/4 and PI/8 */
     float  c_01_p_04;
     float  c_01_p_08, c_03_p_08, s_01_p_08, s_03_p_08;

     /* scaled factors, angles in units of PI/16 */
     float  c_01_p_16, c_03_p_16, c_05_p_16, c_07_p_16;
     float  s_01_p_16, s_03_p_16, s_05_p_16, s_07_p_16;

     /* scaled factors, angles in units of PI/32 */
     float  c_01_p_32, c_03_p_32, c_05_p_32, c_07_p_32;
     float  c_09_p_32, c_11_p_32, c_13_p_32, c_15_p_32;
     float  s_01_p_32, s_03_p_32, s_05_p_32, s_07_p_32;
     float  s_09_p_32, s_11_p_32, s_13_p_32, s_15_p_32;
/* -library_includes_end */


/****************************************************************
*
* Routine Name: lidct2d - library call for idct2d
*
* Purpose:
*    
*    Compute blockwise two-dimensional  INVERSE  Discrete  Cosine
*    Transform (IDCT) of an image
*    
*    
* Input:
*    
*    image          pointer to xvimage structure to be processed
*    
*    block_size     block-size to be used for  the  transform,  either
*                   8x8 or 16x16
*    
*    round          round the reconstructed image pixel  values  to  8
*                   bit and limit the range to [0..255]
*    
*    
* Output:
*    
*    image          holds the result of the INVERSE  Discrete  Cosine
*                   Transform.  The output data is of type float, or
*                   of type byte when round is set.
*    
*    Return Value:  1 on success, 0 on failure.
*    
*    
*
* Written By: Michael Gilge, gilge@icsi.berkeley.edu

        Copyright 1991, Michael Gilge. All rights reserved.
        Distribution and use permitted as provided in the 
        KHOROS copyright as long as this notice is attached
        to the code and its derivatives.
****************************************************************/


/* -library_def */
int lidct2d(image, block_size, round)
struct xvimage *image;
int block_size, round;
/* -library_def_end */

/* -library_code */
{
    /*
     * Blockwise 2-D inverse DCT of `image`, performed in place.
     *
     * The pixel data is copied into a newly allocated float buffer,
     * transformed along the rows and then along the columns, and that
     * buffer replaces image->imagedata (the old buffer is freed).  If
     * `round` is non-zero the float result is then clamped to [0..255]
     * and stored as one byte per pixel instead.
     *
     * Returns 1 on success and 0 on failure.  On every failure except
     * the last one (no memory for the byte buffer) the image is left
     * untouched; in that last case the image already holds the valid,
     * unrounded float result.
     */
    int j, npix;               /* Pixel index and pixel count */
    int dimx, dimy;            /* Number of columns and rows */
    int blockx, blocky;        /* Number of blocks per row / per column */
    float *in, *in_ptr;        /* Float working array and cursor into it */
    unsigned char *cptr;
    short         *sptr;
    int           *iptr;
    float         *fptr;
    unsigned char *b, *bp;     /* Byte output array and cursor into it */

    if (image == NULL || image->imagedata == NULL)
    {  fprintf(stderr,"lidct2d: No input image data\n");
       return(0);
    }

    /*
     * Only 8- and 16-point kernels exist.  Anything else would divide
     * by zero below or run past the 16-element work vector in idct_1d.
     */
    if ((block_size != 8) && (block_size != 16))
    {  fprintf(stderr,"lidct2d: Block size must be 8 or 16\n");
       return(0);
    }

    dimx = image->row_size;            /* Number of columns */
    dimy = image->col_size;            /* Number of rows */

    /* Make sure size is legal for DCT */
    if (((dimx % block_size) != 0) || ((dimy % block_size) != 0))
    {  fprintf(stderr,"lidct2d: Input image size must be integer multiple of block-size\n");
       return(0);
    }

    blockx = dimx / block_size;
    blocky = dimy / block_size;
    npix   = dimx * dimy;

    /* Get space for the input float array */
    in = (float *)malloc(npix*sizeof(float));
    if (in == NULL)
    {  
       fprintf(stderr,"lidct2d: Could not allocate enough memory!\n");
       return(0);
    }

    init_factors(block_size);

    /* Transfer the pixel data to the float array */
    in_ptr = in;
    switch (image->data_storage_type)
    {
       case VFF_TYP_1_BYTE :
          cptr = (unsigned char *)(image->imagedata);
          for (j=0; j<npix; j++)
              *in_ptr++ = (float)(*cptr++); 
          break;
       case VFF_TYP_2_BYTE :
          sptr = (short *)(image->imagedata);
          for (j=0; j<npix; j++)
              *in_ptr++ = (float)(*sptr++); 
          break;
       case VFF_TYP_4_BYTE :
          iptr = (int *)(image->imagedata);
          for (j=0; j<npix; j++)
              *in_ptr++ = (float)(*iptr++); 
          break;
       case VFF_TYP_FLOAT :
          fptr = (float *)(image->imagedata);
          for (j=0; j<npix; j++)
              *in_ptr++ = (*fptr++);
          break;
       default :
          fprintf(stderr,"lidct2d: Unsupported data storage type\n");
          free(in);
          return(0);
    }

    free(image->imagedata);      /* Give back input image space */

    idct_1d(in, block_size, blockx, dimy, 0); /* inverse DCT over rows */
    idct_1d(in, block_size, blocky, dimx, 1); /* inverse DCT over columns */

    /* From here on the image owns the float buffer */
    image->data_storage_type = VFF_TYP_FLOAT;
    image->imagedata = (char * )in;

    if (round) 
      {
        b = (unsigned char *)malloc(npix*sizeof(unsigned char));
        if (b == NULL)
          {
            (void)fprintf(stderr,"lidct2d: Unable to malloc temp array\n");
            /*
             * Do NOT free `in` here: image->imagedata points at it, so
             * freeing would leave the image with a dangling pointer.
             * The image keeps the unrounded float result.
             */
            return(0);
          }
        in_ptr = in;
        bp = b;
        for (j=0; j<npix; j++) *bp++ = round_byte(*in_ptr++);
        free(in);
        image->data_storage_type = VFF_TYP_1_BYTE;
        image->imagedata = (char *)b;
      }

    return(1);
}


/*
 * init_factors - fill in the file-level cosine/sine tables used by
 * vec_idct_8 and vec_idct_16.
 *
 * block_size : transform length; it determines the normalisation
 *              factor con = sqrt(2 / block_size).  The PI/32 tables
 *              and the unscaled PI/8 factors are only computed when
 *              block_size is 16.
 *
 * Always returns 1.  The return type is int (and not void) because
 * the function is called before it is defined, so an old-style caller
 * sees it as returning int.
 */
static int init_factors (block_size)
  int block_size;
{
   /* normalisation factor folded into every "_p_" table entry */
   con = sqrt(2.0 / block_size);

   /* basic angle steps */
   p04 = PI / 4;
   p08 = PI / 8;
   p16 = PI / 16;
   p32 = PI / 32;

   /* unscaled cos(PI/4), needed for both block sizes */
   c_01_pi_04 = cos(p04);

   /* scaled factors needed for both block sizes */
   c_01_p_04  = con*cos(p04);

   c_01_p_08  = con*cos(p08);
   c_03_p_08  = con*cos(3*p08);
   s_01_p_08  = con*sin(p08);
   s_03_p_08  = con*sin(3*p08);

   c_01_p_16  = con*cos(p16);
   c_03_p_16  = con*cos(3*p16);
   c_05_p_16  = con*cos(5*p16);
   c_07_p_16  = con*cos(7*p16);
   s_01_p_16  = con*sin(p16);
   s_03_p_16  = con*sin(3*p16);
   s_05_p_16  = con*sin(5*p16);
   s_07_p_16  = con*sin(7*p16);

   if (block_size != 16)
      return (1);

   /* extra factors used only by the 16-point transform */
   c_01_pi_08 = cos(p08);
   c_03_pi_08 = cos(3*p08);
   s_01_pi_08 = sin(p08);
   s_03_pi_08 = sin(3*p08);

   c_01_p_32  = con*cos(p32);
   c_03_p_32  = con*cos(3*p32);
   c_05_p_32  = con*cos(5*p32);
   c_07_p_32  = con*cos(7*p32);
   c_09_p_32  = con*cos(9*p32);
   c_11_p_32  = con*cos(11*p32);
   c_13_p_32  = con*cos(13*p32);
   c_15_p_32  = con*cos(15*p32);

   s_01_p_32  = con*sin(p32);
   s_03_p_32  = con*sin(3*p32);
   s_05_p_32  = con*sin(5*p32);
   s_07_p_32  = con*sin(7*p32);
   s_09_p_32  = con*sin(9*p32);
   s_11_p_32  = con*sin(11*p32);
   s_13_p_32  = con*sin(13*p32);
   s_15_p_32  = con*sin(15*p32);

   return (1);
}


/*
 * round_byte - round a float to the nearest integer and clamp the
 * result to the 8-bit range.
 *
 * y : value to convert
 *
 * Returns 0 for y <= 0 (and for NaN), 255 for y >= 255, otherwise
 * y + 0.5 truncated towards zero.
 *
 * The definition stays old-style on purpose: callers that see no
 * prototype pass the argument as a double, which only an old-style
 * float parameter accepts correctly.
 */
int round_byte(y)
 float y;
{
   /* written as a negated test so that NaN is clamped too */
   if (!(y > 0))  return (0);
   if (y >= 255)  return (255);
   return ((int)(y + 0.5));
}


/*------------------------------------------------------------*/
/* Function idct_1d (in, blsz, block_nr, dim_in, dir)         */
/*                                                            */
/* One-dimensional INVERSE DCT of a complete image.           */
/* Depending on the direction parameter, horizontally or      */
/* vertically oriented vectors of blsz pixels are transformed */
/* in place.                                                  */
/* in        : Pointer to the float image array               */
/* blsz      : block size: 8 or 16                            */
/* block_nr  : Number of blocks along the transform direction */
/* dim_in    : Number of lines to process: the number of rows */
/*             for dir=0, the number of columns otherwise.    */
/*             For columns it is also the distance between    */
/*             vertically adjacent pixels.                    */
/* dir       : direction of operation: =0:rows / else:columns */
/*                                                            */
/* Returns 1 on success, 0 (image untouched) if blsz is not   */
/* 8 or 16.                                                   */
/*------------------------------------------------------------*/

int idct_1d (in, blsz, block_nr, dim_in, dir)
 float *in;
 int blsz, block_nr, dim_in, dir;
{
   float vec[16];              /* work vector for one block */
   float *blk;                 /* first pixel of the current block */
   int line, k, m;
   int step;                   /* distance between pixels of a block */

   /* vec holds 16 entries and only 8/16-point kernels exist */
   if ((blsz != 8) && (blsz != 16))
      return (0);

   step = (dir == 0) ? 1 : dim_in;

   for (line = 0; line < dim_in; line++)
   {
       for (k = 0; k < block_nr; k++)
       {
          /*
           * Rows are stored back to back, each block_nr*blsz pixels
           * long; a column starts at offset `line` and its blocks are
           * blsz*dim_in pixels apart.
           */
          if (dir == 0)
             blk = in + (line * block_nr + k) * blsz;
          else
             blk = in + line + k * blsz * dim_in;

          /* gather the block */
          for (m = 0; m < blsz; m++)
             vec[m] = blk[m * step];

          /* transform the block */
          if (blsz == 8)
             vec_idct_8 (vec);
          else
             vec_idct_16 (vec);

          /* scatter the result back */
          for (m = 0; m < blsz; m++)
             blk[m * step] = vec[m];
       }
   }

   return (1);
}


/*------------------------------------------------------------*/
/* Function vec_idct_8 (vector)                               */
/*                                                            */
/* One-dimensional INVERSE DCT of a vector of 8 coefficients. */
/* The output data (= the reconstructed samples) replaces the */
/* input data after the function call, i.e. input data is     */
/* overwritten.                                               */
/* vector  : 1-dimensional data array of length 8, used for   */
/*           input and output                                 */
/*                                                            */
/* Uses the factor tables set up by init_factors().           */
/* Always returns 1.                                          */
/*------------------------------------------------------------*/

int vec_idct_8 (vector)
float *vector;
{
   float  in[8];     /* coefficients in reordered sequence */
   float  mid[8];    /* intermediate results before the output butterfly */

/*---- Input butterfly ----------*/
   /* even-indexed coefficients go to in[0..3], odd ones to in[4..7] */
   in[0] = vector[0];
   in[1] = vector[4];
   in[2] = vector[2];
   in[3] = vector[6];
   in[4] = vector[1];
   in[5] = vector[5];
   in[6] = vector[3];
   in[7] = vector[7];

/*---- Transformation -----------*/
   /* contribution of the even coefficients */
   mid[0] = c_01_p_04 * (in[0] + in[1]) + c_01_p_08 * in[2] + c_03_p_08 * in[3];
   mid[1] = c_01_p_04 * (in[0] - in[1]) + s_01_p_08 * in[2] - s_03_p_08 * in[3];
   mid[2] = c_01_p_04 * (in[0] - in[1]) - s_01_p_08 * in[2] + s_03_p_08 * in[3];
   mid[3] = c_01_p_04 * (in[0] + in[1]) - c_01_p_08 * in[2] - c_03_p_08 * in[3];

   /* contribution of the odd coefficients */
   mid[4] = s_01_p_16 * in[4] - s_07_p_16 * in[7]
          + s_05_p_16 * in[5] - s_03_p_16 * in[6];
   mid[5] = c_01_pi_04 * ( ( in[4] * (c_01_p_16 - s_01_p_16) )
                         + ( in[5] * (s_05_p_16 - c_05_p_16) )
                         - ( in[6] * (c_03_p_16 + s_03_p_16) )
                         + ( in[7] * (s_07_p_16 + c_07_p_16) ) );
   mid[6] = c_01_pi_04 * ( ( in[4] * (c_01_p_16 + s_01_p_16) )
                         - ( in[5] * (s_05_p_16 + c_05_p_16) )
                         - ( in[6] * (c_03_p_16 - s_03_p_16) )
                         - ( in[7] * (s_07_p_16 - c_07_p_16) ) );
   mid[7] = c_01_p_16 * in[4] + c_07_p_16 * in[7]
          + c_05_p_16 * in[5] + c_03_p_16 * in[6];

/*---- Output butterfly ----------*/
   /* sample n and sample 7-n share the even part; the odd part flips sign */
   vector[0] = mid[0] + mid[7];
   vector[1] = mid[1] + mid[6];
   vector[2] = mid[2] + mid[5];
   vector[3] = mid[3] + mid[4];
   vector[4] = mid[3] - mid[4];
   vector[5] = mid[2] - mid[5];
   vector[6] = mid[1] - mid[6];
   vector[7] = mid[0] - mid[7];

   return (1);
}


/*------------------------------------------------------------*/
/* Function vec_idct_16 (vector)                              */
/*                                                            */
/* One-dimensional INVERSE DCT of a vector of 16              */
/* coefficients.  The output data (= the reconstructed        */
/* samples) replaces the input data after the function call,  */
/* i.e. input data is overwritten.                            */
/* vector  : 1-dimensional data array of length 16, used for  */
/*           input and output                                 */
/*                                                            */
/* Uses the factor tables set up by init_factors() for block  */
/* size 16.  Always returns 1.                                */
/*------------------------------------------------------------*/

int vec_idct_16 (vector)
  float *vector;
{
   float  tmp[16], upper[8], uptmp[8], lotmp[8], lower[8];
   float  *vp, *tp, *up, *utp, *lp, *ltp;
   
   vp = vector;
   tp = tmp;
   up = upper;
   lp = lower;
   utp = uptmp;
   ltp = lotmp;
   
/*---- Input butterfly ----------*/
   /* even-indexed coefficients go to tmp[0..7], odd ones to tmp[8..15] */
   *tp      = *vp;
   *(tp+ 1) = *(vp+ 8);
   *(tp+ 2) = *(vp+ 4);   
   *(tp+ 3) = *(vp+12);
   *(tp+ 4) = *(vp+ 2);
   *(tp+ 5) = *(vp+10);
   *(tp+ 6) = *(vp+ 6);   
   *(tp+ 7) = *(vp+14);
   *(tp+ 8) = *(vp+ 1);
   *(tp+ 9) = *(vp+ 9);
   *(tp+10) = *(vp+ 5);   
   *(tp+11) = *(vp+13);
   *(tp+12) = *(vp+ 3);
   *(tp+13) = *(vp+11);
   *(tp+14) = *(vp+ 7);   
   *(tp+15) = *(vp+15);

/*---------------------------------------------------------*/
/*---- Transformation of the upper half = elements 0-7 ----*/
/*---------------------------------------------------------*/
   /*
    * NOTE(review): tmp[0..7] already holds the even coefficients in
    * the 0,4,2,6,1,5,3,7 order that the 8-point kernel of vec_idct_8
    * consumes.  The copy below applies that same permutation a second
    * time, which puts them back in natural order (vector[0], [2],
    * [4], ... [14]).  The kernel therefore appears to see vector[2] /
    * vector[8] and vector[6] / vector[12] exchanged compared with a
    * textbook 16-point IDCT.  This may be deliberate if the matching
    * forward transform mirrors it -- confirm against that code before
    * changing anything here.
    */
   *up     = *tp;
   *(up+1) = *(tp+4);
   *(up+2) = *(tp+2);
   *(up+3) = *(tp+6);
   *(up+4) = *(tp+1);
   *(up+5) = *(tp+5);
   *(up+6) = *(tp+3);
   *(up+7) = *(tp+7);   
                        
   /* same 8-point kernel as in vec_idct_8 */
   *utp     = c_01_p_04 * (*up + *(up+1)) + c_01_p_08 * (*(up+2)) + c_03_p_08 * (*(up+3));
   *(utp+1) = c_01_p_04 * (*up - *(up+1)) + s_01_p_08 * (*(up+2)) - s_03_p_08 * (*(up+3));
   *(utp+2) = c_01_p_04 * (*up - *(up+1)) - s_01_p_08 * (*(up+2)) + s_03_p_08 * (*(up+3));
   *(utp+3) = c_01_p_04 * (*up + *(up+1)) - c_01_p_08 * (*(up+2)) - c_03_p_08 * (*(up+3));
   *(utp+4) = s_01_p_16 * (*(up+4)) - s_07_p_16 * (*(up+7)) 
            + s_05_p_16 * (*(up+5)) - s_03_p_16 * (*(up+6));     
   *(utp+5) = c_01_pi_04 * ( (*(up+4) * (c_01_p_16 - s_01_p_16))
                           + (*(up+5) * (s_05_p_16 - c_05_p_16))
                           - (*(up+6) * (c_03_p_16 + s_03_p_16))
                           + (*(up+7) * (s_07_p_16 + c_07_p_16)) );
   *(utp+6) = c_01_pi_04 * ( (*(up+4) * (c_01_p_16 + s_01_p_16))
                           - (*(up+5) * (s_05_p_16 + c_05_p_16))
                           - (*(up+6) * (c_03_p_16 - s_03_p_16))
                           - (*(up+7) * (s_07_p_16 - c_07_p_16)) );
   *(utp+7) = c_01_p_16 * (*(up+4)) + c_07_p_16 * (*(up+7)) 
            + c_05_p_16 * (*(up+5)) + c_03_p_16 * (*(up+6));     

   /* `upper` is reused: from here on it holds the even-part results */
   *up     = *utp     + *(utp+7);
   *(up+1) = *(utp+1) + *(utp+6);
   *(up+2) = *(utp+2) + *(utp+5);
   *(up+3) = *(utp+3) + *(utp+4);
   *(up+4) = *(utp+3) - *(utp+4);
   *(up+5) = *(utp+2) - *(utp+5);
   *(up+6) = *(utp+1) - *(utp+6);
   *(up+7) = *utp     - *(utp+7);   

/*----------------------------------------------------------*/
/*---- Transformation of the lower half = elements 8-15 ----*/   
/*----------------------------------------------------------*/
   /* tp[0..7] now addresses vector[1], [9], [5], [13], [3], [11], [7], [15] */
   tp = tmp + 8;
      
   /* pairwise products with the scaled PI/32 sine and cosine factors */
   *ltp     = s_01_p_32 * (*tp)     - s_15_p_32 * (*(tp+7));
   *(ltp+1) = s_09_p_32 * (*(tp+1)) - s_07_p_32 * (*(tp+6));
   *(ltp+2) = s_05_p_32 * (*(tp+2)) - s_11_p_32 * (*(tp+5));
   *(ltp+3) = s_13_p_32 * (*(tp+3)) - s_03_p_32 * (*(tp+4));
   *(ltp+4) = c_03_p_32 * (*(tp+4)) + c_13_p_32 * (*(tp+3));
   *(ltp+5) = c_11_p_32 * (*(tp+5)) + c_05_p_32 * (*(tp+2));
   *(ltp+6) = c_07_p_32 * (*(tp+6)) + c_09_p_32 * (*(tp+1));
   *(ltp+7) = c_15_p_32 * (*(tp+7)) + c_01_p_32 * (*tp);
                                                                   
   /* combine them using the unscaled PI/4 and PI/8 factors */
   *lp     = *ltp + *(ltp+1) + *(ltp+2) + *(ltp +3);
   *(lp+1) = c_03_pi_08 * (*(ltp+7) - *(ltp+6)) + c_01_pi_08 * (*(ltp+1) - *ltp)
           + s_03_pi_08 * (*(ltp+5) - *(ltp+4)) + s_01_pi_08 * (*(ltp+2) - *(ltp+3));
   *(lp+2) = c_01_pi_04 * 
           ( s_01_pi_08 * (*ltp     - *(ltp+1)) + s_03_pi_08 * (*(ltp+7) - *(ltp+6))
           + c_01_pi_08 * (*(ltp+3) - *(ltp+2)) + c_03_pi_08 * (*(ltp+5) - *(ltp+4))      
           + c_01_pi_08 * (*ltp     - *(ltp+1)) + c_03_pi_08 * (*(ltp+6) - *(ltp+7))
           + s_01_pi_08 * (*(ltp+2) - *(ltp+3)) + s_03_pi_08 * (*(ltp+5) - *(ltp+4)) );
   *(lp+3) = c_01_pi_04 * ( *(ltp+7) + *(ltp+6) - *(ltp+5) - *(ltp+4) 
                          + *(ltp+3) + *(ltp+2) - *(ltp+1) - *ltp );
   *(lp+4) = c_01_pi_04 * ( *(ltp+7) + *(ltp+6) - *(ltp+5) - *(ltp+4) 
                          - *(ltp+3) - *(ltp+2) + *(ltp+1) + *ltp );                           
   *(lp+5) = c_01_pi_04 * 
           ( c_01_pi_08 * (*(ltp+1) - *ltp    ) + c_03_pi_08 * (*(ltp+7) - *(ltp+6))
           + s_01_pi_08 * (*(ltp+3) - *(ltp+2)) + s_03_pi_08 * (*(ltp+4) - *(ltp+5))      
           + s_01_pi_08 * (*ltp     - *(ltp+1)) + s_03_pi_08 * (*(ltp+7) - *(ltp+6))
           + c_01_pi_08 * (*(ltp+3) - *(ltp+2)) + c_03_pi_08 * (*(ltp+5) - *(ltp+4)) );
   *(lp+6) = c_03_pi_08 * (*(ltp+4) - *(ltp+5)) + c_01_pi_08 * (*(ltp+2) - *(ltp+3))
           + s_03_pi_08 * (*(ltp+7) - *(ltp+6)) + s_01_pi_08 * (*ltp     - *(ltp+1));
   *(lp+7) = *(ltp+4) + *(ltp+5) + *(ltp+6) + *(ltp+7);

/*---- Output butterfly ----------*/
   /* sample n = upper[n] + lower[7-n], sample 15-n = upper[n] - lower[7-n] */
   *vp      = *(lp+7) + *up;
   *(vp+ 1) = *(lp+6) + *(up+1);
   *(vp+ 2) = *(lp+5) + *(up+2);
   *(vp+ 3) = *(lp+4) + *(up+3);
   *(vp+ 4) = *(lp+3) + *(up+4);
   *(vp+ 5) = *(lp+2) + *(up+5);
   *(vp+ 6) = *(lp+1) + *(up+6);
   *(vp+ 7) = *lp     + *(up+7);
   *(vp+ 8) = *(up+7) - *lp;
   *(vp+ 9) = *(up+6) - *(lp+1);
   *(vp+10) = *(up+5) - *(lp+2);
   *(vp+11) = *(up+4) - *(lp+3);
   *(vp+12) = *(up+3) - *(lp+4);
   *(vp+13) = *(up+2) - *(lp+5);
   *(vp+14) = *(up+1) - *(lp+6);
   *(vp+15) = *up     - *(lp+7);

   return (1);
}
/* -library_code_end */
