#include <RpImageArea.h>
#include <RpImageTile.h>
#include <RpConvolver.h>
#include <assert.h>

//
// Builds a convolver that reads from pSource and applies pKernel, covering
// the same area as the source image.
//
// pSource - image to convolve; a reference is registered on it.
// pKernel - kernel tile; a reference is registered on it.
//
// If either pointer is null the object is left without inputs: both
// members are null, the area is all zeros and the type is RpFilm.
//
RpConvolver::RpConvolver( RpSampledImage *pSource, RpImageTile *pKernel )
{
   if ( !pSource || !pKernel )
   {
      RpImageArea	emptyArea;
      emptyArea.x = emptyArea.y = 0;
      emptyArea.z = emptyArea.c = 0;
      emptyArea.width = emptyArea.height = 0;

      _pSource = 0;
      _pKernel = 0;
      setArea( emptyArea );
      setType( RpFilm );
      return;
   }

   // Output is RpFloat only when the source type ranks above RpFilm.
   setType( ( precedence( pSource->getType() ) > precedence( RpFilm ) ) ? RpFloat : RpFilm );
   setArea( pSource->getArea() );

   _pSource = (RpInputImage *) pSource;
   _pSource->registerReference();

   _pKernel = pKernel;
   _pKernel->registerReference();
}


//
// Builds a convolver over an explicitly supplied area.
//
// pSource - image to convolve; a reference is registered on it.
// area    - area this image reports; applied even when an input is null.
// pKernel - kernel tile; a reference is registered on it.
//
// If either pointer is null the object is left without inputs (both
// members null) and its type is RpFilm.
//
RpConvolver::RpConvolver( RpInputImage *pSource, const RpImageArea& area, RpImageTile *pKernel )
{
   setArea( area );

   if ( !pSource || !pKernel )
   {
      setType( RpFilm );
      _pSource = 0;
      _pKernel = 0;
      return;
   }

   // Output is RpFloat only when the source type ranks above RpFilm.
   setType( ( precedence( pSource->getType() ) > precedence( RpFilm ) ) ? RpFloat : RpFilm );

   _pSource = pSource;
   _pSource->registerReference();

   _pKernel = pKernel;
   _pKernel->registerReference();
}



//
// Drops the references registered by the constructors, source first and
// kernel second.  Null members (input-less object) are skipped.
//
RpConvolver::~RpConvolver( void )
{
   if ( _pSource != 0 )
   {
      _pSource->unregisterReference();
   }

   if ( _pKernel != 0 )
   {
      _pKernel->unregisterReference();
   }
}



//
// Estimates the cost of producing the given area.
//
// area - region that would be computed.
//
// Returns (area pixels) * (kernel pixels) * 1.25, or 0 when the convolver
// has no source or no kernel.  The 1.25 factor is carried over unchanged;
// its rationale is not documented in this file.
//
float
RpConvolver::getCostEstimate( const RpImageArea& area )
{
   if ( !_pKernel || !_pSource )
      return( 0.0f );

   // Convert each dimension to float before multiplying: an integer
   // width * height product can wrap around for very large areas.
   const float	rAreaPixels = (float) area.width * (float) area.height;
   const float	rKernelPixels =
      (float) _pKernel->getArea().width * (float) _pKernel->getArea().height;

   return( rAreaPixels * rKernelPixels * 1.25f );
}



//
// Computes the convolved pixels for the area covered by pWriteHere.
//
// pWriteHere - destination tile; its type must equal this image's type
//              (checked by assert only).
//
// Returns 0 on success, -1 when the source, the kernel or pWriteHere is
// null, or when the source cannot supply a tile for the required area.
//
int
RpConvolver::fillTile( RpImageTile *pWriteHere )
{
   if ( !( _pSource && _pKernel && pWriteHere ) )
      return( -1 );

   // Work out which part of the source feeds the requested output area.
   RpImageArea	neededArea;
   setSourceArea( neededArea, _pKernel->getArea(), pWriteHere->getArea() );

   RpImageTile	*pInput = _pSource->newTile( neededArea );
   if ( !pInput )
      return( -1 );

   // The convolution routines read both buffers as float.
   pInput->typecast( RpFloat );
   _pKernel->typecast( RpFloat );

   assert( pWriteHere->getType() == getType() );
   assert(( getType() == RpFilm )||( getType() == RpFloat ));

   if ( getType() != RpFloat )
      doConvFilm( pWriteHere, pInput );
   else
      doConvFloat( pWriteHere, pInput );

   pInput->deleteTile();
   applyFillStrategy( pWriteHere );

   return( 0 );
}


//
// Derives the source area needed to compute destArea with a kernel whose
// extent is kernelArea.
//
// sourceArea - receives the result.
// kernelArea - kernel extent; its x/y give the kernel's offset from the
//              pixel being computed.
// destArea   - area to be produced.
//
// The origin is the destination origin shifted by the kernel origin.  The
// width and height are the far edge minus that origin, which works out to
// destination size plus kernel size.  c and z are copied from destArea.
//
void
RpConvolver::setSourceArea( RpImageArea& sourceArea, const RpImageArea& kernelArea, const RpImageArea& destArea )
{
   // horizontal extent
   sourceArea.x = kernelArea.x + destArea.x;
   sourceArea.width =
      ( destArea.x + (signed) destArea.width + kernelArea.x + (signed) kernelArea.width )
      - sourceArea.x;

   // vertical extent
   sourceArea.y = kernelArea.y + destArea.y;
   sourceArea.height =
      ( destArea.y + (signed) destArea.height + kernelArea.y + (signed) kernelArea.height )
      - sourceArea.y;

   // remaining coordinates pass straight through
   sourceArea.z = destArea.z;
   sourceArea.c = destArea.c;
}




//
// Applies the kernel to pSource and writes one float per pixel of
// pWriteHere.
//
// Each output pixel is the sum of products between the kernel (read in
// buffer order, not flipped) and the equally sized source window whose
// top-left pixel sits at the output pixel's row/column offset from the
// start of the source buffer.
//
// This is a serious processing bottleneck, so buffer pointers and tile
// dimensions are fetched once up front and the pixels are walked with raw
// pointers; no accessor is called inside the loops.
//
// pWriteHere - destination tile; its buffer is written as float.
// pSource    - source tile; its buffer is read as float.  No bounds
//              checks are made here: the tile is assumed to cover the
//              area computed by setSourceArea() -- confirm in callers.
//
void
RpConvolver::doConvFloat( RpImageTile *pWriteHere, const RpImageTile *pSource )
{
   assert( pWriteHere && pSource && _pKernel );

   // Every buffer must start at its own tile's origin pixel.
   assert( _pKernel->getBuffer() == _pKernel->getBufferAt( _pKernel->getArea().x, _pKernel->getArea().y ) );
   const float		*pKernelBase = (const float *) _pKernel->getBuffer();

   assert( pSource->getBuffer() == pSource->getBufferAt( pSource->getArea().x, pSource->getArea().y ) );
   const float		*pSrcRow = ( const float * ) pSource->getBuffer();

   assert( pWriteHere->getBuffer() == pWriteHere->getBufferAt( pWriteHere->getArea().x, pWriteHere->getArea().y ) );
   float		*pDest = (float *) pWriteHere->getBuffer();

   // Loop-invariant dimensions, read once.
   const unsigned	uDestWidth    = pWriteHere->getArea().width;
   const unsigned	uDestHeight   = pWriteHere->getArea().height;
   const unsigned	uSourceWidth  = pSource->getArea().width;
   const unsigned	uKernelWidth  = _pKernel->getArea().width;
   const unsigned	uKernelHeight = _pKernel->getArea().height;

   for( unsigned uRow = 0; uRow < uDestHeight; ++uRow )
   {
      // top-left corner of the source window for the current output pixel
      const float	*pWindow = pSrcRow;

      for( unsigned uCol = 0; uCol < uDestWidth; ++uCol )
      {
         const float	*pSrcLine = pWindow;
         const float	*pKernel = pKernelBase;
         float		rSum = 0.0f;

         for( unsigned uKy = 0; uKy < uKernelHeight; ++uKy )
         {
            for( unsigned uKx = 0; uKx < uKernelWidth; ++uKx )
            {
               rSum += pSrcLine[ uKx ] * (*pKernel);
               ++pKernel;
            }

            // same column, next source row
            pSrcLine += uSourceWidth;
         }

         //
         // record pixel value
         //
         *pDest = rSum;
         ++pDest;

         // the next output pixel uses the window one pixel to the right
         ++pWindow;
      }

      // the next output row uses windows starting one source row down
      pSrcRow += uSourceWidth;
   }
}



//
// Applies the kernel to pSource and writes one unsigned short per pixel
// of pWriteHere.
//
// Each output pixel starts as the sum of products between the kernel
// (read in buffer order, not flipped) and the equally sized source window
// whose top-left pixel sits at the output pixel's row/column offset from
// the start of the source buffer.  The sum is then scaled by
// (RpFilmWhite - RpFilmBlack), offset by RpFilmBlack, clamped to
// [RpFilmMin, RpFilmMax] and converted to unsigned short.
//
// This is a serious processing bottleneck, so buffer pointers and tile
// dimensions are fetched once up front and the pixels are walked with raw
// pointers; no accessor is called inside the loops.
//
// pWriteHere - destination tile; its buffer is written as unsigned short.
// pSource    - source tile; its buffer is read as float.  No bounds
//              checks are made here: the tile is assumed to cover the
//              area computed by setSourceArea() -- confirm in callers.
//
void
RpConvolver::doConvFilm( RpImageTile *pWriteHere, const RpImageTile *pSource )
{
   assert( pWriteHere && pSource && _pKernel );

   // Every buffer must start at its own tile's origin pixel.
   assert( _pKernel->getBuffer() == _pKernel->getBufferAt( _pKernel->getArea().x, _pKernel->getArea().y ) );
   const float		*pKernelBase = (const float *) _pKernel->getBuffer();

   assert( pSource->getBuffer() == pSource->getBufferAt( pSource->getArea().x, pSource->getArea().y ) );
   const float		*pSrcRow = ( const float * ) pSource->getBuffer();

   assert( pWriteHere->getBuffer() == pWriteHere->getBufferAt( pWriteHere->getArea().x, pWriteHere->getArea().y ) );
   unsigned short	*pDest = (unsigned short *) pWriteHere->getBuffer();

   // Loop-invariant dimensions, read once.
   const unsigned	uDestWidth    = pWriteHere->getArea().width;
   const unsigned	uDestHeight   = pWriteHere->getArea().height;
   const unsigned	uSourceWidth  = pSource->getArea().width;
   const unsigned	uKernelWidth  = _pKernel->getArea().width;
   const unsigned	uKernelHeight = _pKernel->getArea().height;

   for( unsigned uRow = 0; uRow < uDestHeight; ++uRow )
   {
      // top-left corner of the source window for the current output pixel
      const float	*pWindow = pSrcRow;

      for( unsigned uCol = 0; uCol < uDestWidth; ++uCol )
      {
         const float	*pSrcLine = pWindow;
         const float	*pKernel = pKernelBase;
         float		rSum = 0.0f;

         for( unsigned uKy = 0; uKy < uKernelHeight; ++uKy )
         {
            for( unsigned uKx = 0; uKx < uKernelWidth; ++uKx )
            {
               rSum += pSrcLine[ uKx ] * (*pKernel);
               ++pKernel;
            }

            // same column, next source row
            pSrcLine += uSourceWidth;
         }

         //
         // record pixel value: scale to the Film range, clamp, store
         //
         rSum *= ( RpFilmWhite - RpFilmBlack );
         rSum += RpFilmBlack;
         rSum = ( rSum > RpFilmMax ) ? ( RpFilmMax ) : ( rSum );
         *pDest = (unsigned short)((rSum < RpFilmMin ) ? ( RpFilmMin ) : ( rSum ));
         ++pDest;

         // the next output pixel uses the window one pixel to the right
         ++pWindow;
      }

      // the next output row uses windows starting one source row down
      pSrcRow += uSourceWidth;
   }
}

// ------------------------------------------------------------------------

//
// Creates an RpFloat kernel tile from a flat array of coefficients.
//
// fpSource - uWidth * uHeight floats, copied into the tile buffer in the
//            order given; must not be null when that count is non-zero.
// uWidth   - kernel width in pixels.
// uHeight  - kernel height in pixels.
//
// The tile's origin is placed at (-(uWidth / 2), -(uHeight / 2)); z and c
// are 0.
//
// Returns the new tile, or null if the tile could not be created (debug
// builds assert first).
//
RpImageTile *
RpConvolver::newKernel( const float *fpSource, unsigned uWidth, unsigned uHeight )
{
   RpImageArea		area;

   // Convert to signed before negating; negating the unsigned value
   // directly only gives the right answer through wrap-around conversion.
   area.x = -( (int) ( uWidth >> 1 ) );
   area.y = -( (int) ( uHeight >> 1 ) );
   area.width = uWidth;
   area.height = uHeight;
   area.z = area.c = 0;

   RpImageTile		*p = RpImageTile::newTile( area, RpFloat );
   assert( p );
   if ( !p )
      return( 0 );	// assert is compiled out under NDEBUG; do not dereference null

   float		*pPixel = (float *) p->getBuffer();
   const unsigned	uCount = uWidth * uHeight;

   for( unsigned uIndex = 0; uIndex < uCount; ++uIndex )
      pPixel[ uIndex ] = fpSource[ uIndex ];

   return( p );
}
