/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001 Vivien Chappelier
                            Damien Vincent

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "fame.h"
#include "fame_shape.h"

/* Forward declarations of the shape coder methods defined in this file.
   The constructor below stores their addresses in the fame_shape_t
   object (init, close, enter, encode_intra_shape). */
static void int_init(fame_shape_t *shape,
		     int mb_width,
		     int mb_height,
		     unsigned int flags);
static void int_close(fame_shape_t *shape);
static void int_enter(fame_shape_t *shape,
		      unsigned char *mask,
		      unsigned char *ref,
		      unsigned char alpha_th);
static fame_bab_t int_encode_intra_shape(fame_shape_t *shape,
					 int mb_x,
					 int mb_y,
					 unsigned char **bab,
					 unsigned char *pattern);

/* Constructor of the shape coder object: binds the four method slots to
   the implementations of this file and gives the object its name. */
FAME_CONSTRUCTOR(fame_shape_t)
{
  /* method slots */
  FAME_SHAPE(this)->encode_intra_shape = int_encode_intra_shape;
  FAME_SHAPE(this)->enter = int_enter;
  FAME_SHAPE(this)->close = int_close;
  FAME_SHAPE(this)->init = int_init;

  /* object identification */
  FAME_OBJECT(this)->name = "shape coder";

  return this;
}

/* Threshold table of the upsampling filter.
   It is indexed by the 8-bit filter context `cf' built by the
   INTERPOLATION macro; an upsampled pixel is set to 1 when its weighted
   sum `interp' is strictly greater than th[cf].
   The table is only ever read, hence const. */
static const unsigned char th[256] =
{
  5,  6, 6,  8,  6,  7,  7,  8,  6,  5,  7,  6,  8,  8,  8,  8,
  6,  5, 5,  9,  8,  8,  8,  8,  7,  6,  8,  8,  8,  8,  8,  9,
  6,  7, 5,  8,  7,  9,  8,  8,  7,  8,  8,  8,  8,  8,  8,  9,
  7,  8, 8,  8,  8,  10, 8,  9,  8,  10, 8,  11, 8,  11, 9,  10,
  6,  7, 7,  8,  7,  8,  8,  8,  7,  8,  8,  8,  9,  8,  10, 9,
  7,  8, 8,  8,  8,  7,  8,  9,  8,  10, 8,  9,  8,  11, 9,  10,
  7,  8, 8,  10, 8,  10, 10, 11, 8,  8,  10, 9,  10, 11, 11, 10,
  8,  8, 8,  11, 8,  9,  11, 12, 8,  9,  11, 12, 11, 12, 10, 11,
  6,  7, 7,  9,  7,  6,  8,  8,  5,  8,  8,  8,  6,  8,  10, 9,
  8,  8, 8,  10, 8,  10, 8,  9,  8,  10, 10, 11, 10, 11, 9,  10,
  8,  8, 6,  10, 8,  8,  10, 9,  8,  8,  10, 9,  8,  9,  9,  10,
  9,  8, 10, 11, 8,  13, 9,  10, 10, 11, 11, 14, 11, 12, 10, 11,
  7,  8, 8,  8,  8,  8,  8,  9,  8,  8,  8,  9,  8,  9,  11, 10,
  9,  8, 8,  9,  8,  11, 9,  10, 10, 11, 9,  10, 11, 12, 10, 11,
  8,  8, 8,  11, 10, 11, 11, 10, 8,  11, 9,  12, 11, 12, 14, 11,
  10, 9, 11, 12, 11, 12, 12, 11, 9,  12, 12, 13, 12, 13, 11, 14
};

/* Row lengths (in bytes) of the work arrays used by the encoder below. */
/* row length of tmp_recon8x8[20][20]: 16 pixels + 2 border pixels per side */
#define SIZE_BAB16 20
/* row length of tmp_recon4x4[12][12]: 8 pixels + 2 border pixels per side */
#define SIZE_BAB8 12
/* not referenced in this file; 8 equals 4 pixels + 2 border pixels per side */
#define SIZE_BAB4 8
/* row length of tmp_downsample[8][8] (the Cr=1/2 averages) */
#define SIZE_DS1DEMI 8
/* not referenced in this file */
#define SIZE_DS1QUART 4

/* All-zero 4x4 block.  Comparing a 4x4 tile of the input against it with
   mean_absolute_binary_error() yields the number of pixels of the tile
   whose bit 0 is set. */
static unsigned char pbv0[4][4] = { { 0 } };

/* INTERPOLATION(tableau, i, j, interp, cf, instr1, instr2, instr3, instr4)
 *
 * Upsamples one 2x2 group of samples of the 2D array `tableau' into four
 * output pixels P1..P4.
 *
 * The group is A=tableau[i][j], B=tableau[i][j+1], C=tableau[i+1][j+1],
 * D=tableau[i+1][j]; its 8 surrounding neighbours are also read, so
 * indices i-1..i+2 and j-1..j+2 must be valid.
 *
 * For each output pixel the macro sets:
 *   interp - sum of the 8 neighbours + 2*(A+B+C+D) + 2*(nearest sample),
 *            the nearest sample being A for P1, B for P2, C for P3 and
 *            D for P4;
 *   cf     - an 8-bit context made of the 8 neighbours (one bit each).
 *            It is rotated left by two bits from one pixel to the next,
 *            so the same neighbour pattern is seen relative to each pixel.
 * and then executes the matching statement instr1..instr4 (callers use it
 * to compare interp against th[cf] and store the result).
 *
 * Caveats: a variable named `tmp' must exist in the enclosing scope (it
 * receives the part of the sum shared by the four pixels); the arguments
 * are expanded several times and are not parenthesized, so pass plain
 * identifiers only.
 */
#define INTERPOLATION(tableau,i,j,interp,cf,instr1,instr2,instr3,instr4)    \
{									    \
  tmp =									    \
    tableau[i-1][j+1] + tableau[i-1][j] +				    \
    tableau[i][j-1]   + tableau[i+1][j-1] +				    \
    tableau[i+2][j]   + tableau[i+2][j+1] +				    \
    tableau[i+1][j+2] +	tableau[i][j+2] +				    \
    ((tableau[i][j] + tableau[i][j+1] +                                     \
      tableau[i+1][j+1] + tableau[i+1][j])<<1);	                            \
   /* Interpolation of P1 */						    \
   cf =									    \
     (tableau[i-1][j+1]<<0) + (tableau[i-1][j]<<1) +			    \
     (tableau[i][j-1]<<2)   + (tableau[i+1][j-1]<<3) +			    \
     (tableau[i+2][j]<<4)   + (tableau[i+2][j+1]<<5) +			    \
     (tableau[i+1][j+2]<<6) + (tableau[i][j+2]<<7);			    \
   interp = tmp + (tableau[i][j]<<1);					    \
   instr1;								    \
   /* Interpolation of P2 */						    \
   cf = ((cf<<2)&255) +	(tableau[i+1][j+2]<<0) + (tableau[i][j+2]<<1);	    \
   interp = tmp + (tableau[i][j+1]<<1);					    \
   instr2;								    \
   /* Interpolation of P3 */						    \
   cf = ((cf<<2)&255) +	(tableau[i+2][j]<<0) + (tableau[i+2][j+1]<<1);	    \
   interp = tmp + (tableau[i+1][j+1]<<1);				    \
   instr3;								    \
   /* Interpolation of P4 */						    \
   cf = ((cf<<2)&255) + (tableau[i][j-1]<<0) + (tableau[i+1][j-1]<<1);	    \
   interp = tmp + (tableau[i+1][j]<<1);					    \
   instr4;								    \
}

/* Counts the positions at which two size x size blocks differ.
 *
 * Only bit 0 of each element is compared, so the blocks may hold any
 * values as long as their least significant bit carries the binary mask.
 *
 *   input1, pitch1 - first block and the distance between its rows
 *   input2, pitch2 - second block and the distance between its rows
 *   size           - number of rows and of columns to compare
 *
 * Returns the number of mismatching positions, saturated at 255 because
 * the return type is 8 bits wide (a 16x16 block with every position
 * mismatching must not wrap around to 0 and look like a perfect match).
 * Returns 0 when size <= 0.
 */
static unsigned char mean_absolute_binary_error(const unsigned char *input1,
						int pitch1,
						const unsigned char *input2,
						int pitch2,
						int size)
{
  unsigned int mismatches = 0;
  int row, col;

  for(row = 0; row < size; row++)
  {
    for(col = 0; col < size; col++)
      mismatches += (unsigned int)((input1[col] ^ input2[col]) & 1);
    input1 += pitch1;
    input2 += pitch2;
  }

  /* clamp instead of truncating to 8 bits */
  if(mismatches > 255u)
    mismatches = 255u;

  return (unsigned char) mismatches;
}





/* Stores the picture dimensions (mb_width x mb_height, counted in
   16-pixel macroblocks as used by the encoder's pitch computation) and
   the coder flags in the shape object. */
static void int_init(fame_shape_t *shape, int mb_width, int mb_height, unsigned int flags)
{
  /* coder options */
  shape->flags = flags;

  /* picture geometry */
  shape->mb_height = mb_height;
  shape->mb_width = mb_width;
}

/* Counterpart of int_init(); there is nothing to release. */
static void int_close(fame_shape_t *shape)
{
  (void) shape; /* intentionally unused */
}

/* Registers the buffers and the tolerance used for the next picture:
     input    - source alpha mask (may be NULL, see the encoder)
     ref      - buffer receiving the reconstructed shape
     alpha_th - error threshold */
static void int_enter(fame_shape_t *shape,
		      unsigned char *input,
		      unsigned char *ref,
		      unsigned char alpha_th)
{
  shape->alpha_th = alpha_th;
  shape->input = input;
  shape->recon = ref;
}

/*
 * Encode the shape (binary alpha block, "bab") of macroblock (mb_x, mb_y)
 * in intra mode.
 *
 * Reads the 16x16 region of shape->input belonging to the macroblock,
 * selects a representation and writes the reconstructed 0/1 mask to the
 * same region of shape->recon.
 *
 * Tolerance: m_alpha_th = shape->alpha_th >> 4 is the largest number of
 * mismatching pixels accepted in any one of the sixteen 4x4 tiles of the
 * macroblock.
 *
 * The representations are tried in this order, the first acceptable one
 * being returned:
 *   1. bab_not_coded    - every tile has at most m_alpha_th pixels with
 *                         bit 0 set; the reconstruction is all 0;
 *   2. bab_all_coded    - every tile has at most m_alpha_th pixels with
 *                         bit 0 clear; the reconstruction is all 1
 *                         (also chosen, with a warning, when
 *                         shape->input is NULL);
 *   3. bab_border_4x4   - the 4x4 downsampled block, upsampled twice,
 *                         is within tolerance;
 *   4. bab_border_8x8   - the 8x8 downsampled block, upsampled once,
 *                         is within tolerance;
 *   5. bab_border_16x16 - full resolution.
 * When FAME_SHAPE_LOSSLESS is set in shape->flags, 3 and 4 are always
 * rejected.
 *
 * Outputs:
 *   *bab      NULL for 1 and 2; otherwise the address of the first
 *             element of shape's bab4x4 / bab8x8 / bab16x16 buffer, which
 *             holds the block surrounded by a 2-pixel border on each side.
 *   *pattern  0 for 1, 15 for 2; otherwise bit 2*(row>=8) + (col>=8) is
 *             set when the matching 8x8 quadrant of the reconstruction
 *             contains at least one 1.
 *
 * Returns the fame_bab_t value naming the selected representation.
 */
static fame_bab_t int_encode_intra_shape(fame_shape_t *shape,
					 int mb_x, int mb_y,
					 unsigned char **bab,
					 unsigned char *pattern)
{
  /* tmp values or counters */
  int i, j;
  unsigned char tmp; /* scratch variable required by INTERPOLATION */
  unsigned char tmp_downsample[8][8];
  unsigned char *input;
  unsigned char *output;

  /* the interval to point at the next line of the picture (pitch) */
  int pitch, pitch4;
  /* index of the current block in the picture */
  int offset;

  /* the way the bab is coded */
  int is_bab_not_coded, is_bab_all_coded, is_bab_coded;

  /* a modified alpha_th */
  unsigned char m_alpha_th;

  /* Average of 2x2 pixels (s8) and average of 4x4 pixels (s4) */
  /* used for downsampling */
  unsigned char s8, s4;
  /* Reconstructed block (after upsampling) : -1 * 0 1 2 ... 15 * 16 */
  unsigned char tmp_recon8x8[20][20];
  unsigned char tmp_recon4x4[12][12];
  /* filter context for upsampling */
  unsigned char cf;
  /* Interpolation for upsampling */
  unsigned char interp;
  /* error between a bab and the bab from the input picture */
  unsigned char error;

  *bab = NULL;
  m_alpha_th = (shape->alpha_th>>4);
  /* one byte per pixel, 16 pixels per macroblock */
  pitch = (shape->mb_width << 4);
  pitch4 = pitch<<2;
  offset = (mb_y << 4) * pitch + (mb_x << 4);

  /* shorthands for the bordered buffers stored in the shape object
     (these macros stay defined until the end of the file) */
#define bab16x16 shape->bab16x16
#define bab8x8 shape->bab8x8
#define bab4x4 shape->bab4x4

  /****************************
   * not coded or all coded ? *
   ****************************/
  if(!shape->input) {
    FAME_WARNING("No shape provided! Assuming opaque macroblock\n");
    is_bab_all_coded = 1;
    is_bab_not_coded = 0;
  } else {
    input = shape->input + offset;
    is_bab_not_coded = 1;
    is_bab_all_coded = 1;
    /* visit the sixteen 4x4 tiles; error = number of pixels of the tile
       whose bit 0 is set (distance to the all-zero block pbv0) */
    for(i = 0; i < 4; i++) {
      for(j = 0; j < 4; j++) {
	error = mean_absolute_binary_error(input, pitch, &(pbv0[0][0]), 4, 4);
	is_bab_not_coded &= (error>m_alpha_th)?0:1;
	is_bab_all_coded &= (16-error>m_alpha_th)?0:1;
	input += 4;
      }
      /* next row of tiles: 4 lines down, back to the left edge */
      input += pitch4 - 16;
    }
  }
  
  if(is_bab_not_coded)
  {
    /* reconstruction is fully transparent */
    output = shape->recon + offset;
    for(i = 0; i < 16; i++) {
      memset(output, 0, 16);
      output += pitch;
    }
    *pattern = 0;
    return bab_not_coded;
  }
  
  if(is_bab_all_coded)
  {
    /* reconstruction is fully opaque */
    output = shape->recon + offset;
    for(i = 0; i < 16; i++) {
      memset(output, 1, 16);
      output += pitch;
    }
    *pattern = 15;
    return bab_all_coded;
  }

  /***************
   * shape coded *
   ***************/

  /* --- Down-Sampling --- */

  /* Down-sampling : Cr=1 */
  /* NOTE(review): any non-zero input byte becomes 1 here, whereas the
     full resolution reconstruction at the end of this function and
     mean_absolute_binary_error() only look at bit 0. Both agree only if
     non-zero input bytes always have bit 0 set - confirm the input
     convention. */
  input = shape->input + offset;
  for(i=2; i<18; i++)
  {
    for(j=2; j<18; j++)
    {
      bab16x16[i][j] = (*input)?1:0;
      input ++;
    }
    input += pitch - 16;
  }

  /* Down-sampling : Cr=1/2 */
  /* each output sample is the average of a 2x2 group of input bytes
     (every byte is divided by 4 before summing), thresholded at 126 */
  input = shape->input + offset;
  for(i=2; i<10; i++)
  {
    for(j=2; j<10; j++)
    {
      s8 = (input[0] >> 2) + (input[pitch+0] >> 2) +
	   (input[1] >> 2) + (input[pitch+1] >> 2);
      bab8x8[i][j] = (s8>=126)?1:0;
      tmp_downsample[i-2][j-2] = s8; /* save to downsample again and faster */ 
      input += 2;
    }
    /* two lines down, back to the left edge */
    input += (pitch<<1) - 16;
  }

  /* Down-sampling : Cr=1/4 */
  /* same averaging applied to the 8x8 averages saved above */
  input = &(tmp_downsample[0][0]);
  for(i=2; i<6; i++)
  {
    for(j=2; j<6; j++)
    {
      s4 = (input[0] >> 2) + (input[SIZE_DS1DEMI+0] >> 2) +
	   (input[1] >> 2) + (input[SIZE_DS1DEMI+1] >> 2);
      bab4x4[i][j] = (s4>=126)?1:0;
      input += 2;
    }
    input += SIZE_DS1DEMI*2 - 8;
  }

  /* --- fetch borders for upsampling (p29 and p34) --- */

  /* To fill the borders, we need the previous reconstructed binary blocks */
  /* The 2-pixel top and left borders of the three buffers are read from
     shape->recon; the 16x16 buffer takes the pixels as they are, the 8x8
     and 4x4 buffers combine 2 resp. 4 of them per border sample. */
  output = shape->recon + offset;
  if(mb_y)
  {
    /* top border exists */
    output -= pitch; 
    output -= pitch;

    /* top left border (->output=(i=-2, j=0)) */
    if(mb_x) {
      /* copy from the previous reconstructed block */
      output -= 2;   
      bab16x16[0][0] = bab8x8[0][0] = bab4x4[0][0] = output[0];
      bab16x16[0][1] = bab8x8[0][1] = bab4x4[0][1] = output[1];
      bab16x16[1][0] = bab8x8[1][0] = bab4x4[1][0] = output[pitch+0];
      bab16x16[1][1] = bab8x8[1][1] = bab4x4[1][1] = output[pitch+1];
      output += 2;
    } else {
      /* first column of macroblocks: nothing on the left */
      bab16x16[0][0] = bab8x8[0][0] = bab4x4[0][0] = 0;
      bab16x16[0][1] = bab8x8[0][1] = bab4x4[0][1] = 0;
      bab16x16[1][0] = bab8x8[1][0] = bab4x4[1][0] = 0;
      bab16x16[1][1] = bab8x8[1][1] = bab4x4[1][1] = 0;
    }

    /* top border (->output=(i=-2, j=0)) */
    for(j=2; j<18; j++) {
      bab16x16[0][j] = output[0];
      bab16x16[1][j] = output[pitch];
      output++;
    }
    output -= 16;
    /* 8x8: a border sample is 1 if either pixel of the pair is 1 */
    for(j=2; j<10; j++) {
      bab8x8[0][j] = output[0] | output[1];
      bab8x8[1][j] = output[pitch+0] | output[pitch+1];
      output+=2;
    }
    output -= 16;
    /* 4x4: a border sample is 1 if at least 2 of the 4 pixels are 1 */
    for(j=2; j<6; j++) {
      bab4x4[0][j] = ( (output[0] + output[1] + output[2] + output[3])>=2 )?1:0;
      bab4x4[1][j] = ( (output[pitch+0] + output[pitch+1] + output[pitch+2] + output[pitch+3])>=2 )?1:0;
      output+=4;
    }

    /* top right border (->output=(i=-2, j=16)) */
    if(mb_x < shape->mb_width-1) {
      /* copy from previous reconstructed block */
      bab16x16[0][18] = bab8x8[0][10] = bab4x4[0][6] = output[0];
      bab16x16[0][19] = bab8x8[0][11] = bab4x4[0][7] = output[1];
      bab16x16[1][18] = bab8x8[1][10] = bab4x4[1][6] = output[pitch+0];
      bab16x16[1][19] = bab8x8[1][11] = bab4x4[1][7] = output[pitch+1];
    } else {
      /* last column of macroblocks: nothing on the right */
      bab16x16[0][18] = bab8x8[0][10] = bab4x4[0][6] = 0;
      bab16x16[0][19] = bab8x8[0][11] = bab4x4[0][7] = 0;
      bab16x16[1][18] = bab8x8[1][10] = bab4x4[1][6] = 0;
      bab16x16[1][19] = bab8x8[1][11] = bab4x4[1][7] = 0;
    }
  } else {
    /* first row of macroblocks: clear the two border rows (including
       their corners) of each buffer */
    memset(bab16x16, 0, 2*20);
    memset(bab8x8, 0, 2*12);
    memset(bab4x4, 0, 2*8);
  }

  output = shape->recon + offset;
  if(mb_x) {
    /* left border exists */
    output -= 2;
    for(i=2; i<18; i++) {
      bab16x16[i][0] = output[0];
      bab16x16[i][1] = output[1];
      output += pitch;
    }
    output -= (pitch<<4);
    /* 8x8: OR of two vertically adjacent pixels */
    for(i=2; i<10; i++) {
      bab8x8[i][0] = output[0] | output[pitch+0];
      bab8x8[i][1] = output[1] | output[pitch+1];
      output += (pitch<<1);
    }
    output -= (pitch<<4);
    /* 4x4: sum of four vertically adjacent pixels, 1 if the sum is >= 2 */
    for(i=2; i<6; i++) {
      bab4x4[i][0] = output[0] + output[pitch+0];
      bab4x4[i][1] = output[1] + output[pitch+1];
      output += (pitch<<1);
      bab4x4[i][0] += output[0] + output[pitch+0];
      bab4x4[i][1] += output[1] + output[pitch+1];
      output += (pitch<<1);
      bab4x4[i][0] = (bab4x4[i][0]>=2)?1:0;
      bab4x4[i][1] = (bab4x4[i][1]>=2)?1:0;
    }
    output += 2;
  } else {
    /* first column of macroblocks: the left border is 0 */
    for(i=2; i<18; i++) {
      bab16x16[i][0] = 0;
      bab16x16[i][1] = 0;
    }
    for(i=2; i<10; i++) {
      bab8x8[i][0] = 0;
      bab8x8[i][1] = 0;
    }
    for(i=2; i<6; i++) {
      bab4x4[i][0] = 0;
      bab4x4[i][1] = 0;
    }
  }

  /* replicate padding for right & bottom borders */
  /* the last column / last row of the block is copied into the two
     border columns / rows; the bottom corners are then filled by
     extending the bottom border rows sideways */
  for(j=2; j<18; j++)
  {
    bab16x16[j][19] = bab16x16[j][18] = bab16x16[j][17];
    bab16x16[19][j] = bab16x16[18][j] = bab16x16[17][j];
  }
  bab16x16[18][0] = bab16x16[18][1] = bab16x16[18][2];
  bab16x16[19][0] = bab16x16[19][1] = bab16x16[19][2];
  bab16x16[18][19] = bab16x16[18][18] = bab16x16[18][17];
  bab16x16[19][19] = bab16x16[19][18] = bab16x16[19][17];

  for(j=2; j<10; j++)
  {
    bab8x8[11][j] = bab8x8[10][j] = bab8x8[9][j];
    bab8x8[j][11] = bab8x8[j][10] = bab8x8[j][9];
  }
  bab8x8[10][0] = bab8x8[10][1] = bab8x8[10][2];
  bab8x8[11][0] = bab8x8[11][1] = bab8x8[11][2];
  bab8x8[10][11] = bab8x8[10][10] = bab8x8[10][9];
  bab8x8[11][11] = bab8x8[11][10] = bab8x8[11][9];

  for(j=2; j<6; j++)
  {
    bab4x4[7][j] = bab4x4[6][j] = bab4x4[5][j];
    bab4x4[j][7] = bab4x4[j][6] = bab4x4[j][5];
  }
  bab4x4[6][0] = bab4x4[6][1] = bab4x4[6][2];
  bab4x4[7][0] = bab4x4[7][1] = bab4x4[7][2];
  bab4x4[6][7] = bab4x4[6][6] = bab4x4[6][5];
  bab4x4[7][7] = bab4x4[7][6] = bab4x4[7][5];


  /* --- Up-sampling (Interpolation) --- */

  /* Interpolation of bab4x4 (upsampling) */
  /* each INTERPOLATION call writes a 2x2 group of tmp_recon4x4; the
     loops fill rows and columns 1..10 */
  output = &tmp_recon4x4[1][1];
  for(i=1; i<6; i++)
  {
    for(j=1; j<6; j++)
    {
      INTERPOLATION(bab4x4, i, j, interp, cf,
		    output[0] = (interp>th[cf])?1:0,
		    output[1] = (interp>th[cf])?1:0,
		    output[1+SIZE_BAB8] = (interp>th[cf])?1:0,
		    output[0+SIZE_BAB8] = (interp>th[cf])?1:0
		    );
      output += 2;
    }
    /* two rows down, back to column 1 (5 groups = 10 columns written) */
    output += SIZE_BAB8*2 - 10;
  }
  /* Add the borders to the tmp_recon4x4 */
  /* top and left borders come from bab8x8; right and bottom borders are
     replicated from row/column 9, overwriting the interpolated
     row/column 10 */
  for(j=0; j<SIZE_BAB8; j++) {
    tmp_recon4x4[0][j] = bab8x8[0][j];
    tmp_recon4x4[1][j] = bab8x8[1][j];
  }
  for(i=2; i<SIZE_BAB8-2; i++) {
    tmp_recon4x4[i][0] = bab8x8[i][0];
    tmp_recon4x4[i][1] = bab8x8[i][1];
  }
  for(j=2; j<10; j++)
  {
    tmp_recon4x4[11][j] = tmp_recon4x4[10][j] = tmp_recon4x4[9][j];
    tmp_recon4x4[j][11] = tmp_recon4x4[j][10] = tmp_recon4x4[j][9];
  }
  tmp_recon4x4[10][0] = tmp_recon4x4[10][1] = tmp_recon4x4[10][2];
  tmp_recon4x4[11][0] = tmp_recon4x4[11][1] = tmp_recon4x4[11][2];
  tmp_recon4x4[10][11] = tmp_recon4x4[10][10] = tmp_recon4x4[10][9];
  tmp_recon4x4[11][11] = tmp_recon4x4[11][10] = tmp_recon4x4[11][9];
  /* Interpolation to real size (16x16) */
  /* fills rows and columns 1..18 of tmp_recon8x8; the macroblock itself
     is at [2..17][2..17] */
  output = &tmp_recon8x8[1][1];
  for(i=1; i<10; i++)
  {
    for(j=1; j<10; j++)
    {
      INTERPOLATION(tmp_recon4x4, i, j, interp, cf,
		    output[0] = (interp>th[cf])?1:0,
		    output[1] = (interp>th[cf])?1:0,
		    output[1+SIZE_BAB16] = (interp>th[cf])?1:0,
		    output[0+SIZE_BAB16] = (interp>th[cf])?1:0
		    );
      output += 2;
    }
    /* two rows down, back to column 1 (9 groups = 18 columns written) */
    output += SIZE_BAB16*2 - 18;
  }
  /* Has the bab4x4 accepted quality ? */ 
  /* accepted when every 4x4 tile of the reconstruction differs from the
     input in at most m_alpha_th positions */
  input = shape->input + offset;
  is_bab_coded = 1;
  for(i=0; i<16; i+=4)
  {
    for(j=0; j<16; j+=4)
    {
      error = mean_absolute_binary_error(input, pitch, &tmp_recon8x8[i+2][j+2], SIZE_BAB16, 4);
      is_bab_coded &= (error>m_alpha_th)?0:1;
      input += 4;
    }
    input += pitch4 - 16;
  }

  /* lossless mode never accepts a subsampled representation */
  if(shape->flags & FAME_SHAPE_LOSSLESS)
    is_bab_coded = 0;

  if(is_bab_coded)
  {
    /* store the reconstruction and build the quadrant pattern:
       bit index = 2 for rows 8..15 (else 0), plus 1 for columns 8..15 */
    output = shape->recon + offset;
    *pattern = 0;
    for(i=0; i<16; i++) {
      for(j=0; j<16; j++) {
        output[j] = tmp_recon8x8[i+2][j+2];
	*pattern |= (output[j] << (((i>>2)&2)+(j>>3)));
      }
      output += pitch;
    }

    /* correct replicate padding for CAE (bottom left corner padded from above) */
    bab4x4[7][0] = bab4x4[6][0] = bab4x4[5][0];
    bab4x4[7][1] = bab4x4[6][1] = bab4x4[5][1];

    *bab = &bab4x4[0][0];

    return bab_border_4x4;
  }

  /* Interpolation of bab8x8 (upsampling) */
  /* tmp_recon8x8 is reused: rows and columns 1..18 are rewritten */
  output = &tmp_recon8x8[1][1];
  for(i=1; i<10; i++)
  {
    for(j=1; j<10; j++)
    {
      INTERPOLATION(bab8x8, i, j, interp, cf,
		    output[0] = (interp>th[cf])?1:0,
		    output[1] = (interp>th[cf])?1:0,
		    output[1+SIZE_BAB16] = (interp>th[cf])?1:0,
		    output[0+SIZE_BAB16] = (interp>th[cf])?1:0
		    );
      output += 2;
    }
    output += SIZE_BAB16*2 - 18;
  }
  /* Has the bab8x8 accepted quality ? */ 
  input = shape->input + offset;
  is_bab_coded = 1;
  for(i=0; i<16; i+=4)
  {
    for(j=0; j<16; j+=4)
    {
      error = mean_absolute_binary_error(input, pitch, &tmp_recon8x8[i+2][j+2], SIZE_BAB16, 4);
      is_bab_coded &= (error>m_alpha_th)?0:1;
      input += 4;
    }
    input += pitch4 - 16;
  }

  /* lossless mode never accepts a subsampled representation */
  if(shape->flags & FAME_SHAPE_LOSSLESS)
    is_bab_coded = 0;

  if(is_bab_coded)
  {
    /* store the reconstruction and build the quadrant pattern */
    output = shape->recon + offset;
    *pattern = 0;
    for(i=0; i<16; i++) {
      for(j=0; j<16; j++) {
        output[j] = tmp_recon8x8[i+2][j+2];
	*pattern |= (output[j] << (((i>>2)&2)+(j>>3)));
      }
      output += pitch;
    }

    /* correct replicate padding for CAE (bottom left corner padded from above) */
    bab8x8[11][0] = bab8x8[10][0] = bab8x8[9][0];
    bab8x8[11][1] = bab8x8[10][1] = bab8x8[9][1];
    
    *bab = &bab8x8[0][0];

    return bab_border_8x8;
  }

  /* No Interpolation */
  /* full resolution: the reconstruction is bit 0 of every input pixel */
  input = shape->input + offset;
  output = shape->recon + offset;
  *pattern = 0;
  for(i = 0; i < 16; i++) {
    for(j = 0; j < 16; j++) {
      output[j] = input[j] & 1;
      *pattern |= (output[j] << (((i>>2)&2)+(j>>3)));
    }
    output += pitch;
    input += pitch;
  }
  /* correct replicate padding for CAE (bottom left corner padded from above) */
  bab16x16[19][0] = bab16x16[18][0] = bab16x16[17][0];
  bab16x16[19][1] = bab16x16[18][1] = bab16x16[17][1];


  *bab = &bab16x16[0][0];

  return bab_border_16x16;   
}

