/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001 Damien Vincent

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "fame.h"
#include "fame_motion.h"
#include "fame_motion_pmvfast.h"
#include "mad_int.h"

/* Both switches are forced off here: DEBUG and STAT are undefined, so
 * every "#if DEBUG" / "#if STAT" section in this file is compiled out. */
#undef DEBUG
#undef STAT

#ifdef DEBUG
static FILE *debug_log;   /* trace file, opened in pmvfast_init */
#endif
#ifdef STAT
static FILE *stat_log;    /* statistics file, opened in pmvfast_init */
#endif

/* Search statistics.  All updates to these counters sit inside
 * "#if STAT" sections; the per-picture counters are reset in
 * pmvfast_enter and some are printed in pmvfast_leave. */
typedef struct
{
  int num_picture;        /* picture counter, incremented in pmvfast_leave */

  int num_intra;          /* reset per picture; no other update in this file */
  int num_median;         /* macroblocks that kept the predicted vector */
  int num_early;          /* macroblocks that took the early exit */
  int num_16x16;          /* reset per picture; no other update in this file */
  int num_8x8;            /* reset per picture; no other update in this file */

  int num_largediamond;   /* macroblock searches run with the large diamond */
  int num_smalldiamond;   /* macroblock searches run with the small diamond */

  int num_eval;           /* number of block error evaluations */
} stat_motion_pmvfast;
/* Single global instance (external linkage). */
stat_motion_pmvfast motionstat;


/******************** PUBLIC DEFINITIONS ********************/

/* Entry points installed into the fame_motion_t object by the constructor
 * below.  They are static: callers reach them only through the object's
 * function pointers. */

/* Chains to the saved init and allocates the per-macroblock vector store. */
static void pmvfast_init(fame_motion_t *motion,
			 int mb_width,
			 int mb_height,
			 unsigned int flags);

/* Chains to the saved close and frees the vector store. */
static void pmvfast_close(fame_motion_t *motion);
/* Chains to the saved enter; called with the reference planes, the current
 * picture, the shape mask and the search range. */
static void pmvfast_enter(fame_motion_t *motion,
			  fame_yuv_t **ref,
			  fame_yuv_t *current,
			  unsigned char *shape,
			  int search_range);

/* Estimates the motion of macroblock (mb_x, mb_y); vectors holds four
 * entries, one per 8x8 block. */
static fame_motion_coding_t pmvfast_estimation(fame_motion_t *motion,
					       int mb_x, int mb_y,
					       fame_motion_vector_t *vectors);

/* Chains to the saved leave. */
static void pmvfast_leave(fame_motion_t *motion);

/* Constructor for the PMVFAST motion estimator object.
 * Runs the fame_motion_t constructor first, then for init/close/enter/leave
 * saves the handler currently installed (so the pmvfast_* functions can
 * chain to it through FAME_OVERLOADED) and installs the pmvfast_* version.
 * The estimation handler is replaced without saving the previous one. */
FAME_CONSTRUCTOR(fame_motion_pmvfast_t)
{
  fame_motion_t_constructor(FAME_MOTION(this));
  FAME_OBJECT(this)->name = "predictive motion estimation";
  /* save-then-override, one pair per handler */
  this->FAME_OVERLOADED(init) = FAME_MOTION(this)->init;
  FAME_MOTION(this)->init = pmvfast_init;
  this->FAME_OVERLOADED(close) = FAME_MOTION(this)->close;
  FAME_MOTION(this)->close = pmvfast_close;
  this->FAME_OVERLOADED(enter) = FAME_MOTION(this)->enter;
  FAME_MOTION(this)->enter = pmvfast_enter;
  this->FAME_OVERLOADED(leave) = FAME_MOTION(this)->leave;
  FAME_MOTION(this)->leave = pmvfast_leave;
  /* no saved copy: the previous estimation handler is never called here */
  FAME_MOTION(this)->estimation = pmvfast_estimation;
  return(this);
}

/******************** PRIVATE DEFINITIONS ********************/

/* Median of three values.
 * The whole expansion is parenthesised so the macro can be used as an
 * operand of a larger expression (e.g. 2*MEDIAN(a,b,c)).
 * Arguments are evaluated more than once: do not pass expressions with
 * side effects. */
#define MEDIAN(a,b,c) (((b)<(a))?(((c)>(a))?(a):(((c)<(b))?(b):(c))):(((c)<(a))?(a):(((c)>(b))?(b):(c))))

/* One candidate displacement of a search pattern.  dx/dy are pattern
 * units; the search functions scale them by their step argument. */
typedef struct
{
  int dx;
  int dy;
  int index_direction; /* the index of the direction : index = index(dx,dy) */
} direction_t;

/* A list of candidate displacements to try in one search iteration. */
typedef struct
{
  int nbre;                /* number of entries in directions */
  direction_t *directions; /* the candidate displacements */
} tab_direction_t;


/* Pattern index used by the search loops when no direction has been
 * taken; table[NULL_MOTION] is the full pattern without its centre. */
#define NULL_MOTION 1
/* Error assigned to candidates rejected by the bounds/range checks, and
 * initial value of threshold1 in pmvfast_estimation. */
#define INFINITE_ERROR 0xFFFFU


/* Not referenced in the code of this file. */
#define MOTION_INTRA 0x0001
#define MOTION_INTER 0x0002


/* Not referenced in the code of this file. */
#define SIZE_MB 16
#define SIZE_SB 8


/* THRESHOLD0 is not referenced in this file.  The *_LIMIT_* values bound
 * threshold1 and threshold2 in pmvfast_estimation. */
#define THRESHOLD0 256
#define LOWER_LIMIT_THRESHOLD1 512
#define UPPER_LIMIT_THRESHOLD1 1024
#define UPPER_LIMIT_THRESHOLD2 1792

/* threshold2 must be below THRESHOLD_SMALLDIAMOND for the small diamond
 * to be chosen.  THRESHOLD8x8 is not referenced in this file. */
#define THRESHOLD_SMALLDIAMOND 1536
#define THRESHOLD8x8 256

/* Evaluates eval_error for the first `number` entries of vectors, block i
 * being located at offset[i], and stores each result in vectors[i].error. */
static inline void get_error(fame_yuv_t **ref,
			     unsigned char *current,
			     unsigned char *shape,
			     fame_motion_vector_t *vectors,
			     int *offset,
			     int pitch,
			     compute_error_t eval_error,
			     int number);

/* Tries the four candidate vectors in pvector against the current best in
 * vectors.  Returns 0 when the candidate replaced the best, 1 otherwise. */
static inline int check_vector(fame_yuv_t **ref,
			       unsigned char *current,
			       unsigned char *shape,
			       int x,
			       int y,
			       int width,
			       int height,
			       fame_motion_vector_t *pvector,
			       fame_motion_vector_t *vectors,
			       int *offset,
			       int pitch,
			       compute_error_t eval_error);

/* Pattern search for one vector shared by the four 8x8 blocks of a
 * macroblock; mv points to four entries that are refined in place. */
static void find_macroblockvector(fame_yuv_t **ref,
				  unsigned char *current,
				  unsigned char *shape,
				  int offset[4],
				  int x,
				  int y,
				  int width,
				  int height,
				  int pitch,
				  tab_direction_t *table,
				  int search_range,
				  int step,
				  compute_error_t eval_error,
				  fame_motion_vector_t *mv);

/* Pattern search for the vector of a single 8x8 block located at offset;
 * mv points to one entry that is refined in place. */
static void find_blockvector(fame_yuv_t **ref,
			     unsigned char *current,
			     unsigned char *shape,
			     int offset,
			     int x,
			     int y,
			     int width,
			     int height,
			     int pitch,
			     tab_direction_t *table,
			     int search_range,
			     int step,
			     compute_error_t eval_error,
			     fame_motion_vector_t *mv);

/* motion tables */

/* large diamond:
 *     4
 *   5   3
 * 6   1   2
 *   7   9
 *     8
 *
 * The numbers are the index_direction values of the entries below; 1 is
 * the centre (0,0) and positive dy is drawn upwards in this diagram.
 * td_block_largediamond[k] is the list of points tried by the search
 * loops when the previous iteration moved in direction k.  Entry 1 is the
 * whole pattern without its centre (used for the first iteration and
 * whenever no direction was taken); entry 0 additionally contains the
 * centre and is not selected by the search loops in this file.
 */

static direction_t td_block_largediamond0[9] =
{
  { 0, 0, 1},  { 2, 0, 2},  { 1, 1, 3},
  { 0, 2, 4},  {-1, 1, 5},  {-2, 0, 6},
  {-1,-1, 7},  { 0,-2, 8},  { 1,-1, 9}
};
static direction_t td_block_largediamond1[8] =
{
  { 2, 0, 2},  { 1, 1, 3},  { 0, 2, 4},
  {-1, 1, 5},  {-2, 0, 6},  {-1,-1, 7},
  { 0,-2, 8},  { 1,-1, 9}
};
static direction_t td_block_largediamond2[5] =
{
  { 2, 0, 2},  { 1, 1, 3},  { 0, 2, 4},
  { 0,-2, 8},  { 1,-1, 9}
};
static direction_t td_block_largediamond3[5] =
{
  { 2, 0, 2},  { 1, 1, 3},  { 0, 2, 4},
  {-2, 0, 6},  { 0,-2, 8},
};
static direction_t td_block_largediamond4[5] =
{
  { 2, 0, 2},  { 1, 1, 3},  { 0, 2, 4},
  {-1, 1, 5},  {-2, 0, 6}
};
static direction_t td_block_largediamond5[5] =
{
  { 2, 0, 2},  { 0, 2, 4},  {-1, 1, 5},
  {-2, 0, 6},  { 0,-2, 8}
};
static direction_t td_block_largediamond6[5] =
{
  { 0, 2, 4},  {-1, 1, 5},  {-2, 0, 6},
  {-1,-1, 7},  { 0,-2, 8}
};
static direction_t td_block_largediamond7[5] =
{
  { 2, 0, 2},  { 0, 2, 4},  {-2, 0, 6},
  {-1,-1, 7},  { 0,-2, 8}
};
static direction_t td_block_largediamond8[5] =
{
  { 2, 0, 2},  {-2, 0, 6},  {-1,-1, 7},
  { 0,-2, 8},  { 1,-1, 9}
};
static direction_t td_block_largediamond9[5] =
{
  { 2, 0, 2},  { 0, 2, 4},  {-2, 0, 6},
  { 0,-2, 8},  { 1,-1, 9}
};

/* Indexed by the direction of the previous move (see above); each entry
 * pairs a point count with the matching array. */
static tab_direction_t td_block_largediamond[10] =
{
  {9,td_block_largediamond0}, {8,td_block_largediamond1},
  {5,td_block_largediamond2}, {5,td_block_largediamond3},
  {5,td_block_largediamond4}, {5,td_block_largediamond5},
  {5,td_block_largediamond6}, {5,td_block_largediamond7},
  {5,td_block_largediamond8}, {5,td_block_largediamond9}
};

/* small diamond:
 *   3
 * 4 1 2
 *   5
 *
 * Same layout as the large diamond tables: the numbers are the
 * index_direction values, 1 being the centre.  td_block_smalldiamond[k]
 * lists the points to try after a move in direction k; for k >= 2 these
 * are the three points of the diamond around the new centre that do not
 * coincide with the previous centre.
 */

static direction_t td_block_smalldiamond0[5] =
{
  { 0, 0, 1}, { 1, 0, 2}, { 0, 1, 3},
  {-1, 0, 4}, { 0,-1, 5}
};
static direction_t td_block_smalldiamond1[4] =
{
  { 1, 0, 2}, { 0, 1, 3},
  {-1, 0, 4}, { 0,-1, 5}  
};
static direction_t td_block_smalldiamond2[3] =
{
  { 1, 0, 2}, { 0, 1, 3},
  { 0,-1, 5}  
};
static direction_t td_block_smalldiamond3[3] =
{
  { 1, 0, 2}, { 0, 1, 3},
  {-1, 0, 4}
};
static direction_t td_block_smalldiamond4[3] =
{
  { 0, 1, 3},
  {-1, 0, 4}, { 0,-1, 5}  
};
static direction_t td_block_smalldiamond5[3] =
{
  { 1, 0, 2},
  {-1, 0, 4}, { 0,-1, 5}  
};

/* Indexed by the direction of the previous move. */
static tab_direction_t td_block_smalldiamond[6] =
{
  {5,td_block_smalldiamond0}, {4,td_block_smalldiamond1},
  {3,td_block_smalldiamond2}, {3,td_block_smalldiamond3},
  {3,td_block_smalldiamond4}, {3,td_block_smalldiamond5}
};

/* gradient descent:
 * 5 4 3
 * 6 1 2
 * 7 8 9
 *
 * 3x3 neighbourhood pattern; the numbers are the index_direction values,
 * 1 being the centre.  td_block_bbgds[k] lists the points to try after a
 * move in direction k: three new points after a horizontal or vertical
 * move, five after a diagonal move.
 */

static direction_t td_block_bbgds0[9] =
{
  { 0, 0, 1},  { 1, 0, 2},  { 1, 1, 3},
  { 0, 1, 4},  {-1, 1, 5},  {-1, 0, 6},
  {-1,-1, 7},  { 0,-1, 8},  { 1,-1, 9}
};
static direction_t td_block_bbgds1[8] =
{
  { 1, 0, 2},  { 1, 1, 3},  { 0, 1, 4},
  {-1, 1, 5},  {-1, 0, 6},  {-1,-1, 7},
  { 0,-1, 8},  { 1,-1, 9}
};
static direction_t td_block_bbgds2[3] =
{
  { 1, 0, 2},  { 1, 1, 3},  { 1,-1, 9}
};
static direction_t td_block_bbgds3[5] =
{
  { 1, 0, 2},  { 1, 1, 3},  { 0, 1, 4},
  {-1, 1, 5},  { 1,-1, 9}
};
static direction_t td_block_bbgds4[3] =
{
  { 1, 1, 3},  { 0, 1, 4},  {-1, 1, 5}
};
static direction_t td_block_bbgds5[5] =
{
  { 1, 1, 3},  { 0, 1, 4},  {-1, 1, 5},
  {-1, 0, 6},  {-1,-1, 7}
};
static direction_t td_block_bbgds6[3] =
{
  {-1, 1, 5},  {-1, 0, 6},  {-1,-1, 7}
};
static direction_t td_block_bbgds7[5] =
{
  {-1, 1, 5},  {-1, 0, 6},  {-1,-1, 7},
  { 0,-1, 8},  { 1,-1, 9}
};
static direction_t td_block_bbgds8[3] =
{
  {-1,-1, 7},  { 0,-1, 8},  { 1,-1, 9}
};
static direction_t td_block_bbgds9[5] =
{
  { 1, 0, 2},  { 1, 1, 3},  {-1,-1, 7},
  { 0,-1, 8},  { 1,-1, 9}
};

/* Indexed by the direction of the previous move. */
static tab_direction_t td_block_bbgds[10] =
{
  {9,td_block_bbgds0}, {8,td_block_bbgds1}, {3,td_block_bbgds2}, {5,td_block_bbgds3},
  {3,td_block_bbgds4}, {5,td_block_bbgds5}, {3,td_block_bbgds6}, {5,td_block_bbgds7},
  {3,td_block_bbgds8}, {5,td_block_bbgds9}
};

/******************** PUBLIC FUNCTIONS ********************/
/*
 * Initialise the estimator: chain to the saved init handler, open the
 * optional log files and allocate the per-macroblock vector store.
 *
 * The store holds four vectors (one per 8x8 block) for each of the
 * mb_width * mb_height macroblocks.  It is zero-initialised because
 * pmvfast_estimation reads an entry before writing it the first time a
 * macroblock is processed.  The function cannot report failure: if the
 * allocation fails the store pointer is NULL.
 */
static void pmvfast_init(fame_motion_t *motion,
			 int mb_width,
			 int mb_height,
			 unsigned int flags)
{
  FAME_MOTION_PMVFAST(motion)->FAME_OVERLOADED(init)(motion,
						     mb_width,
						     mb_height,
						     flags);
#if DEBUG
  debug_log = fopen("pmvfast_debug.log", "wb");
#endif
#if STAT
  stat_log = fopen("pmvfast_stat.log", "wb");
  motionstat.num_picture = 0;
#endif

  /* calloc zero-fills the store and checks the size multiplication */
  FAME_MOTION_PMVFAST(motion)->vectors = (fame_motion_vector_t *)
    calloc((size_t)mb_width*(size_t)mb_height*4, sizeof(fame_motion_vector_t));
}

/*
 * Release the estimator: chain to the saved close handler, close the
 * optional log files (both of them, and only if they were opened
 * successfully) and free the vector store.  The store pointer is cleared
 * so that a stale pointer cannot be reused or freed twice.
 */
static void pmvfast_close(fame_motion_t *motion)
{
  FAME_MOTION_PMVFAST(motion)->FAME_OVERLOADED(close)(motion);

#if DEBUG
  if(debug_log != NULL) {
    fclose(debug_log);
    debug_log = NULL;
  }
#endif
#if STAT
  if(stat_log != NULL) {
    fclose(stat_log);
    stat_log = NULL;
  }
#endif

  free(FAME_MOTION_PMVFAST(motion)->vectors);
  FAME_MOTION_PMVFAST(motion)->vectors = NULL;
}

/*
 * Per-picture set-up: forward all arguments to the saved enter handler,
 * then (when enabled) mark the new picture in the debug log and clear the
 * per-picture statistics counters.
 */
static void pmvfast_enter(fame_motion_t *motion,
			  fame_yuv_t **ref,
			  fame_yuv_t *current,
			  unsigned char *shape,
			  int search_range)
{
  FAME_MOTION_PMVFAST(motion)->FAME_OVERLOADED(enter)(motion, ref, current,
						      shape, search_range);

#if DEBUG
  fputs("********** NEW PICTURE **********\n", debug_log);
#endif
#if STAT
  /* num_picture is deliberately left alone: it runs across pictures */
  motionstat.num_smalldiamond = 0;
  motionstat.num_largediamond = 0;
  motionstat.num_8x8 = 0;
  motionstat.num_16x16 = 0;
  motionstat.num_early = 0;
  motionstat.num_median = 0;
  motionstat.num_intra = 0;
  motionstat.num_eval = 0;
#endif
}


/*
 * Per-picture tear-down: chain to the saved leave handler, then (when
 * statistics are enabled) write this picture's counters to the statistics
 * log and advance the picture counter.
 */
static void pmvfast_leave(fame_motion_t *motion)
{
  FAME_MOTION_PMVFAST(motion)->FAME_OVERLOADED(leave)(motion);

#if STAT
  /* one call, same text as six separate writes */
  fprintf(stat_log,
	  "\n********** PICTURE %d **********\n"
	  "Number of SAD8x8 : %d\n"
	  "Use of median vector: %d times\n"
	  "Early exit : %d times\n"
	  "Large diamond : %d times\n"
	  "Small diamond : %d times\n",
	  motionstat.num_picture,
	  motionstat.num_eval,
	  motionstat.num_median,
	  motionstat.num_early,
	  motionstat.num_largediamond,
	  motionstat.num_smalldiamond);
  motionstat.num_picture += 1;
#endif
}


/*
 * Compute the matching error of `number` block vectors.
 *
 * For entry k the low bit of dx and of dy select one of the reference
 * planes in ref[], the remaining bits give the displacement inside that
 * plane, and the block itself sits at offset[k] in both the reference and
 * the current picture.  The result of eval_error is stored in
 * vectors[k].error.
 */
static inline void get_error(fame_yuv_t **ref,
			     unsigned char *current,
			     unsigned char *shape,
			     fame_motion_vector_t *vectors,
			     int *offset,
			     int pitch,
			     compute_error_t eval_error,
			     int number)
{
  int k;

  for(k = 0; k < number; k++) {
    fame_motion_vector_t *v = &vectors[k];
    /* bit 0 <- low bit of dx, bit 1 <- low bit of dy */
    int plane = (v->dx & 1) + ((v->dy & 1) << 1);
    /* displacement with the low bit of each component dropped */
    int shift = (v->dx >> 1) + (v->dy >> 1) * pitch;

    v->error = eval_error(ref[plane]->y + shift + offset[k],
			  current + offset[k],
			  shape + offset[k],
			  pitch);
  }
#if STAT
  motionstat.num_eval += number;
#endif
}

/*
 * Try a candidate set of four block vectors (pvector[0..3], typically the
 * vectors stored for a neighbouring macroblock) for the macroblock at
 * pixel position (x, y).
 *
 * The candidate is rejected without evaluation unless every one of its
 * four vectors keeps the macroblock strictly inside the picture (vectors
 * are compared against twice the pixel coordinates).  All four vectors
 * are tested because each of them is dereferenced below and the four may
 * differ.
 *
 * If the summed error of the candidate is lower than the summed error
 * currently stored in vectors[0..3], the candidate's dx/dy and the new
 * errors are written to vectors.  Only dx, dy and error are taken over:
 * the count and deviation members are left untouched, since the caller
 * fills them in for the current macroblock before the search.
 *
 * Returns 0 when the candidate was adopted, 1 otherwise.
 */
static inline int check_vector(fame_yuv_t **ref,
			       unsigned char *current,
			       unsigned char *shape,
			       int x,
			       int y,
			       int width,
			       int height,
			       fame_motion_vector_t *pvector,
			       fame_motion_vector_t *vectors,
			       int *offset,
			       int pitch,
			       compute_error_t eval_error)
{
  int i;
  int residual, motion;
  int errors[4];

  /* reject the candidate if any of its vectors leaves the picture */
  for(i = 0; i < 4; i++) {
    if((x<<1)+pvector[i].dx<=0 ||
       (y<<1)+pvector[i].dy<=0 ||
       (x<<1)+pvector[i].dx>=((width-16)<<1) ||
       (y<<1)+pvector[i].dy>=((height-16)<<1))
      return(1);
  }

  for(i = 0; i < 4; i++) {
    /* low bits select the reference plane, the rest is the displacement */
    residual = (pvector[i].dx & 1) + ((pvector[i].dy & 1) << 1);
    motion = (pvector[i].dx >> 1) + (pvector[i].dy >> 1) * pitch;
    errors[i] = eval_error(ref[residual]->y+motion+offset[i],
			   current+offset[i],
			   shape+offset[i],
			   pitch);
#if STAT
    motionstat.num_eval++;   /* one evaluation per iteration */
#endif
  }

  if(errors[0]+errors[1]+errors[2]+errors[3] <
     vectors[0].error+vectors[1].error+vectors[2].error+vectors[3].error)
  {
    /* use checked vector */
    for(i = 0; i < 4; i++) {
      vectors[i].dx = pvector[i].dx;
      vectors[i].dy = pvector[i].dy;
      vectors[i].error = errors[i];
    }
    return(0);
  }
  return(1);
}

/*
 * Iterative pattern search for one vector shared by the four 8x8 blocks of
 * the macroblock at pixel position (x, y).
 *
 *   offset[4]     position of each 8x8 block inside the planes
 *   width/height  picture size in pixels, pitch is the line stride
 *   table         pattern tables indexed by the direction of the last move
 *   search_range  candidates must satisfy |component| < search_range
 *   step          pattern offsets are multiplied by 2^step
 *   mv            four entries; mv[0].dx/dy is the start vector and
 *                 mv[0..3].error the errors to beat.  Updated in place.
 *
 * Each iteration tries every point of the current pattern around mv[0],
 * keeps the one with the lowest summed error if it beats the best so far,
 * moves there and continues with the pattern for that direction.  The
 * search stops when no point improves on the current position.
 */
static void find_macroblockvector(fame_yuv_t **ref,
				  unsigned char *current,
				  unsigned char *shape,
				  int offset[4],
				  int x,
				  int y,
				  int width,
				  int height,
				  int pitch,
				  tab_direction_t *table,
				  int search_range,
				  int step,
				  compute_error_t eval_error,
				  fame_motion_vector_t *mv)
{
  int i;
  int last_motion;
  tab_direction_t *current_table;
  int test_dx, test_dy, test_total;
  int best_dx, best_dy, best_total;
  int test_error0, test_error1, test_error2, test_error3;
  int subpel;
  int scale;

  subpel = 1; /* default (half-pel) */
  /* Pattern offsets can be negative and left-shifting a negative value is
     undefined behaviour, so the scaling is done by multiplication. */
  scale = 1 << step;

  /* First iteration: the full pattern without its centre, whose error is
     already known (mv[].error). */
  last_motion = NULL_MOTION;
  current_table = &(table[last_motion]);
  best_total = mv[0].error + mv[1].error + mv[2].error + mv[3].error;

  while(1)
  {
    last_motion = NULL_MOTION;
    best_dx = 0;
    best_dy = 0;

    /* Search the best motion vector from the current point */
    for(i = 0; i < current_table->nbre; i++)
    {
      test_dx = current_table->directions[i].dx * scale;
      test_dy = current_table->directions[i].dy * scale;
      if (((x<<subpel)+test_dx+mv->dx>=0) &&
	  ((y<<subpel)+test_dy+mv->dy>=0) &&
	  ((x<<subpel)+test_dx+mv->dx<=((width-16)<<subpel)) &&
	  ((y<<subpel)+test_dy+mv->dy<=((height-16)<<subpel)) &&
	  mv->dx+test_dx < search_range &&
	  mv->dx+test_dx > -search_range &&
	  mv->dy+test_dy < search_range &&
	  mv->dy+test_dy > -search_range) {
	int motion, residual;
	unsigned char *location;

	/* Find the SAD for the blocks (8x8) */
	motion = ((mv->dx+test_dx) >> subpel) + ((mv->dy+test_dy) >> subpel) * pitch;
	residual = ((mv->dx+test_dx) & ((1<<subpel)-1)) | (((mv->dy+test_dy) & ((1<<subpel)-1)) << subpel);
	location = ref[residual]->y+motion;
	test_error0 = eval_error(location+offset[0], current+offset[0], shape+offset[0], pitch);
	test_error1 = eval_error(location+offset[1], current+offset[1], shape+offset[1], pitch);
	test_error2 = eval_error(location+offset[2], current+offset[2], shape+offset[2], pitch);
	test_error3 = eval_error(location+offset[3], current+offset[3], shape+offset[3], pitch);
	test_total = test_error0 + test_error1 + test_error2 + test_error3;
#if STAT
	motionstat.num_eval+=4;
#endif
      } else {
	/* outside the picture or the search range: can never win */
	test_total = INFINITE_ERROR;
	test_error0 = test_error1 = test_error2 = test_error3 = INFINITE_ERROR;
      }
#if DEBUG
      fprintf(debug_log, "errorBBGDS=%u\n",test_total);
#endif

      /* Keep this point if its summed error beats the best so far */
      if(test_total < best_total)
      {
	last_motion = current_table->directions[i].index_direction;
	best_dx = test_dx;
	best_dy = test_dy;
	best_total = test_total;
	mv[0].error = test_error0;
	mv[1].error = test_error1;
	mv[2].error = test_error2;
	mv[3].error = test_error3;
      }
    }

    /* Move to the best point found, or stop if nothing improved */
    if((best_dx | best_dy) != 0)
    {
      mv->dx += best_dx;
      mv->dy += best_dy;
      mv[3].dx = mv[2].dx = mv[1].dx = mv->dx;
      mv[3].dy = mv[2].dy = mv[1].dy = mv->dy;
      current_table = &(table[last_motion]);
    } else
      return;
  }
}

/*
 * Iterative pattern search for the vector of a single 8x8 block.
 *
 *   offset        position of the block inside the planes
 *   x, y          pixel position used for the picture-bounds test, which
 *                 allows the block origin up to (width-8, height-8)
 *   width/height  picture size in pixels, pitch is the line stride
 *   table         pattern tables indexed by the direction of the last move
 *   search_range  candidates must satisfy |component| < search_range
 *   step          pattern offsets are multiplied by 2^step
 *   mv            one entry; dx/dy is the start vector and error the error
 *                 to beat.  Updated in place.
 *
 * Same search loop as find_macroblockvector, with one error evaluation
 * per candidate.
 */
static void find_blockvector(fame_yuv_t **ref,
			     unsigned char *current,
			     unsigned char *shape,
			     int offset,
			     int x,
			     int y,
			     int width,
			     int height,
			     int pitch,
			     tab_direction_t *table,
			     int search_range,
			     int step,
			     compute_error_t eval_error,
			     fame_motion_vector_t *mv)
{
  int i;
  int last_motion;
  tab_direction_t *current_table;
  int test_dx, test_dy, test_error;
  int best_dx, best_dy, best_error;
  int subpel;
  int scale;

  subpel = 1; /* default (half-pel) */
  /* Pattern offsets can be negative and left-shifting a negative value is
     undefined behaviour, so the scaling is done by multiplication. */
  scale = 1 << step;

  /* First iteration: the full pattern without its centre, whose error is
     already known (mv->error). */
  last_motion = NULL_MOTION;
  current_table = &(table[last_motion]);
  best_error = mv->error;

  while(1)
  {
    last_motion = NULL_MOTION;
    best_dx = 0;
    best_dy = 0;

    /* Search the best motion vector from the current point */
    for(i = 0; i < current_table->nbre; i++)
    {
      test_dx = current_table->directions[i].dx * scale;
      test_dy = current_table->directions[i].dy * scale;
      if (((x<<subpel)+test_dx+mv->dx>=0) &&
	  ((y<<subpel)+test_dy+mv->dy>=0) &&
	  ((x<<subpel)+test_dx+mv->dx<=((width-8)<<subpel)) &&
	  ((y<<subpel)+test_dy+mv->dy<=((height-8)<<subpel)) &&
	  mv->dx+test_dx < search_range &&
	  mv->dx+test_dx > -search_range &&
	  mv->dy+test_dy < search_range &&
	  mv->dy+test_dy > -search_range) {
	int motion, residual;

	/* Find the SAD for the block (8x8) */
	motion = ((mv->dx+test_dx) >> subpel) + ((mv->dy+test_dy) >> subpel) * pitch;
	residual = ((mv->dx+test_dx) & ((1<<subpel)-1)) | (((mv->dy+test_dy) & ((1<<subpel)-1)) << subpel);
	test_error = eval_error(ref[residual]->y+motion+offset, current+offset, shape+offset, pitch);
#if STAT
	motionstat.num_eval+=1;   /* a single block is evaluated here */
#endif
      } else
	test_error = INFINITE_ERROR; /* out of bounds/range: can never win */
#if DEBUG
      fprintf(debug_log, "error block_vector=%u\n",test_error);
#endif

      /* Keep this point if its error beats the best so far */
      if(test_error < best_error)
      {
	last_motion = current_table->directions[i].index_direction;
	best_dx = test_dx;
	best_dy = test_dy;
	best_error = test_error;
	mv->error = test_error;
      }
    }

    /* Move to the best point found, or stop if nothing improved */
    if((best_dx | best_dy) != 0)
    {
      mv->dx += best_dx;
      mv->dy += best_dy;
      current_table = &(table[last_motion]);
    } else
      return;
  }
}

/*
 * Estimate the motion of macroblock (mb_x, mb_y).
 *
 * On entry vectors[0].dx/dy must hold the predicted vector; it is clipped
 * to the picture and used as the first candidate.  On return vectors[0..3]
 * hold the chosen vector(s), their errors and the count/deviation values
 * computed by mad_withmask / mad_withoutmask for the four 8x8 blocks.  The
 * same data is kept in the estimator's own store, where it serves as
 * left/top/top-right candidate for later macroblocks.
 *
 * Steps:
 *   1. locate the stored vectors of the left, top and top-right neighbours
 *   2. derive the thresholds from the neighbours' errors
 *   3. evaluate the predicted vector, then the neighbour candidates
 *   4. early exit if the best candidate is below threshold1
 *   5. small or large diamond search, optional 8x8 search, half sample
 *      refinement, then the inter4v/inter and intra/inter decisions
 *
 * Returns motion_inter or motion_intra.
 */
static fame_motion_coding_t pmvfast_estimation(fame_motion_t *motion,
					       int mb_x, int mb_y,
					       fame_motion_vector_t *vectors)
{
  int k;
  int pitch;
  int x, y, width, height;
  int offset[4];
  int row_stride;
  compute_error_t eval_error;

  fame_motion_vector_t *pvector;
  fame_motion_vector_t *pvector_left;
  fame_motion_vector_t *pvector_top, *pvector_topright;

  unsigned char *shape;
  unsigned char *current;
  fame_yuv_t **ref;

  int use_median;
  int threshold0;
  int threshold1;
  int threshold2;
  int sad_inter4v, sad_inter, mad_inter, count;
  int range;
#if DEBUG
  int i;
#endif

#if DEBUG
  fprintf(debug_log, "\n***** macroblock : mb_y=%u mb_x=%u *****\n", mb_y, mb_x);
#endif

  /* ***** Initialization ***** */
  eval_error = motion->MAE8x8;
  x = mb_x << 4;
  y = mb_y << 4;
  width  = motion->mb_width << 4;
  height = motion->mb_height << 4;
  pitch = width;
  shape = motion->shape;
  current = motion->current->y;
  ref = motion->ref;
  range = motion->search_range;

  /* position of the four 8x8 blocks inside the planes */
  offset[0] = y * width + x;
  offset[1] = y * width + x+8;
  offset[2] = (y+8) * width + x;
  offset[3] = (y+8) * width + x+8;
   
  if(motion->shape) {
    vectors[0].count = mad_withmask(current+offset[0], shape+offset[0], pitch, &vectors[0].deviation);
    vectors[1].count = mad_withmask(current+offset[1], shape+offset[1], pitch, &vectors[1].deviation);
    vectors[2].count = mad_withmask(current+offset[2], shape+offset[2], pitch, &vectors[2].deviation);
    vectors[3].count = mad_withmask(current+offset[3], shape+offset[3], pitch, &vectors[3].deviation);
  } else {
    vectors[0].count = mad_withoutmask(current+offset[0], pitch, &vectors[0].deviation);
    vectors[1].count = mad_withoutmask(current+offset[1], pitch, &vectors[1].deviation);
    vectors[2].count = mad_withoutmask(current+offset[2], pitch, &vectors[2].deviation);
    vectors[3].count = mad_withoutmask(current+offset[3], pitch, &vectors[3].deviation);
  }
    
  /* integer sample search */

  /* Step1 : vectors around the current macroblock.
     The store holds four entries per macroblock, so one macroblock row is
     4*mb_width entries.  Neighbours that do not exist are left NULL so no
     pointer outside the store is ever formed. */
  row_stride = motion->mb_width * 4;
  pvector = FAME_MOTION_PMVFAST(motion)->vectors + (mb_y*motion->mb_width + mb_x)*4;
  pvector_left     = (mb_x>0) ? pvector - 4 : NULL;
  pvector_top      = (mb_y>0) ? pvector - row_stride : NULL;
  pvector_topright = (mb_y>0 && mb_x<motion->mb_width-1) ? pvector - row_stride + 4 : NULL;

  /* (An error-weighted mean of the neighbour vectors was considered as an
     additional predictor but is not implemented.) */

  /* saturate prediction to borders */
  if((x<<1)+vectors[0].dx<0) vectors[0].dx = -(x<<1);
  if((y<<1)+vectors[0].dy<0) vectors[0].dy = -(y<<1);
  if((x<<1)+vectors[0].dx>((width-16)<<1)) vectors[0].dx = (width-16-x)<<1;
  if((y<<1)+vectors[0].dy>((height-16)<<1)) vectors[0].dy = (height-16-y)<<1;
  
  /* Step2 : Calculate the thresholds */
  threshold1 = INFINITE_ERROR;
  if(pvector_left != NULL)
    threshold1 = fame_min(threshold1, pvector_left[0].error+pvector_left[1].error+pvector_left[2].error+pvector_left[3].error);
  if(pvector_top != NULL)
    threshold1 = fame_min(threshold1, pvector_top[0].error+pvector_top[1].error+pvector_top[2].error+pvector_top[3].error);
  if(pvector_topright != NULL)
    threshold1 = fame_min(threshold1, pvector_topright[0].error+pvector_topright[1].error+pvector_topright[2].error+pvector_topright[3].error);

  /* Use the counts just computed for this macroblock; the stored entry
     still holds whatever was left there by an earlier picture. */
  threshold0 = vectors[0].count + vectors[1].count + vectors[2].count + vectors[3].count;
  threshold2 = threshold1 + threshold0;

  /* clamp threshold1 to [LOWER_LIMIT_THRESHOLD1, UPPER_LIMIT_THRESHOLD1] */
  if(threshold1<LOWER_LIMIT_THRESHOLD1)
    threshold1 = LOWER_LIMIT_THRESHOLD1;
  if(threshold1>UPPER_LIMIT_THRESHOLD1)
    threshold1 = UPPER_LIMIT_THRESHOLD1;

  if(threshold2>UPPER_LIMIT_THRESHOLD2)
    threshold2 = UPPER_LIMIT_THRESHOLD2;

#if DEBUG
  fprintf(debug_log, "threshold0 = %u\n", threshold0);
  fprintf(debug_log, "threshold1 = %u\n", threshold1);
  fprintf(debug_log, "threshold2 = %u\n", threshold2);
#endif

  /* Step3 : Process a set of vectors whose matching probability is very high*/
  /*         i.e. median, left, top, topright vector                         */

  /* Check the median vector */
  vectors[3].dx = vectors[2].dx = vectors[1].dx = vectors[0].dx;
  vectors[3].dy = vectors[2].dy = vectors[1].dy = vectors[0].dy;
  get_error(ref, current, shape, vectors, offset, pitch, eval_error, 4);
  use_median = 1;
  /* TODO: check whether the error of the predicted vector should be
     decreased by count/2 per block (threshold0) */

  memcpy(pvector, vectors, 4*sizeof(fame_motion_vector_t));
    
  if(vectors[0].error+
     vectors[1].error+
     vectors[2].error+
     vectors[3].error<threshold0)
  {
    /* keep predicted vector */
#if STAT
    motionstat.num_median++;
#endif
    return(motion_inter);
  }

#if DEBUG
  fprintf(debug_log, "Median vector : dx=%d dy=%d error=%d\n",
	  vectors[0].dx, vectors[0].dy, vectors[0].error);
#endif

  /* Check the left vector */
  if(pvector_left != NULL)
    use_median &= check_vector(ref, current, shape,
			       x, y, width, height,
			       pvector_left, vectors,
			       offset, pitch, eval_error);

  /* Check the top vector */
  if(pvector_top != NULL)
    use_median &= check_vector(ref, current, shape,
			       x, y, width, height,
			       pvector_top, vectors,
			       offset, pitch, eval_error);

  /* Check the topright vector */
  if(pvector_topright != NULL)
    use_median &= check_vector(ref, current, shape,
			       x, y, width, height,
			       pvector_topright, vectors,
			       offset, pitch, eval_error);

  /* TODO : check the left, top, top right vectors of the frame t-1 */ 

  /* The candidate checks leave the best vector of the set in `vectors`,
     while everything below works on `pvector`.  Carry the winner over;
     count and deviation in pvector already describe this macroblock and
     are deliberately not touched. */
  if(!use_median) {
    for(k = 0; k < 4; k++) {
      pvector[k].dx = vectors[k].dx;
      pvector[k].dy = vectors[k].dy;
      pvector[k].error = vectors[k].error;
    }
  }

#if DEBUG
  fprintf(debug_log, "Best vector of the set : dx=%d  dy=%d  error=%u\n",
	  pvector[0].dx, pvector[0].dy, pvector[0].error);
#endif

  /* Step4 : check early exit */
  if(pvector[0].error+pvector[1].error+pvector[2].error+pvector[3].error<threshold1)
  {
#if DEBUG
    fprintf(debug_log, "Early exit\n");
#endif
    memcpy(vectors, pvector, 4*sizeof(fame_motion_vector_t));
#if STAT
    motionstat.num_early++;
#endif
    return(motion_inter);
  }

  /* Step5 : The previous attempts were not successfull
     -> apply the diamond search algorithm with the initial vector equal to the best previous vector found */
  if(use_median && threshold2<THRESHOLD_SMALLDIAMOND)
  {
#if DEBUG
    fprintf(debug_log, "Choice for 16x16 mb search : small diamond\n");
#endif
    find_macroblockvector(ref, current, shape, offset,
			  x, y, width, height, pitch,
			  td_block_smalldiamond,
			  range, 1, eval_error, pvector);
#if STAT
    motionstat.num_smalldiamond++;
#endif
  }
  else
  {
#if DEBUG
    fprintf(debug_log, "Choice for 16x16 mb search : large diamond\n");
#endif
    find_macroblockvector(ref, current, shape, offset,
			  x, y, width, height, pitch,
			  td_block_largediamond,
			  range, 1, eval_error, pvector);
#if STAT
    motionstat.num_largediamond++;
#endif
  }

#if DEBUG
  fprintf(debug_log, "Best 16x16 vector found (integer pixel) : dx=%d  dy=%d  error=%u\n",
	  pvector->dx, pvector->dy, pvector->error);
#endif

  if(motion->flags & FAME_MOTION_BLOCK_SEARCH) {
    /* subvector (8x8) search */
    memcpy(vectors, pvector, 4*sizeof(fame_motion_vector_t));
    for(k = 0; k < 4; k++) { /* TODO: k depends on shape */
      /* Pixel position of block k, matching offset[k].  The bounds test of
	 find_blockvector is relative to the position it is given, so it
	 must receive the block origin, not the macroblock origin. */
      int block_x = x + ((k & 1) << 3);
      int block_y = y + ((k >> 1) << 3);

      /* integer sample search */
      find_blockvector(ref, current, shape, offset[k],
		       block_x, block_y, width, height, pitch,
		       td_block_bbgds,
		       range, 1, eval_error, &vectors[k]);
      /* half sample search */
      find_blockvector(ref, current, shape, offset[k],
		       block_x, block_y, width, height, pitch,
		       td_block_bbgds,
		       range, 0, eval_error, &vectors[k]);
    }

#if DEBUG
    for(i=0; i<4; i++)
      fprintf(debug_log, "Best 8x8 vector found (integer pixel) for the block %d: dx=%d  dy=%d  error=%u\n",
	      i, vectors[i].dx, vectors[i].dy, vectors[i].error);
#endif
  }

  /* half sample search */
  find_macroblockvector(ref, current, shape, offset,
			x, y, width, height, pitch,
			td_block_bbgds,
			range, 0, eval_error, pvector);

#if DEBUG
  fprintf(debug_log, "After half pixel search on the macroblock : dx=%d dy=%d error=%u\n",
	  vectors->dx, vectors->dy, vectors[0].error+vectors[1].error+vectors[2].error+vectors[3].error);
#endif

  /* ***** Make the intra/inter mode decision ***** */
  sad_inter4v = vectors[0].error + vectors[1].error + vectors[2].error + vectors[3].error;
  sad_inter = pvector[0].error + pvector[1].error + pvector[2].error + pvector[3].error;
  mad_inter = pvector[0].deviation + pvector[1].deviation + 
              pvector[2].deviation + pvector[3].deviation;
  count = pvector[0].count + pvector[1].count + pvector[2].count + pvector[3].count;

#if DEBUG
  fprintf(debug_log, "Best 16x16 vector found : dx=%d  dy=%d  error=%u\n",
	  pvector->dx, pvector->dy, pvector[0].error+pvector[1].error+pvector[2].error+pvector[3].error);
  for(i=0; i<4; i++)
  {
    fprintf(debug_log, "Best 8x8 vector found for the block %d : dx=%d  dy=%d  error=%u\n",
	    i, vectors[i].dx, vectors[i].dy, vectors[i].error);
  }
#endif

  /* inter4v/inter mode decision: four vectors must win by more than
     count/2 + 1 to be preferred over a single vector */
  if((motion->flags & FAME_MOTION_BLOCK_SEARCH) &&
     (sad_inter4v + ((count>>1)+1) < sad_inter)) {
#if DEBUG
    fprintf(debug_log, "4 vectors\n");
#endif
    /* inter4v prediction */
    sad_inter = sad_inter4v;
    memcpy(pvector, vectors, 4*sizeof(fame_motion_vector_t));
  } else
    memcpy(vectors, pvector, 4*sizeof(fame_motion_vector_t));

  /* intra/inter mode decision */
  /*        -> 1  - COMPUTE THE VARIANCE OF THE MACROBLOCK                */
  /*           (estimated by absolute difference and not square diff.)    */
  /*           The number of bits (at a given quality) needed by the DCT  */
  /*           depends on the variance (in a first approximation)         */
  /*        -> 2 - COMPARE WITH THE COVARIANCE GIVEN BY THE MOTION VECTOR */
  /*           The number of bits to code residual macroblock             */
  /*           depends on the covariance (in a first approximation)       */
  /*        -> If (1) < (2) - 2*N : Choose INTRA                          */
  /*           Substract 2*N to favour INTER mode when there is no        */
  /*           significant difference                                     */
  if(mad_inter + count + count < sad_inter) {
#if DEBUG
    fprintf(debug_log, "Coding = intra\n");
#endif
    return(motion_intra);
  } else {
#if DEBUG
    fprintf(debug_log, "Coding = inter\n");
#endif
    return(motion_inter);
  }
}

/* End of motion_pmvfast.c */
