/*
			(c) Copyright 1998-2000 - Tord Jansson
			======================================

		This file is part of the BladeEnc MP3 Encoder, based on
		ISO's reference code for MPEG Layer 3 compression, and might
		contain smaller or larger sections that are directly taken
		from ISO's reference code.

		All changes to the ISO reference code herein are either
		copyrighted by Tord Jansson (tord.jansson@swipnet.se)
		or sublicensed to Tord Jansson by a third party.

	BladeEnc is free software; you can redistribute this file
	and/or modify it under the terms of the GNU Lesser General Public
	License as published by the Free Software Foundation; either
	version 2.1 of the License, or (at your option) any later version.

	2000-03-13  André Piotrowski
*/





#include <stdlib.h>
#include <assert.h>
#include "common.h"
#include "encoder.h"
#include "l3psy.h"
#include "l3side.h"





/*
	Larger / smaller of two values.

	Every argument and the whole expansion are parenthesized so that
	arguments containing low-precedence operators (|, &, ?:, ...) expand
	correctly.  Each argument may still be evaluated twice, so never pass
	an expression with side effects (x++, function calls that modify state).
*/
#define maximum(x,y) ( ((x) > (y)) ? (x) : (y) )
#define minimum(x,y) ( ((x) < (y)) ? (x) : (y) )





/*	========================================================================================  */
/*      module instance variables                                                             */
/*	========================================================================================  */
/*
	The static variables "r", "phi_sav", "new_", "old" and "oldest" have
	to be remembered for the unpredictability measure.  For "r" and
	"phi_sav", the first index from the left is the channel select and
	the second index is the "age" of the data.
*/

/* indices into the "age" dimension of r[][][] and phi_sav[][][]; flipped by psycho_anal() when called with ch == 0 */
static	int						new_, old, oldest;
/* per channel: set FALSE by psycho_anal() when it selects START_TYPE, set TRUE again when it selects STOP_TYPE */
static	int						use_long_blocks[2];


/* long block partition tables; the pointers are set by L3para_read() from the selected l3_parm entry */
static	int						*numlines_l;
static	double					*minval;
static	double					*qthr_l;
								/* the non-zero entries of norm_l[i] * s3_l[i][j] */
static	FLOAT					normed_s3_l [900];   /* a bit more space than needed [799|855|735] */
/* for each partition b, the stored entries cover columns lo_s3_l[b] .. hi_s3_l[b]-1 */
static	int						lo_s3_l     [CBANDS];
static	int						hi_s3_l		[CBANDS];

/* short block partition tables; the pointers are set by L3para_read() from the selected l3_parm entry */
static	int						*numlines_s;
static	double					*qthr_s;
static	double					*SNR_s;
static	FLOAT					normed_s3_s [500];   /* a bit more space than needed [445|395|378] */
/* for each partition b, the stored entries cover columns lo_s3_s[b] .. hi_s3_s[b]-1 */
static	int						lo_s3_s     [CBANDS_s];
static	int						hi_s3_s		[CBANDS_s];

/*
	long block tables for converting partitions to scale factor bands:
	bu/bo are the first/last partition of a band, w1/w2 their weights
	(cbw_l is assigned in L3para_read() but not read anywhere in this file)
*/
static	int						*cbw_l;
static	int						*bu_l;
static	int						*bo_l;
static	double					*w1_l;
static	double					*w2_l;

/* the same tables for short blocks (cbw_s is assigned but not read in this file) */
static	int						*cbw_s;
static	int						*bu_s;
static	int						*bo_s;
static	double					*w1_s;
static	double					*w2_s;

/* pre-echo control history per channel and partition, carried from one psycho_anal() call to the next */
static	double					nb_1        [2][CBANDS];
static	double					nb_2        [2][CBANDS];

/* unpredictability measure per spectral line; entries 206 .. HBLKSIZE-1 are fixed at 0.4 by psycho_anal_init() */
static	double					cw          [HBLKSIZE];
/* energy per threshold calculation partition (scratch, rewritten on every psycho_anal() call) */
static	double					eb          [CBANDS];

/* HANN window for the long FFT and the magnitude/phase history [channel][age][line] of the first six lines */
static	FLOAT					window      [BLKSIZE];
static	FLOAT					r			[2][2][6];
static	FLOAT					phi_sav		[2][2][6];

/* HANN window, energies and phases of the three short FFTs computed per psycho_anal() call */
static	FLOAT					window_s    [BLKSIZE_s];
static	FLOAT					energy_s    [3][BLKSIZE_s];
static	FLOAT					phi_s       [3][52];

/* threshold/energy ratios per channel; psycho_anal() hands them out on its NEXT call for that channel */
static	double					ratio       [2][SBMAX_l];
static	double					ratio_s     [2][SBMAX_s][3];

/* number of valid partitions for long and short blocks, set by L3para_read() */
static	int						cbmax_l;
static	int						cbmax_s;





/* points the module tables at the l3_parm entry sfreq_idx and builds both spreading functions */
static	void L3para_read (int sfreq_idx);

/* computes the normed spreading function as a band matrix stored in linear order */
static	void spread_your_wings
(
	int						cbmax,			/* number of lines and rows           */
	const double			bval[],			/* input values to compute the matrix */
	FLOAT					normed_s3[],	/* the resulting non-zero entries     */
	int						lo_s3[],
	int						hi_s3[]
);

/* in-place FFT of a prepacked real sequence, delivering energy and (for a subset of lines) phase */
static	void fft
(
	FLOAT					x_real[],
	FLOAT					x_imag[],
	FLOAT					energy[],
	FLOAT					phi[],
	int						N_ORG			/* BLKSIZE for the long FFT, BLKSIZE_s for the short one */
);





/*	========================================================================================  */
/*      psycho_anal_init                                                                      */
/*	========================================================================================  */

void psycho_anal_init (int sfreq_idx)
{
	unsigned int			ch, i, j, sblock;



	/*
		clear the ratio arrays
	*/
	for (ch = 0;  ch < 2;  ch++)
	{
		for (i = 0;  i < SBMAX_l;  i++)
			ratio[ch][i] = 0.0;

		for (i = 0;  i < SBMAX_s;  i++)
			for (sblock = 0;  sblock < 3;  sblock++)
				ratio_s[ch][i][sblock] = 0.0;
	}



	/*
		reset the r, phi_sav "ring buffer" indices
	*/
	old = 1 - (new_ = oldest = 0);



	/*
		reset states used in unpredictability measure
	*/
	for (ch = 0;  ch < 2;  ch++)
		for (i = 0;  i < 2;  i++)
			for (j = 0;  j < 6;  j++)
			{
				      r[ch][i][j] = 0.0;
				phi_sav[ch][i][j] = 0.0;
			}



	for (ch = 0;  ch < 2;  ch++)
		use_long_blocks[ch] = TRUE;



	/*
		calculate HANN window coefficients
	*/
    for (i = 0;  i < BLKSIZE;  i++)
		window[i] = 0.5 * (1 - cos (2.0 * PI * (i-0.5) / BLKSIZE));

    for (i = 0;  i < BLKSIZE_s;  i++)
		window_s[i] = 0.5 * (1 - cos (2.0 * PI * (i-0.5) / BLKSIZE_s));



	/*
		read layer 3 specific parameters
	*/
    L3para_read (sfreq_idx);



	/*
		some speed up - was done again and again in psycho_anal()
		Set unpredicatiblility of remaining spectral lines to 0.4
	*/
	for (i = 206;  i < HBLKSIZE;  i++)
		cw[i] = 0.4;
}





/*	========================================================================================  */
/*      psycho_anal                                                                           */
/*	========================================================================================  */
									
/*
	Run the psychoacoustic model on one block of samples of channel ch.

	buffer		sample ring buffer; indices wrap at 2048 (see the "& 2047" below)
	buffer_idx	index of the first sample of the BLKSIZE samples to analyse
	ch			channel, selects the per channel state (ratio, r, phi_sav, nb_1, nb_2, ...)
	lay			not referenced in this function
	ratio_d		out: long block ratios stored by the PREVIOUS call for this channel
	ratio_ds	out: short block ratios stored by the PREVIOUS call for this channel
	pe			out: perceptual entropy computed from the current block
	cod_info	out: block_type, window_switching_flag and mixed_block_flag are set

	The ratios computed from the current block are kept in ratio[] / ratio_s[]
	and handed out by the next call.  Depending on *pe only one of the two
	(long or short) is recomputed; the other keeps its older contents.
*/
void psycho_anal
(
	FLOAT					*buffer,
	int						buffer_idx,
	int						ch,
	int						lay,
	double					ratio_d[SBMAX_l],
	double					ratio_ds[SBMAX_s][3],
	double					*pe,
	gr_info					*cod_info
)
{
	unsigned int			b, j, k, idx, fin;
	double					r_prime, phi_prime;
	double					/*temp1, temp2,*/ temp3;
	FLOAT					*s3_ptr;

	int						sfb, sblock;

	double					thr[CBANDS];
	FLOAT					cb[CBANDS];
	FLOAT					wsamp_r[HBLKSIZE];
	FLOAT					wsamp_i[HBLKSIZE];
	FLOAT					energy[HBLKSIZE];
	FLOAT					phi[6];



	/*
		hand out the ratios that were stored by the previous call
	*/
	for (j = 0;  j < SBMAX_l;  j++)
		ratio_d[j] = ratio[ch][j];

	for (j = 0;  j < SBMAX_s;  j++)
		for (sblock = 0;  sblock < 3;  sblock++)
			ratio_ds[j][sblock] = ratio_s[ch][j][sblock];
	


	/*
		flip the r, phi_sav "ring buffer" indices
	*/
	if (ch == 0)
		old = 1 - (new_ = oldest = old);



	/*
		compute unpredictability of first six spectral lines
	*/
#if 0
	for (j = 0, k = 0, idx = 0;  j < BLKSIZE/2;  j++)
	{
		wsamp_r[j] = window[k++] * buffer[idx++];
		wsamp_i[j] = window[k++] * buffer[idx++];
	}
#else
	/*
		Window the samples and pack them for the half length FFT:
		even samples go to the real part, odd samples to the imaginary part.
		If the block wraps around the end of the ring buffer, the part up to
		index 2047 is copied first, then the rest starting at index 0.
	*/
	j = 0;  k = 0;
	idx = buffer_idx;
	fin = (idx+BLKSIZE) & 2047;
	if (idx >= fin)
	{
		while (idx < 2048)
		{
			wsamp_r[j] = window[k++] * buffer[idx++];
			wsamp_i[j] = window[k++] * buffer[idx++];
			j++;
		}
		idx = 0;
	}
	while (idx < fin)
	{
		wsamp_r[j] = window[k++] * buffer[idx++];
		wsamp_i[j] = window[k++] * buffer[idx++];
		j++;
	}
#endif

	fft(wsamp_r, wsamp_i, energy, phi, BLKSIZE);  /* long FFT */

	for (j = 0;  j < 6;  j++)
	{
		/*
			calculate unpredictability measure cw

			r_prime / phi_prime are the magnitude and phase predicted by
			linear extrapolation from the two previous blocks
		*/
		double	r1, phi1;
		                    r_prime = 2.0 *       r[ch][old][j] -       r[ch][oldest][j];
		                  phi_prime = 2.0 * phi_sav[ch][old][j] - phi_sav[ch][oldest][j];
		      r[ch][new_][j] =   r1 = sqrt((double) energy[j]);
		phi_sav[ch][new_][j] = phi1 =                  phi[j];

		temp3 = r1 + fabs(r_prime);
		if (temp3 != 0.0)
		{
#if 0
			temp1 = r1 * cos(phi1) - r_prime * cos(phi_prime);
			temp2 = r1 * sin(phi1) - r_prime * sin(phi_prime);
			cw[j] = sqrt(temp1*temp1 + temp2*temp2) / temp3;
#else
			/*
				Let's take a look at the argument to the square root

						temp1^^2 + temp2^^2

					=		r1^^2 * cos^^2(phi1) + r_prime^^2 * cos^^2(phi_prime) - 2 * r1 * r_prime * cos(phi1) * cos(phi_prime)
						+	r1^^2 * sin^^2(phi1) + r_prime^^2 * sin^^2(phi_prime) - 2 * r1 * r_prime * sin(phi1) * sin(phi_prime)

					=	r1^^2  +  r_prime^^2  -  2 * r1 * r_prime * (cos(phi1) * cos(phi_prime) + sin(phi1) * sin(phi_prime))

					=	energy[j]  +  r_prime^^2  -  2 * r1 * r_prime * cos(phi1 - phi_prime)

					=	energy[j]  +  r_prime * (r_prime -  2 * r1 * cos(phi1 - phi_prime))

				Computing the last expression is faster, since we don't need to calculate temp1 and temp2 ...
			*/

			cw[j] = sqrt(energy[j] + r_prime * (r_prime - 2 * r1 * cos(phi1 - phi_prime))) / temp3;
#endif
		}
		else
			cw[j] = 0;
	}



	/*
		compute unpredictability of next 200 spectral lines

		Three short FFTs are taken at offsets 256, 384 and 512 samples
		from buffer_idx; their results play the roles "oldest", "old"
		and "new" in the prediction below.
	*/
	for (sblock = 0;  sblock < 3;  sblock++)
	{
#if 0
		for (j = 0, k = 0, idx = 128*(2+sblock);  j < BLKSIZE_s/2;  j++)
		{	/* window data with HANN window */
			wsamp_r[j] = window_s[k++] * buffer[idx++];
			wsamp_i[j] = window_s[k++] * buffer[idx++];
		}
#else
		/* same windowing/packing and ring buffer handling as for the long block */
		j = 0;  k = 0;
		idx = (buffer_idx + 128*(2+sblock)) & 2047;
		fin = (idx+BLKSIZE_s) & 2047;
		if (idx >= fin)
		{
			while (idx < 2048)
			{
				wsamp_r[j] = window_s[k++] * buffer[idx++];
				wsamp_i[j] = window_s[k++] * buffer[idx++];
				j++;
			}
			idx = 0;
		}
		while (idx < fin)
		{
			wsamp_r[j] = window_s[k++] * buffer[idx++];
			wsamp_i[j] = window_s[k++] * buffer[idx++];
			j++;
		}
#endif

		fft(wsamp_r, wsamp_i, energy_s[sblock], phi_s[sblock], BLKSIZE_s);  /* short FFT*/
	}
 
	/* short FFT line k (2 .. 51) supplies the value for the four long FFT lines j .. j+3 (6 .. 205) */
	for (j = 6, k = 2;  j < 206;  j += 4, k++)
	{
		/*
			calculate unpredictability measure cw
		*/
		double	r1, phi1;

#if 0   /* original dist10 */
	#define _OLDEST 2
	#define _OLD    0
	#define _NEW    1
#else
	#define _OLDEST 0
	#define _OLD    1
	#define _NEW    2
#endif

	 	  r_prime = 2.0 * sqrt((double) energy_s[_OLD][k]) - sqrt((double) energy_s[_OLDEST][k]);
		phi_prime = 2.0 *                  phi_s[_OLD][k]  -                  phi_s[_OLDEST][k];
		       r1 = sqrt((double) energy_s[_NEW][k]);
		     phi1 =                  phi_s[_NEW][k];

		temp3 = r1 + fabs(r_prime);
		if (temp3 != 0.0)
		{
#if 0
			temp1 = r1 * cos(phi1) - r_prime * cos(phi_prime);
			temp2 = r1 * sin(phi1) - r_prime * sin(phi_prime);
			cw[j] = sqrt(temp1*temp1 + temp2*temp2) / temp3;
#else
			cw[j] = sqrt(energy_s[_NEW][k] + r_prime * (r_prime - 2 * r1 * cos(phi1 - phi_prime))) / temp3;
#endif
		}
		else
			cw[j] = 0.0;

		cw[j+1] = cw[j+2] = cw[j+3] = cw[j];
	}



	j = 0;
	for (b = 0;  b < cbmax_l;  b++)
	{
		eb[b] = 0.0;
		cb[b] = 0.0;

		/*
			Calculate the energy and the unpredictability in the threshold
			calculation partitions

			cbmax_l holds the number of valid numlines_l entries

			NOTE: the do-while relies on numlines_l[b] >= 1; k is unsigned,
			so an entry of 0 would wrap around instead of skipping the loop
		*/
		k = numlines_l[b];
		do {
			eb[b] += energy[j];
			cb[b] += cw[j] * energy[j];
		} while (j++, --k);
	}


	*pe = 0.0;

	/* normed_s3_l is consumed strictly in linear order, one run of entries per partition */
	s3_ptr = normed_s3_l;
	for (b = 0;  b < cbmax_l;  b++)
	{
		FLOAT					nb;
		FLOAT					ecb = 0.0;
		double					ctb = 0.0;
		double					SNR_l;
		double					cbb, tbb;

		/*
			convolve the partitioned energy and unpredictability
			with the spreading function, normed_s3_l[b][k]
		*/
		for (k = lo_s3_l[b];  k < hi_s3_l[b];  k++)
		{
			ecb += *s3_ptr   * eb[k];  /* sprdngf for Layer III */
			ctb += *s3_ptr++ * cb[k];
		}


		if (ecb != 0.0)
		{
			/*
				calculate the tonality of each threshold calculation partition,
				calculate the SNR in each threshold calculation partition
			*/
			cbb = ctb/ecb;
			if (cbb < 0.01)
				cbb = 0.01;
			tbb = -0.299 - 0.43 * log(cbb);  /* conv1=-0.299, conv2=-0.43 */

			/*
				Limiting tbb to be less than or equal to 1 is the same as 
				limiting TMN*tbb + NMT*(1-tbb) to be less than or equal to TMN.

				Limiting tbb to be greater than or equal to 0 is a fault of ISO
				because this leads always to SNR_l >= NMT.
				Therefore, most of the minVal table is trash.

				minval: "I'm your minimum value, there should be no other minima beside me..."

				Keep this faulty state --- it sounds a bit better!  2000-03-13 AP
			*/

#if 1 /* 0 lets minval be THE minimum value */
			if (tbb < 0.0)  tbb = 0.0;
#endif

/* TMN=29.0,NMT=6.0 for all calculation partitions */
#define 	NMT		 6.0
#define		TMN 	29.0

			SNR_l = (TMN - NMT) * tbb + NMT;
			SNR_l = maximum (minval[b], minimum (SNR_l, TMN));
		

			/*
				calculate the threshold for the partition
			*/
		    nb = ecb * exp(-SNR_l * LN_TO_LOG10);   /* our ecb is already normed */
		}
		else
			nb = 0.0;  /* cbb==0 => -0.299-0.43*cbb<0 => tbb=0 => nb=0 */

		/*
			pre-echo control

			nb_1 holds the nb of the previous call, nb_2 the limit derived
			from the two previous calls; both are updated for the next call
		*/
		thr[b] = maximum (qthr_l[b], minimum (nb, nb_2[ch][b]));
		nb_2[ch][b] = minimum (2.0 * nb, 16.0 * nb_1[ch][b]);
	    nb_1[ch][b] = nb;


		/*
			calculate perceptual entropy

			thr[b] -> thr[b]+1.0 : for non sound partition
		*/
		if (eb[b] > thr[b])
			*pe += numlines_l[b] * log ((eb[b]+1.0) / (thr[b]+1.0));
	}



#define switch_pe  1800
	


	if (*pe < switch_pe)
	{
		/* no attack : use long blocks */
		if (use_long_blocks[ch])
			cod_info->block_type = NORM_TYPE;
		else
		{
			cod_info->block_type = STOP_TYPE;
			use_long_blocks[ch] = TRUE;
		}

		/*
			threshold calculation (part 2)

			convert partitions to scale factor bands: the border partitions
			bu and bo are weighted by w1 and w2, the partitions between them
			count fully
		*/
		for (sfb = 0;  sfb < SBMAX_l;  sfb++)
		{
			int		bu = bu_l[sfb];
			int		bo = bo_l[sfb];
			double	en = w1_l[sfb] * eb[bu] + w2_l[sfb] * eb[bo];
			for (b = bu+1;  b < bo;  b++)
				en += eb[b];

			if (en != 0.0)
			{
				double	thm = w1_l[sfb] * thr[bu] + w2_l[sfb] * thr[bo];
				for (b = bu+1;  b < bo;  b++)
					thm += thr[b];

				ratio[ch][sfb] = thm/en;
			}
			else
				ratio[ch][sfb] = 0.0;
		}
	}
	else
	{
		/* attack : use short blocks */
		if (use_long_blocks[ch])
		{
			cod_info->block_type = START_TYPE;
			use_long_blocks[ch] = FALSE;
		}
		else
			cod_info->block_type = SHORT_TYPE;

		/* threshold calculation for short blocks */

		/* eb[] and thr[] are reused here, now indexed by short block partitions */
		for (sblock = 0;  sblock < 3;  sblock++)
		{
			j = 0;
			for (b = 0;  b < cbmax_s;  b++)
			{
				eb[b] = 0.0;

				/*
					Calculate the energy and the unpredictability in the threshold
					calculation partitions

					cbmax_s holds the number of valid numlines_s entries

					NOTE: as above, the do-while relies on numlines_s[b] >= 1
				*/
				k = numlines_s[b];
				do {
					eb[b] += energy_s[sblock][j];
				} while (j++, --k);
			}

			s3_ptr = normed_s3_s;
			for (b = 0;  b < cbmax_s;  b++)
			{
				FLOAT					nb;
				FLOAT					ecb = 0.0;

				for (k = lo_s3_s[b];  k < hi_s3_s[b];  k++)
					ecb += *s3_ptr++ * eb[k];

				/*
					NOTE(review): SNR_s[b] is applied without negation, unlike SNR_l above;
					presumably the table stores negative values --- verify against l3_parm
				*/
				nb = ecb /* * norm_s[b] */ * exp((double) SNR_s[b] * LN_TO_LOG10);
				thr[b] = maximum(qthr_s[b], nb);
			}

			/* convert partitions to scale factor bands, same scheme as for long blocks */
			for (sfb = 0;  sfb < SBMAX_s;  sfb++)
			{
				int		bu = bu_s[sfb];
				int		bo = bo_s[sfb];
				double	en = w1_s[sfb] * eb[bu] + w2_s[sfb] * eb[bo];
				for (b = bu+1;  b < bo;  b++)
					en += eb[b];
				if (en != 0.0)
				{
					double	thm = w1_s[sfb] * thr[bu] + w2_s[sfb] * thr[bo];
					for (b = bu+1;  b < bo;  b++)
						thm += thr[b];

					ratio_s[ch][sfb][sblock] = thm/en;
				}
				else
					ratio_s[ch][sfb][sblock] = 0.0;
			}
		}
	} 
	

	/* every block type other than NORM_TYPE is signalled as window switching; mixed blocks are never used */
	cod_info->window_switching_flag = (cod_info->block_type != NORM_TYPE);
	cod_info->mixed_block_flag = 0;
}





/*	========================================================================================  */
/*      L3para_read                                                                           */
/*	========================================================================================  */

/*
	Select the layer 3 parameter set l3_parm[sfreq_idx] and publish it to
	the module variables.

	sfreq_idx	index into l3_parm; it is NOT range checked here, the caller
				has to pass a valid index

	Besides setting the table pointers and partition counts, both normed
	spreading functions (long and short) are computed into normed_s3_l /
	normed_s3_s together with their lo_s3 / hi_s3 column ranges.

	The definition carries the same "static" as its prototype near the
	top of the file, so both declarations visibly agree on internal linkage.
*/
static	void L3para_read (int sfreq_idx)
{
	double					*bval_l, *bval_s;
	l3_parm_block			*parm;	


	parm = l3_parm + sfreq_idx;


	/*
		Set long block data pointer
	*/
	cbmax_l    = parm->long_data.cbmax_l;
	numlines_l = parm->long_data.numlines_l;
	minval     = parm->long_data.minval;
	qthr_l     = parm->long_data.qthr_l;
	bval_l     = parm->long_data.bval_l;

	/*
		Compute the normed spreading function norm_l[i] * s3_l[i][j]
	*/
	spread_your_wings (cbmax_l, bval_l, normed_s3_l, lo_s3_l, hi_s3_l);


	/*
		Set short block data pointer
	*/
	cbmax_s    = parm->short_data.cbmax_s;
	numlines_s = parm->short_data.numlines_s;
	qthr_s     = parm->short_data.qthr_s;
	SNR_s      = parm->short_data.SNR_s;
	bval_s     = parm->short_data.bval_s;

	/*
		Compute the normed spreading function norm_s[i] * s3_s[i][j]
	*/
	spread_your_wings (cbmax_s, bval_s, normed_s3_s, lo_s3_s, hi_s3_s);


	/*
		Set long block data pointer for converting threshold
		calculation partitions to scale factor bands
	*/
	cbw_l = parm->long_thres.cbw_l;
	bu_l  = parm->long_thres.bu_l;
	bo_l  = parm->long_thres.bo_l;
	w1_l  = parm->long_thres.w1_l;
	w2_l  = parm->long_thres.w2_l;


	/*
		Set short block data pointer for converting threshold
		calculation partitions to scale factor bands
	*/
	cbw_s = parm->short_thres.cbw_s;
	bu_s  = parm->short_thres.bu_s;
	bo_s  = parm->short_thres.bo_s;
	w1_s  = parm->short_thres.w1_s;
	w2_s  = parm->short_thres.w2_s;
}





/*	========================================================================================  */
/*      spread your wings                                                        [Queen]      */
/*	========================================================================================  */
/*
	Compute the normed spreading function,
	the normed value of the spreading function,
	centered at band j, for band i, store for later use

	Since this is a band matrix, we store only the non-zero entries
	in linear order in the single dimension array normed_s3.

	The array has to be accessed in linear order, too, starting with line 0,
	up to line cbmax-1. For line b, the current entries represent

		norm[b] * s3[b][lo_s3[b]]  ...  norm[b] * s3[b][hi_s3[b]-1]

	Normally, we could easily compute the norm [building the reciprocal of the line sum].
	Alas, dist10 uses the infinite band matrix to build the line sum, so our norm would
	differ at the last few lines too much.
*/

static	void spread_your_wings
(
	int						cbmax,			/* number of lines and rows           */
	const double			bval[],			/* input values to compute the matrix */
	FLOAT					s3_ptr[],		/* the resulting non-zero entries     */
	int						lo_s3[],
	int						hi_s3[]
)
{
	double					row[CBANDS];	/* un-normed entries of the current line */
	double					root250, shift, slope, spread, lower, upper;
	double					dist, dip, level, total;
	int						line, col;
	int						inside;			/* TRUE once the first non-zero entry of the line was seen */


	/*
		Constants of the spreading function.  An entry counts as non-zero
		only while its shifted argument lies strictly between lower and upper.
	*/
	root250 = sqrt (250.0);   /* exact value instead of 15.811389 */
	shift   = 7.5 / root250;  /* exact value instead of 0.474     */
	slope   = (60.0 + root250) / 250.0;
	spread  = 17.5 * sqrt (1.0 - 1.0 / (250.0 * slope * slope));
	lower   = (7.5 - spread) * slope;
	upper   = (7.5 + spread) * slope;      /* btw.  250 = 17.5^^2 - 7.5^^2 */


#if SHOW_S3_LIMIT   /* optional one-time dump of the constants */
	static	int				shown = 0;
	if (shown == 0)
	{
		printf ("\n  a0:  %.20f", root250);
		printf ("\n  zp:  %.20f", shift);
		printf ("\n  x :  %.20f", slope);
		printf ("\n  y :  %.20f", spread);
		printf ("\n  lo:  %.20f", lower);
		printf ("\n  hi:  %.20f", upper);
		printf ("\n  fz:  %.20f", -17.5 * sqrt(shift * shift + 1.0) + 7.5 * shift + root250);
		printf ("\n  fl:  %.20f", -17.5 * sqrt(lower * lower + 1.0) + 7.5 * lower + root250);
		printf ("\n  fh:  %.20f", -17.5 * sqrt(upper * upper + 1.0) + 7.5 * upper + root250);
		printf ("\n\n");

		shown++;
	}
#endif

	for (line = 0;  line < cbmax;  line++)
	{
		total  = 0;
		inside = FALSE;

		/* stays valid if the non-zero run reaches the end of the line */
		hi_s3[line] = cbmax;

		for (col = 0;  col < cbmax;  col++)
		{
			/* the distance is scaled differently on the two sides of the diagonal */
			dist = (bval[line] - bval[col]) * ((col >= line) ? 3.0 : 1.5);

			dip = 0.0;
			if (dist > 0.5  &&  dist < 2.5)
				dip = 8.0 * (dist - 0.5) * (dist - 2.5);

			dist += shift;

			if (dist > lower  &&  dist < upper)
			{
				level     = 17.5 * sqrt(dist * dist + 1.0) - 7.5 * dist - root250;
				row[col]  = exp((dip - level) * LN_TO_LOG10);
				total    += row[col];

				if (! inside)
				{
					/* the first non-zero entry starts the non-zero run */
					lo_s3[line] = col;
					inside = TRUE;
				}
			}
			else if (inside)
			{
				/* the run is over, only zeroes will follow: stop computing this line */
				hi_s3[line] = col;
				break;
			}
		}

		/* append the entries of this line, normed by the line sum */
		for (col = lo_s3[line];  col < hi_s3[line];  col++)
			*s3_ptr++ = row[col] / total;
	}
}





/*
	FFT of a real sequence that was prepacked to half length, followed by
	the computation of energy and phase.

	x_real, x_imag	in: the packed sequence (psycho_anal() puts the even samples
					into x_real and the odd samples into x_imag); both arrays are
					overwritten and must have room for index N_ORG/2
	energy			out: energy[0 .. N_ORG/2], never smaller than 0.0005
	phi				out: phase, written only for lines 0 .. 5 when N_ORG == BLKSIZE,
					and only for lines 2 .. 51 otherwise
	N_ORG			length of the real sequence; BLKSIZE selects the long tables,
					any other value is treated as the short FFT (BLKSIZE_s)

	The twiddle factors and the bit reverse index tables are built on the
	first call and kept in static storage.
*/
static	void fft
(
	FLOAT					x_real[],
	FLOAT					x_imag[],
	FLOAT					energy[],
	FLOAT					phi[],
	int						N_ORG
)
{
	int						i, j, k, off;
	int						ip, le, le1;
	int						N;
	int						*pSwap, a, b;

	double					t_real, t_imag, u_real, u_imag;
	double					t1, t2, t3, t4, t5, t6;

	static double			w_real[BLKSIZE/2], w_imag[BLKSIZE/2];

	static	int				swap_l[BLKSIZE/2+1];
	static	int				swap_s[BLKSIZE_s/2+1];

	static	int				fInit_fft = 0;



	/*
		one-time set up of the twiddle factors (shared by both FFT sizes)
		and of the bit reverse index tables for the long and the short FFT
	*/
	if (fInit_fft == 0)
	{
		for (i = 0;  i < BLKSIZE/2;  i++)
		{
			w_real[i] =  cos (PI * i / (BLKSIZE/2));
			w_imag[i] = -sin (PI * i / (BLKSIZE/2));
		}

		j = 0;
		for (i = 0;  i < BLKSIZE/2-1;  i++)
		{
			swap_l[i] = j;  k = BLKSIZE/4;  while (k <= j) {j -= k;  k >>= 1;}  j += k;
		}
		/* the last two entries map to themselves */
		swap_l[i] = i;  swap_l[i+1] = i+1;

		j = 0;
		for (i = 0;  i < BLKSIZE_s/2-1;  i++)
		{
			swap_s[i] = j;  k = BLKSIZE_s/4;  while (k <= j) {j -= k;  k >>= 1;}  j += k;
		}
		swap_s[i] = i;  swap_s[i+1] = i+1;

		fInit_fft++;
	}



	/*
		The real sequence is prepacked to the half length
	*/
	N = N_ORG >> 1;



	/*
		butterfly passes; off is the stride into the twiddle table,
		it doubles each time the butterfly span le is halved
	*/
	off = BLKSIZE/N;
	for (le = N;  le > 1;  le = le1)
	{
		le1 = le >> 1;

			/* special case: k=0; u_real=1.0; u_imag=0.0; j=0 */
			for (i = 0;  i < N;  i += le)
			{
				ip = i + le1;
				t_real = x_real[i] - x_real[ip];
				t_imag = x_imag[i] - x_imag[ip];
				x_real[i] += x_real[ip];
				x_imag[i] += x_imag[ip];
				x_real[ip] = t_real;
				x_imag[ip] = t_imag;
			}

		k = off;
		for (j = 1;  j < le1;  j++)
		{
			u_real = w_real[k];
			u_imag = w_imag[k];
			for (i = j;  i < N;  i += le)
			{
				ip = i + le1;
				t_real = x_real[i] - x_real[ip];
				t_imag = x_imag[i] - x_imag[ip];
				x_real[i] += x_real[ip];
				x_imag[i] += x_imag[ip];
				x_real[ip] = t_real*u_real - t_imag*u_imag;
				x_imag[ip] = t_imag*u_real + t_real*u_imag;
			}
			k += off;
		}

		off += off;
	}



	/*
		We don't reorder the data to the correct ordering,
		but access the data by the bit reverse order index array.
	*/
	pSwap = (N_ORG == BLKSIZE) ? swap_l : swap_s;



	/*
		unpacking the sequence

		lines 0 and N are real valued and are handled separately
	*/
	t_real = x_real[0];
	t_imag = x_imag[0];

	x_real[0] = t_real+t_imag;
	x_imag[0] = 0.0;
	x_real[N] = t_real-t_imag;
	x_imag[N] = 0.0;

	/* lines i and N-i are unpacked as a pair */
	k = off = BLKSIZE/N_ORG;
	for (i = 1;  i < N/2;  i++)
	{
		a = pSwap[i];
		b = pSwap[N-i];

		t1 = x_real[a] + x_real[b];
		t2 = x_real[a] - x_real[b];
		t3 = x_imag[a] + x_imag[b];
		t4 = x_imag[a] - x_imag[b];
		t5 = t2*w_imag[k] + t3*w_real[k];
		t6 = t3*w_imag[k] - t2*w_real[k];

		x_real[a] = (t1+t5) / 2.0;
		x_imag[a] = (t6+t4) / 2.0;
		x_real[b] = (t1-t5) / 2.0;
		x_imag[b] = (t6-t4) / 2.0; 

		k += off;
	}

	/* x_real[N/2] doesn't change */
	/* x_real[N/2] changes the sign in case of a normal fft */
	/* here i == N/2: only the imaginary part of the middle line is negated */
	x_imag[pSwap[i]] *= -1.0;



	/*
		calculating the energy and phase, phi
	*/
	for (i = 0;  i <= N;  i++)
	{
		a = pSwap[i];

		energy[i] = x_real[a]*x_real[a] + x_imag[a]*x_imag[a];
		if (energy[i] <= 0.0005)
		{
			energy[i] = 0.0005;        /* keep the identity */
			x_real[a] = sqrt(0.0005);  /* sqrt(energy[i]) * cos(phi[i]) == x_real[i] */
			x_imag[a] = 0.0;           /* sqrt(energy[i]) * sin(phi[i]) == x_imag[i] */
		}

		/* the phase is only computed for the lines that are read by psycho_anal() */
		if ((N_ORG == BLKSIZE  &&  i < 6)  ||  (N_ORG != BLKSIZE  &&  i >= 2  &&  i < 52))
			phi[i] = atan2 ((double) x_imag[a], (double) x_real[a]);
	}
}





