/*
 *	for new GOGO-no-coda (1999/09)
 *	Copyright (C) 1999 shigeo
 *	special thanks to Keiichi SAKAI, URURI
 */

#define MIE
/*
** FFT and FHT routines
**  Copyright 1988, 1993; Ron Mayer
**  
**  fht(fz,n);
**      Does a hartley transform of "n" points in the array "fz".
**      
** NOTE: This routine uses at least 2 patented algorithms, and may be
**       under the restrictions of a bunch of different organizations.
**       Although I wrote it completely myself; it is kind of a derivative
**       of a routine I once authored and released under the GPL, so it
**       may fall under the free software foundation's restrictions;
**       it was worked on as a Stanford Univ project, so they claim
**       some rights to it; it was further optimized at work here, so
**       I think this company claims parts of it.  The patents are
**       held by R. Bracewell (the FHT algorithm) and O. Buneman (the
**       trig generator), both at Stanford Univ.
**       If it were up to me, I'd say go do whatever you want with it;
**       but it would be polite to give credit to the following people
**       if you use this anywhere:
**           Euler     - probable inventor of the fourier transform.
**           Gauss     - probable inventor of the FFT.
**           Hartley   - probable inventor of the hartley transform.
**           Buneman   - for a really cool trig generator
**           Mayer(me) - for authoring this particular version and
**                       including all the optimizations in one package.
**       Thanks,
**       Ron Mayer; mayer@acuson.com
**
*/

#include <math.h>
#include "common.h"
#include "haveunit.h"

#if 0
static FLOAT costab[20]=
    {
     .00000000000000000000000000000000000000000000000000,
     .70710678118654752440084436210484903928483593768847,
     .92387953251128675612818318939678828682241662586364,
     .98078528040323044912618223613423903697393373089333,
     .99518472667219688624483695310947992157547486872985,
     .99879545620517239271477160475910069444320361470461,
     .99969881869620422011576564966617219685006108125772,
     .99992470183914454092164649119638322435060646880221,
     .99998117528260114265699043772856771617391725094433,
     .99999529380957617151158012570011989955298763362218,
     .99999882345170190992902571017152601904826792288976,
     .99999970586288221916022821773876567711626389934930,
     .99999992646571785114473148070738785694820115568892,
     .99999998161642929380834691540290971450507605124278,
     .99999999540410731289097193313960614895889430318945,
     .99999999885102682756267330779455410840053741619428
    };
static FLOAT sintab[20]=
    {
     1.0000000000000000000000000000000000000000000000000,
     .70710678118654752440084436210484903928483593768846,
     .38268343236508977172845998403039886676134456248561,
     .19509032201612826784828486847702224092769161775195,
     .09801714032956060199419556388864184586113667316749,
     .04906767432741801425495497694268265831474536302574,
     .02454122852291228803173452945928292506546611923944,
     .01227153828571992607940826195100321214037231959176,
     .00613588464915447535964023459037258091705788631738,
     .00306795676296597627014536549091984251894461021344,
     .00153398018628476561230369715026407907995486457522,
     .00076699031874270452693856835794857664314091945205,
     .00038349518757139558907246168118138126339502603495,
     .00019174759731070330743990956198900093346887403385,
     .00009587379909597734587051721097647635118706561284,
     .00004793689960306688454900399049465887274686668768
    };
#endif
extern void (*fht)(float *fz, int n);
void (*fft1k)(float *x_real, float *energy, float (*abx)[2], float *window, int *savebuf);
void (*fft256)(float *x_real, float *energy, float (*abx)[2], float *window, int *savebuf);


#if 0
/* define in fft.nas (8byte饤󤵤Ƥ) */
extern float costab_fft[20],sintab_fft[20];

void fht_C(float *fz, int n)
{
 int i,k,k1,k2,k3,k4,kx;
	float *fi,*fn,*gi;
	float t_c,t_s;
#if 0
	/* fft1k(), fft256()ذư */
 for (k1=1,k2=0;k1<n;k1++)
    {
		float a;
     for (k=n>>1; (!((k2^=k)&k)); k>>=1);
     if (k1>k2)
	{
	     a=fz[k1];fz[k1]=fz[k2];fz[k2]=a;
	}
    }
#endif
  for (fi=fz,fn=fz+n;fi<fn;fi+=4)
     {
		float f0,f1,f2,f3;
      f1     = fi[0 ]-fi[1 ];
      f0     = fi[0 ]+fi[1 ];
      f3     = fi[2 ]-fi[3 ];
      f2     = fi[2 ]+fi[3 ];
      fi[2 ] = (f0-f2);	
      fi[0 ] = (f0+f2);
      fi[3 ] = (f1-f3);	
      fi[1 ] = (f1+f3);
     }
 k=0;
	do{
		float s1,c1;
     k  += 2;
     k1  = 1  << k;
     k2  = k1 << 1;
     k4  = k2 << 1;
     k3  = k2 + k1;
     kx  = k1 >> 1;
	 fi  = fz;
	 gi  = fi + kx;
	 fn  = fz + n;
		do{
			float g0,f0,f1,g1,f2,g2,f3,g3;
	     f1      = fi[0 ] - fi[k1];
	     f0      = fi[0 ] + fi[k1];
	     f3      = fi[k2] - fi[k3];
	     f2      = fi[k2] + fi[k3];
	     fi[k2]  = f0	  - f2;
	     fi[0 ]  = f0	  + f2;
	     fi[k3]  = f1	  - f3;
	     fi[k1]  = f1	  + f3;
	     g1      = gi[0 ] - gi[k1];
	     g0      = gi[0 ] + gi[k1];
	     g3      = SQRT2  * gi[k3];
	     g2      = SQRT2  * gi[k2];
	     gi[k2]  = g0	  - g2;
	     gi[0 ]  = g0	  + g2;
	     gi[k3]  = g1	  - g3;
	     gi[k1]  = g1	  + g3;
	     gi     += k4;
	     fi     += k4;
	    } while (fi<fn);

		t_c = costab_fft[k];
		t_s = sintab_fft[k];
     c1 = 1;
     s1 = 0;
		for (i=1;i<kx;i++){
			float c2,s2;
			float t = c1;
         c1 = t*t_c - s1*t_s;
         s1 = t*t_s + s1*t_c;
         c2 = c1*c1 - s1*s1;
         s2 = 2*(c1*s1);
			fn = fz + n;	/* ư */
	     fi = fz +i;
	     gi = fz +k1-i;
			do{
				float a,b,g0,f0,f1,g1,f2,g2,f3,g3;
		 b       = s2*fi[k1] - c2*gi[k1];
		 a       = c2*fi[k1] + s2*gi[k1];
		 f1      = fi[0 ]    - a;
		 f0      = fi[0 ]    + a;
		 g1      = gi[0 ]    - b;
		 g0      = gi[0 ]    + b;
		 b       = s2*fi[k3] - c2*gi[k3];
		 a       = c2*fi[k3] + s2*gi[k3];
		 f3      = fi[k2]    - a;
		 f2      = fi[k2]    + a;
		 g3      = gi[k2]    - b;
		 g2      = gi[k2]    + b;
		 b       = s1*f2     - c1*g3;
		 a       = c1*f2     + s1*g3;
		 fi[k2]  = f0        - a;
		 fi[0 ]  = f0        + a;
		 gi[k3]  = g1        - b;
		 gi[k1]  = g1        + b;
		 b       = c1*g2     - s1*f3;
		 a       = s1*g2     + c1*f3;
		 gi[k2]  = g0        - a;
		 gi[0 ]  = g0        + a;
		 fi[k3]  = f1        - b;
		 fi[k1]  = f1        + b;
		 gi     += k4;
		 fi     += k4;
		} while (fi<fn);
        }
    } while (k4<n);
}
#endif /* #if 0 */

void fht_3DN(float *fz,int n);
void fht_SSE(float *fz,int n);
void fht_FPU(float *fz,int n);	/* by  */
#ifdef USE_E3DN
void fht_E3DN(float *fz,int n);
#endif

void setup_fht(int useUNIT){
#ifdef USE_E3DN
	if(useUNIT & tE3DN){
		SETUP_DSP("use:fht_E3DN\n");
		fht=fht_E3DN;
	}else
#endif
	if(useUNIT & t3DN){
		SETUP_DSP("use:fht_3DN\n");
		fht=fht_3DN;
	}else
	if(useUNIT & tSSE){
		SETUP_DSP("use:fht_SSE\n");
		fht=fht_SSE;
	}else
	{
		SETUP_DSP("use:fht_FPU\n");
		fht=fht_FPU;
#if 0
		SETUP_DSP("use:fht_C\n");
		fht=fht_C;
#endif
	}
}

/*
 *	abx ax[i]=abx[i][0], bx[i]=abx[i][1]ȤʤäƤ
 *	¿ʬ®
 *      fht()ƽNǤˤۤ®ǤȤư
 *	fft()fft1k(),fft256()ʬΥby K.SAKAI
 */
#define	N 1024
void fft1k_C(float *x_real, float *energy, float (*abx)[2], float *window, int *savebuf)
{
	float	a,b;
	int	i, j, k, k1, k2;

	/* l3psy.cL3psycho_energy() */
	for( i = 0; i < N; i++ ){
		x_real[i] = window[i] * savebuf[i];
	}
	/* subs.cfht() */
	for(k1 = 1, k2 = 0; k1 < N; k1++){
		for(k = (N>>1); !((k2^=k)&k); k >>= 1);

		if (k1 > k2){
			a = x_real[k1];
			x_real[k1] = x_real[k2];
			x_real[k2] = a;
		}
	}

	fht(x_real, N);

 energy[0] = x_real[0] * x_real[0];
	abx[0][0] = abx[0][1] = x_real[0];

	/* l3psy.cǻȤΤ i==6 ޤǤʤΤ */
	for( i = 1, j = N-1; i < 6; i++, j-- ){
		a = abx[i][0] = x_real[i];
		b = abx[i][1] = x_real[j];
		energy[i]=(a*a + b*b) * 0.5;
		if (energy[i] < 0.0005){
			energy[i] = 0.0005;
			abx[i][0] = abx[i][1] = 0.022360679774997897; /* 0.0005 */
		}
	}
   
	for( ; i < N/2; i++, j-- ){
		a = x_real[i];
		b = x_real[j];
		energy[i]=(a*a + b*b) * 0.5;
#if 0	/* 礭ͤΤۤʤΤǾάƤ褤Ȼפ... */
		if (energy[i] < 0.0005){
     energy[i] = 0.0005;
   }
#endif
 }
 energy[N/2] = x_real[N/2] * x_real[N/2];
}
#undef	N

#define	N 256
void fft256_C(float *x_real, float *energy, float (*abx)[2], float *window, int *savebuf)
{
	float	a, b;
	int	i, j, k, k1, k2;

	/* l3psy.cL3psycho_energy() */
	for(i = 0; i < N; i++){
		x_real[i] = window[i] * savebuf[i];
	}
	/* subs.cfht() */
	for(k1 = 1, k2 = 0; k1 < N; k1++){
		for (k = (N>>1); !((k2^=k)&k); k >>= 1);

		if(k1 > k2){
			a = x_real[k1];
			x_real[k1] = x_real[k2];
			x_real[k2] = a;
		}
	}

	fht(x_real, N);

	energy[0] = x_real[0] * x_real[0];
	abx[0][0] = abx[0][1] = x_real[0];
   
	for (i=1,j=N-1;i<N/2;i++,j--){
		a = abx[i][0] = x_real[i];
		b = abx[i][1] = x_real[j];
		energy[i]=(a*a + b*b) * 0.5;
		if (energy[i] < 0.0005){
     energy[i] = 0.0005;
			abx[i][0] = abx[i][1] = 0.022360679774997897; /* 0.0005 */
   }
 }
	energy[N/2] = x_real[N/2] * x_real[N/2];
	abx[N/2][0] = abx[N/2][1] = x_real[N/2];
}
#undef	N

void fft1k_FPU(float *x_real, float *energy, float (*abx)[2], float *window, int *savebuf);
void fft256_FPU(float *x_real, float *energy, float (*abx)[2], float *window, int *savebuf);
void fft1k_SSE(float *x_real, float *energy, float (*abx)[2], float *window, int *savebuf);
void fft256_SSE(float *x_real, float *energy, float (*abx)[2], float *window, int *savebuf);
void fft1k_3DN(float *x_real, float *energy, float (*abx)[2], float *window, int *savebuf);
void fft256_3DN(float *x_real, float *energy, float (*abx)[2], float *window, int *savebuf);

void setup_fft(int useUNIT)
{
	if(useUNIT & t3DN){
		SETUP_DSP("use:fft1k_3DN\n");
		fft1k = fft1k_3DN;
		SETUP_DSP("use:fft256_3DN\n");
		fft256 = fft256_3DN;
	}else if(useUNIT & tSSE){
		SETUP_DSP("use:fft1k_SSE\n");
		fft1k = fft1k_SSE;
		SETUP_DSP("use:fft256_SSE\n");
		fft256 = fft256_SSE;
	}else{
#if 1
		SETUP_DSP("use:fft1k_FPU\n");
		fft1k = fft1k_FPU;
		SETUP_DSP("use:fft256_FPU\n");
		fft256 = fft256_FPU;
#else
		SETUP_DSP("use:fft1k_C\n");
		fft1k = fft1k_C;
		SETUP_DSP("use:fft256_C\n");
		fft256 = fft256_C;
#endif
	}
}

/*
 *	s != 0 ꤹ
 *	s & 3 == 0 ꤹ
 */

void fft_side_C( float in[2][1024], int s, float *ret)
{
	float energy;
	float a,b;
	int i,j;
#ifdef MIE
	/* energyϤʤ礭ͤäʤΤϾάƤ(¿ʬ) */

	energy = 0;

	for (i=s,j=1024-s;i<512;i++,j--){
		a = in[0][i] - in[1][i];
		b = in[0][j] - in[1][j];
		energy += a*a + b*b;
	}
	a = in[0][512] - in[1][512];
	*ret = energy * 0.25 + a * a * 0.5;
#else
	if(!s){
		energy = (in[0][0] - in[1][0]) * (in[0][0] - in[1][0])*0.5;
		s++;
	}else
		energy=0.0;
	for (i=s,j=1024-s;i<512;i++,j--){
		float t;
		a = in[0][i] - in[1][i];
		b = in[0][j] - in[1][j];
		t=(a*a + b*b)*0.25;

		if (t < 0.0005){
			t = 0.0005;
		}
		energy += t;
	}
	*ret = energy + (in[0][512] - in[1][512]) * (in[0][512] - in[1][512])*0.5;
#endif
}

void (*fft_side)( float in[2][1024], int s, float *ret);
void fft_side_3DN( float in[2][1024], int s, float *ret );
void fft_side_SSE( float in[2][1024], int s, float *ret );
#ifdef USE_E3DN
void fft_side_E3DN( float in[2][1024], int s, float *ret );
#endif

void setup_fft_side(int useUNIT){
#ifdef USE_E3DN
  if( useUNIT & tE3DN ){
    SETUP_DSP("use:fft_side_E3DN\n");
    fft_side = fft_side_E3DN;
  }else
#endif
  if( useUNIT & t3DN ){
    SETUP_DSP("use:fft_side_3DN\n");
    fft_side = fft_side_3DN;
  }else
  if( useUNIT & tSSE ){
    SETUP_DSP("use:fft_side_SSE\n");
    fft_side = fft_side_SSE;
  }else
  {
    SETUP_DSP("use:fft_side_C\n");
    fft_side = fft_side_C;
  }
}
