/*---------------------------------------------------------------------------*\
	vtecho_fp.c
	Echo canceller kernel module using floating point echo canceller
	Author: Ben Kramer, 14 February 2006
	        Ron Lee, 16 July 2006

\*---------------------------------------------------------------------------*/

/*---------------------------------------------------------------------------*\

	Copyright (C) 2005, 2006 Voicetronix www.voicetronix.com.au

	This library is free software; you can redistribute it and/or
	modify it under the terms of the GNU General Public License
	as published by the Free Software Foundation; either version 2
	of the License, or (at your option) any later version.

	This library is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
	General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this library; if not, write to the Free Software
	Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
	MA  02110-1301  USA

\*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*\
\*---------------------------------------------------------------------------*/

/* Driver constants */
/* Text passed to MODULE_DESCRIPTION() and printed in the load banner. */
#define DRIVER_DESCRIPTION  "VoiceTronix floating point echo canceller"
/* Text passed to MODULE_AUTHOR(). */
#define DRIVER_AUTHOR       "VoiceTronix <support@voicetronix.com.au>"

/* Not referenced directly in this file; defined ahead of the vt headers,
 * so presumably consumed by their logging macros -- TODO confirm. */
#define NAME	"vtecho_fp"

#include <linux/init.h>
#include <linux/module.h>
#include <linux/proc_fs.h>

#include "vtcommon.h"
#include "vtmodule.h"
#include "vtecho_fp.h"


/* Registration record for this canceller: filled in by vtechofp_init() and
 * handed to vt_echo_register() / vt_echo_unregister(). */
static struct vtecho reg;


/*
 * Macros to save/restore the FPU, this allows floats to be used in the ISR.
 * ref: http://www.aero.polimi.it/~rtai/documentation/articles/paolo-floatingpoint.html
 *
 * Intended bracket (see vtecho_fp_proc):
 *
 *	save_cr0_and_clts(cr0);
 *	save_fpenv(buf);
 *	... floating point code ...
 *	restore_fpenv(buf);
 *	restore_cr0(cr0);
 *
 * save_cr0_and_clts(x)  copies CR0 into x, then executes clts
 * restore_cr0(x)        writes x back into CR0
 * save_fpenv(x)         fnsave: stores the FPU state into the lvalue x
 * restore_fpenv(x)      frstor: reloads the FPU state from the lvalue x
 *
 * The "movl" forms address CR0 as a 32 bit register.
 *
 * frstor reads its operand, so the buffer is declared "+m" (read and written)
 * rather than write-only "=m"; keeping it an output-style operand means x is
 * always taken as an lvalue, never decayed to a pointer.
 *
 * None of the macros carries its own trailing semicolon; the caller writes it.
 */
#define save_cr0_and_clts(x) __asm__ __volatile__ ("movl %%cr0,%0; clts" :"=r" (x))
#define restore_cr0(x)       __asm__ __volatile__ ("movl %0,%%cr0": :"r" (x))
#define save_fpenv(x)        __asm__ __volatile__ ("fnsave %0" : "=m" (x))
#define restore_fpenv(x)     __asm__ __volatile__ ("frstor %0" : "+m" (x))

/*
 * Local fabs(): forwards to the compiler builtin.
 * Declared static so the inline definition has internal linkage: no global
 * "fabs" symbol is exported from the module and no out-of-line external
 * definition is ever required.
 */
static inline double fabs(double x) { return __builtin_fabs(x); }


/*
 * procfs read handler for the per-channel "echo_adapt_count" entry.
 *
 *   page   buffer the report is formatted into
 *   start  not used
 *   off    not used
 *   count  size limit handed to scnprintf() for page
 *   eof    set to 1 once any text has been produced
 *   data   the ECHO_SV that vtecho_fp_open() registered with the entry
 *
 * Writes two lines: the adaption counter, the literal cutoff 130 and the
 * current ok_count; then the level estimates truncated to integers.
 *
 * Returns the number of characters placed in page.
 *
 * NOTE(review): if Ls/Ly/Le/Le2 are floats (vtecho_fp_proc copies them into
 * float locals), the conversions below use the FPU outside the save/restore
 * bracket that vtecho_fp_proc applies -- confirm this is acceptable here.
 */
static int proc_read_adaptcount(char *page, char **start, off_t off,
                                int count, int *eof, void *data)
{ //{{{
	ECHO_SV *state = data;
	int written;

	/* line 1: counters */
	written = scnprintf(page, count,
			    "adapts[%ld] CUTOFF[%d] okcount[%d]\n",
			    state->adapt_count, 130, state->ok_count);

	/* line 2: short term level estimates and the scaled thresholds */
	written += scnprintf(page + written, count - written,
			     "Ls[%d] < Ly*Minus6DB[%d](Ly[%d]) : Le2[%d] < Le*MINUS3DB[%d] \n",
			     (int)state->Ls,
			     (int)(state->Ly*MINUS6DB),
			     (int)state->Ly,
			     (int)state->Le2,
			     (int)(state->Le*MINUS3DB));

	if (written > 0)
		*eof = 1;

	return written;
} //}}}


void vtecho_fp_open(struct channel *chan)
{ //{{{
	ECHO_SV *sv = kzalloc(sizeof(ECHO_SV), GFP_KERNEL);
	int i;

	if( ! sv ){
		crit("FAILED to allocate state memory");
		return;
	}
	sv->echoy = kmalloc(sizeof(float) * (FRAME + ECHO_TAPS), GFP_KERNEL);
	if( ! sv->echoy ){
		crit("FAILED to allocate input buffer");
		kfree(sv);
		return;
	}
	for(i=0; i < ECHO_TAPS; ++i){
		sv->a[i] = 0.0;
		sv->a2[i] = 0.0;
		sv->a3[i] = 0.0;
		sv->echoy[i] = 0.0;
		sv->memLy[i] = 0.0;
	}
	for (; i < ECHO_TAPS + FRAME; ++i){
		sv->echoy[i] = 0.0;
	}

	sv->Ly = 130.0;
	sv->Le = 130.0;
	sv->Le2 = 130.0;
	sv->Le3 = 130.0;
	sv->Ls = 130.0;
	sv->lyi = 0;
	sv->hang = 0;
	sv->adapt = 1;
	sv->sup = 0;
	sv->dt = 0;
	sv->beta = BETA;
	sv->ok_count = 0;
	sv->Ey = 1.0;
	sv->y_oldest = 0.0;
	sv->seed = 1000;
	sv->adapt_count = 0;

	if( chan->procfs_root ) {
		struct proc_dir_entry *p = create_proc_read_entry("echo_adapt_count",
								  0444,
								  chan->procfs_root,
								  proc_read_adaptcount,
								  sv);
		if (p) {
			set_proc_owner(p, THIS_MODULE);
		} else
			crit("failed to create procfs echo_adapt_count");
	}
	chan->echocan = sv;
} //}}}

/*
 * Release the echo canceller state attached to a channel.
 *
 *   chan  channel whose state is to be torn down
 *
 * Does nothing when chan->echocan is NULL.  Otherwise the procfs entry is
 * removed first (when the channel has a procfs directory), then the sample
 * buffer and the state itself are freed and chan->echocan is cleared.
 */
void vtecho_fp_close(struct channel *chan)
{ //{{{
	ECHO_SV *state = chan->echocan;

	if (!state)
		return;

	// take the entry down before the memory it reports on goes away
	if (chan->procfs_root)
		remove_proc_entry("echo_adapt_count", chan->procfs_root);

	kfree(state->echoy);
	kfree(state);
	chan->echocan = NULL;
} //}}}


/*
 * Plain C dot product of two float vectors.
 *
 *   a, y  the two vectors
 *   n     length in groups of FOUR elements: 4*n products are summed
 *
 * Returns the sum of a[k]*y[k] for k = 0 .. 4*n-1, accumulated in index
 * order in single precision.
 */
static inline float float_dotprod_vanilla(float a[], float y[],unsigned int n)
{ //{{{
	const unsigned int total = n << 2;
	unsigned int k;
	float acc = 0.0;

	for (k = 0; k < total; ++k)
		acc += a[k] * y[k];

	return acc;
} //}}}

/*
 * Coefficient update: a[k] += x * y[k] for the first n elements.
 *
 *   a  coefficients, modified in place
 *   x  scalar step applied to every element
 *   y  input samples
 *   n  number of elements to update
 */
static inline void update_loop(float a[], float x, float y[], int n)
{ //{{{
	int k;

	for (k = 0; k < n; ++k)
		a[k] += x * y[k];
} //}}}

/*
 * Run the canceller over one block of 1..FRAME samples.
 *
 * Only called from vtecho_fp_proc(), after it has saved the FPU state, so
 * floating point is used freely here.  The block size must not exceed FRAME
 * because sv->echoy holds ECHO_TAPS history samples plus FRAME new ones.
 *
 * Three coefficient sets are involved:
 *   a   produces the output
 *   a2  is adapted whenever the adaption condition holds
 *   a3  is a snapshot of a2 taken half way through a WINDOW of consecutive
 *       "a2 is better" samples; it is copied into a when the window completes
 */
static void vtecho_fp_proc_block(ECHO_SV *sv, short *output, short *input,
				 short *reference, int samples)
{ //{{{
	float *a = sv->a;
	float *a2 = sv->a2;
	float *a3 = sv->a3;
	float *echoy = sv->echoy;
	float *y = &echoy[ECHO_TAPS];	// new samples go after the history
	float Ly = sv->Ly;
	float Le = sv->Le;
	float Le2 = sv->Le2;
	float Le3 = sv->Le3;
	float Ls = sv->Ls;
	float Ey = sv->Ey;
	int   adapt = sv->adapt;
	float beta = sv->beta;
	int   ok_count = sv->ok_count;
	float y_oldest = sv->y_oldest;
	float echo;
	float e2;
	float x;
	float tmp;
	int   i;
	int   k;

	// Copy samples to echoy
	for(i=0;i<samples;i++){
		y[i]=(float)input[i];
	}

	for(i=0;i<samples;i++){
		// Update short term power estimates
		Ly = (1.0-ALPHA)*Ly + ALPHA*(fabs(y[i])+1);
		Ls = (1.0-ALPHA)*Ls + ALPHA*(float)(abs(reference[i])+1);

		// model echo
		echo = float_dotprod_vanilla(a, &y[i-ECHO_TAPS+1], ECHO_TAPS/4);

		// take echo from reference and place as output
		tmp = (float)reference[i] - echo;
		if (tmp > 32767.0) tmp = 32767.0;
		if (tmp < -32767.0) tmp = -32767.0;
		output[i] = (short)tmp;

		// Model echo for adaptive echo model
		echo = float_dotprod_vanilla(a2, &y[i-ECHO_TAPS+1], ECHO_TAPS/4);
		e2 = (float)reference[i] - echo;
		if (e2 >32767.0) e2 = 32767.0;
		if (e2 <-32767.0) e2 = -32767.0;

		// update echo pwr short term ests, note these float
		// around and are not frozen during doubletalk

		Le2 = (1.0-ALPHA)*Le2 + ALPHA*(fabs(e2)+1);
		Le = (1.0-ALPHA)*Le + ALPHA*(fabs(tmp)+1);

		// update energy in filter est: add the newest sample, drop the
		// one that has just left the ECHO_TAPS long window
		Ey = Ey + (y[i]*y[i]) - y_oldest*y_oldest;
		y_oldest = y[i-ECHO_TAPS+1];

		// update filter if:
		// Ls (ref pwr) is at least 3dB less than Ly (input pwr)
		// and Ly > certain minimum

		if ((Ls < Ly*MINUS3DB) && (Ly > 130.0)  && (adapt)) {

			// if adap error est is at least 3dB better than
			// fixed, see if it stays that way for WINDOW
			// samples

			#ifndef FORCE_ADAPT
			if (Le2 < MINUS3DB*Le) {
			#endif
				ok_count++;

				// half way thru, take snap shot of adap coeffs
				if (ok_count == WINDOW/2) {
					for(k=0; k<ECHO_TAPS; k++)
						a3[k] = a2[k];
					Le3 = Le2;
				}

				// weve made it - update fixed with adap coeffs
				if (ok_count == WINDOW) {
					for(k=0; k<ECHO_TAPS; k++)
						a[k] = a3[k];
					Le = Le3;
					ok_count = 0;
					sv->adapt_count++;
				}
			#ifndef FORCE_ADAPT
			}
			else {
				// didnt make it for the whole window, reset
				ok_count = 0;
			}
			#endif

			// adaption
			x = 2*beta*e2/(Ey+1000);
			update_loop(a2,x,&y[i-ECHO_TAPS+1],ECHO_TAPS);
		}
		else {
			// window must be continuous
			ok_count = 0;
		}
	}

	// Move history back by exactly the number of samples consumed, so the
	// newest ECHO_TAPS samples end up at the front whatever the block size.
	for(i=0; i < ECHO_TAPS; ++i){
		echoy[i] = echoy[i+samples];
	}

	sv->Ly = Ly;
	sv->Ls = Ls;
	sv->Le = Le;
	sv->Le2 = Le2;
	sv->Le3 = Le3;
	sv->ok_count = ok_count;
	sv->Ey = Ey;
	sv->y_oldest = y_oldest;
} //}}}

/*
 * Echo cancel a run of samples for a channel.
 *
 *   chan       channel whose chan->echocan state is used
 *   output     receives, per sample, reference[i] minus the echo modelled
 *              from input, limited to +/-32767
 *   input      samples the echo model is driven with
 *   reference  samples the modelled echo is subtracted from
 *   samples    number of samples in each of the three arrays
 *
 * The FPU state is saved before, and restored after, any floating point
 * work, which is what allows this to run from the ISR.
 *
 * Any sample count is accepted: the run is processed in pieces of at most
 * FRAME samples, the capacity of the state's sample buffer.  A count of
 * zero or less does nothing.
 *
 * When the channel has no canceller state (chan->echocan is NULL) the
 * reference is copied to the output unchanged, using integer code only.
 */
void vtecho_fp_proc(struct channel *chan, short *output, short *input,
					  short *reference, int samples)
{ //{{{
	// state variables for FPU
	unsigned long cr0;
	unsigned long linux_fpe[27];

	ECHO_SV *sv = chan->echocan;
	int done;
	int n;

	if (!sv) {
		// nothing to model the echo with: pass the signal through
		for (done = 0; done < samples; done++)
			output[done] = reference[done];
		return;
	}
	if (samples <= 0)
		return;

	save_cr0_and_clts(cr0);
	save_fpenv(linux_fpe);

	for (done = 0; done < samples; done += n) {
		n = samples - done;
		if (n > FRAME)
			n = FRAME;
		vtecho_fp_proc_block(sv, output + done, input + done,
				     reference + done, n);
	}

	restore_fpenv(linux_fpe);
	restore_cr0(cr0);
} //}}}


/*
 * Module entry point: print the load banner, fill in the registration
 * record and register this canceller with vt_echo_register().
 *
 * Always returns 0.
 *
 * NOTE(review): the result of vt_echo_register(), if it returns one, is not
 * examined here -- confirm whether registration can fail.
 */
int __init vtechofp_init(void)
{ //{{{
	info(DRIVER_DESCRIPTION " " VT_VERSION " for linux " UTS_RELEASE);

	reg.owner = THIS_MODULE;
	// strncpy() does not terminate the destination when the source is
	// ID_DATA_MAXSIZE characters or longer, so copy one less and
	// terminate explicitly.
	strncpy(reg.desc, "Floating Point Echo Canceller", ID_DATA_MAXSIZE - 1);
	reg.desc[ID_DATA_MAXSIZE - 1] = '\0';
	reg.vtecho_open = vtecho_fp_open;
	reg.vtecho_close = vtecho_fp_close;
	reg.vtecho_proc = vtecho_fp_proc;
	vt_echo_register(&reg);

	return 0;
} //}}}

/*
 * Module exit point: withdraw the registration made in vtechofp_init()
 * and log the unload.
 */
void __exit vtechofp_exit(void)
{ //{{{
	vt_echo_unregister(&reg);
	info("module exit");
} //}}}

/* Hook the load/unload functions into the kernel module machinery. */
module_init(vtechofp_init);
module_exit(vtechofp_exit);

/* Module metadata, built from the driver constants at the top of the file. */
MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_VERSION(VT_VERSION);
MODULE_LICENSE("GPL");

