/*
   This file is part of Numerix.  Numerix is free software; you can
   redistribute it and/or modify it under the terms of the GNU General
   Public License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
   USA 
*/

/* +------------------------------------------------------------------------+
   |                                                                        |
   |                      Entiers de longueur arbitraire                    |
   |                                                                        |
   |                        FFT modulo m = BASE^n+1                         |
   |                                                                        |
   +------------------------------------------------------------------------+ */


/* M. Quercia, 31/01/2001 */

#include "macros-s.h"

                     /* +--------------------+
                        |  c <- a + b mod m  |
                        +--------------------+ */


/* void xn(sc_add)(naturel a, naturel b, naturel c, longueur n)

   c <- a + b mod m, with m = BASE^n + 1 and 32-bit words.

   a, b and c are accessed as n+1 words each, least significant word
   first.  The n+1 words are added with an ordinary carry chain.  As long
   as the addition does not carry out of the top word c[n], the plain sum
   is left in c.  When it does carry out, the top part
       H = 2^32 + c[n]
   is folded back using BASE^n = -1 mod m: c[n] is set to 1 (worth -1)
   and H - 1 is subtracted from the low words, so that
       low + H*BASE^n  =  (low - (H-1)) + 1*BASE^n   mod m.
   The 1 stored in c[n] also absorbs a borrow that would otherwise run
   off the top of the number.

   Registers: esi/edi/ebx point just past the end of a/b/c during the
   main loop, ecx is a negative word index counting up to 0, eax holds
   the current word.  edx is used as scratch in the overflow path, as the
   other routines in this file already do.

   NOTE(review): the overflow path touches c[1] and needs
   H - 1 <= BASE^n for the borrow to stop at c[n], i.e. n >= 2.
   .Lsn_sc_add_exit is not defined here; presumably EXIT provides it. */
#ifdef have_sn_sc_add
ENTER(sn_sc_add)

	movl   arg4,%ecx
	incl   %ecx                 /* ecx = n+1 = number of words */

	movl   arg1,%esi
	movl   arg2,%edi
	movl   arg3,%ebx
	leal   (%esi,%ecx,4),%esi   /* esi -> end of a */
	leal   (%edi,%ecx,4),%edi   /* edi -> end of b */
	leal   (%ebx,%ecx,4),%ebx   /* ebx -> end of c */
	negl   %ecx                 /* ecx = -(n+1), runs up to 0 */
	clc
	.align 4

	/* ordinary addition; incl does not touch CF, so the carry produced
	   by adcl survives until the next iteration */
.Lsn_sc_add_digits:
	movl   (%esi,%ecx,4),%eax
	adcl   (%edi,%ecx,4),%eax
	movl   %eax,(%ebx,%ecx,4)
	incl   %ecx
	jne    .Lsn_sc_add_digits
	jnc    .Lsn_sc_add_exit     /* no carry out of c[n]: done */

	/* Overflow: eax = c[n], true top part H = 2^32 + eax.
	   Build the two-word value H-1 in edx:eax with a clean borrow chain
	   (the carry left over from the addition must not leak into it). */
	movl   $1,-4(%ebx)          /* c[n] = 1 */
	movl   $1,%edx              /* edx:eax = H (mov keeps the flags) */
	subl   $1,%eax              /* low word of H-1, CF = 1 iff eax was 0 */
	sbbl   $0,%edx              /* high word of H-1, leaves CF = 0 */

	/* c[0..1] -= H-1, then propagate the borrow upwards */
	movl   arg3,%ebx
	subl   %eax,(%ebx)
	leal   4(%ebx),%ebx         /* lea keeps the borrow in CF */
	sbbl   %edx,(%ebx)
	jnb    .Lsn_sc_add_exit
.Lsn_sc_add_ret:
	leal   4(%ebx),%ebx
	sbbl   $0,(%ebx)
	jb     .Lsn_sc_add_ret

EXIT(sn_sc_add)
#endif

                     /* +--------------------+
                        |  c <- a - b mod m  |
                        +--------------------+ */


/* void xn(sc_sub)(naturel a, naturel b, naturel c, longueur n)

   c <- a - b mod m, with m = BASE^n + 1 and 32-bit words.

   a, b and c are accessed as n+1 words each, least significant word
   first.  The n+1 words are subtracted with an ordinary borrow chain.
   If the subtraction does not borrow out of the top word, the plain
   difference is left in c.  Otherwise the stored top word eax stands for
   the negative value eax - 2^32; since BASE^n = -1 mod m,
       low + (eax - 2^32)*BASE^n  =  low + (2^32 - eax)   mod m,
   so c[n] is cleared and T = 2^32 - eax is added to the low words.
   T lies in 1..2^32: it only needs a second word when eax == 0.

   Registers: esi/edi/ebx point just past the end of a/b/c during the
   main loop, ecx is a negative word index counting up to 0, eax holds
   the current word.  edx is used as scratch in the borrow path, as the
   other routines in this file already do.

   NOTE(review): the borrow path touches c[1], so n >= 1 is assumed.
   .Lsn_sc_sub_exit is not defined here; presumably EXIT provides it. */
#ifdef have_sn_sc_sub
ENTER(sn_sc_sub)

	movl   arg4,%ecx
	incl   %ecx                 /* ecx = n+1 = number of words */

	movl   arg1,%esi
	movl   arg2,%edi
	movl   arg3,%ebx
	leal   (%esi,%ecx,4),%esi   /* esi -> end of a */
	leal   (%edi,%ecx,4),%edi   /* edi -> end of b */
	leal   (%ebx,%ecx,4),%ebx   /* ebx -> end of c */
	negl   %ecx                 /* ecx = -(n+1), runs up to 0 */
	clc
	.align 4

	/* ordinary subtraction; incl does not touch CF, so the borrow
	   produced by sbbl survives until the next iteration */
.Lsn_sc_sub_digits:
	movl   (%esi,%ecx,4),%eax
	sbbl   (%edi,%ecx,4),%eax
	movl   %eax,(%ebx,%ecx,4)
	incl   %ecx
	jne    .Lsn_sc_sub_digits
	jnb    .Lsn_sc_sub_exit     /* no borrow out of c[n]: done */

	/* Borrow: build T = 2^32 - eax as the two-word value edx:eax.
	   The high word is 1 only when eax == 0. */
	movl   $0,-4(%ebx)          /* c[n] = 0 */
	cmpl   $1,%eax              /* CF = 1 iff eax == 0 */
	movl   $0,%edx              /* mov keeps the flags */
	adcl   $0,%edx              /* edx = high word of T, leaves CF = 0 */
	negl   %eax                 /* eax = low word of T */

	/* c[0..1] += T, then propagate the carry upwards (c[n] = 0 stops it) */
	movl   arg3,%ebx
	addl   %eax,(%ebx)
	leal   4(%ebx),%ebx         /* lea keeps the carry in CF */
	adcl   %edx,(%ebx)
	jnc    .Lsn_sc_sub_exit
.Lsn_sc_sub_ret:
	leal   4(%ebx),%ebx
	adcl   $0,(%ebx)
	jc     .Lsn_sc_sub_ret

EXIT(sn_sc_sub)
#endif

                      /* +------------------+
                         |  c <- a*b mod m  |
                         +------------------+ */

/* void xn(sc_mul)(naturel a, naturel b, naturel c,  longueur n)

   c <- a*b mod m, with m = BASE^n + 1.

   The product of the two (n+1)-word operands is first written to a
   temporary buffer of 2n+2 words carved out of the stack: sn_karasqr is
   called when a and b are the same pointer, sn_karamul otherwise.  The
   buffer is then folded into c[0..n] as
       buff[0..n-1] - buff[n..2n-1] + buff[2n..2n+1]
   and, if that total is negative, BASE^n + 1 is added once.

   The borrow of the subtraction is parked on the stack with pushf while
   the addition runs, and recovered with popf afterwards.

   NOTE(review): the stack pointer is lowered here and never raised again
   in this routine; presumably EXIT restores it.  The two top words are
   added at c[0] and c[1], so n >= 2 appears to be assumed - confirm. */
#ifdef have_sn_sc_mul
ENTER(sn_sc_mul)

	/* reserve 2n+2 words on the stack to hold the product */
	movl   arg4,%ecx
	incl   %ecx
	movl   %ecx,%edx
	negl   %ecx
	leal   (%esp,%ecx,8),%esp

	/* compute the product, or the square when a and b are the same
	   pointer.  The three pushes below are shared by both calls:
	   buffer address (pushl %esp stores the value esp had before the
	   push), length n+1, and b. */
	movl   arg1,%esi
	movl   arg2,%edi
	pushl  %esp
	pushl  %edx
	pushl  %edi
	cmpl   %esi,%edi
	jne    .Lsn_sc_mulk

	call   sn_karasqr
	addl   $12,%esp
	jmp    .Lsn_sc_mulk_done

.Lsn_sc_mulk:
	/* distinct operands: two more arguments, length n+1 and a */
	pushl  %edx
	pushl  %esi
	call   sn_karamul
	addl   $20,%esp
.Lsn_sc_mulk_done:
	
	/* subtract what lies above BASE^n:
	   c[i] = buff[i] - buff[n+i] for i = 0..n-1 */
	movl   arg4,%ecx
	movl   arg3,%edi
	leal   (%esp,%ecx,4),%ebx   /* ebx -> buff[n]  */
	leal   (%ebx,%ecx,4),%esi   /* esi -> buff[2n] */
	leal   (%edi,%ecx,4),%edi   /* edi -> c[n]     */
	negl   %ecx
	movl   %ecx,%edx
	clc
	.align 4

.Lsn_sc_mul_sub_xy:
	movl   (%ebx,%ecx,4),%eax
	sbbl   (%esi,%ecx,4),%eax
	movl   %eax,(%edi,%ecx,4)
	incl   %ecx
	jne    .Lsn_sc_mul_sub_xy
	pushf                     /* save the borrow */

	/* add what lies above BASE^2n: the two words buff[2n], buff[2n+1]
	   go into c[0], c[1]; eax ends up holding the carry out of c[n-1] */
	movl   %edx,%ecx
	movl   (%esi),%eax
	addl   %eax,(%edi,%ecx,4)
	incl   %ecx
	movl   4(%esi),%eax
	adcl   %eax,(%edi,%ecx,4)
	movl   $0,%eax
	jnc    .Lsn_sc_mul_add_z_done
.Lsn_sc_mul_add_z:
	incl   %ecx
	jz     .Lsn_sc_mul_add_z_done
	adcl   $0,(%edi,%ecx,4)
	jc     .Lsn_sc_mul_add_z
.Lsn_sc_mul_add_z_done:
	adcl   $0,%eax

	popf                    /* borrow of the subtraction */
	sbbl   $0,%eax
	jnb    .Lsn_sc_mul_finish

	/* negative total: add BASE^n + 1.  The +1 is rippled through
	   c[0..n-1]; eax becomes 1 only if the carry leaves c[n-1]. */
	movl   %edx,%ecx
	movl   $0,%eax
	stc
.Lsn_sc_mul_inc:
	adcl   $0,(%edi,%ecx,4)
	jnc    .Lsn_sc_mul_finish
	incl   %ecx
	jne    .Lsn_sc_mul_inc
	incl   %eax

	/* top word: c[n] = eax */
.Lsn_sc_mul_finish:
	movl   %eax,(%edi)

EXIT(sn_sc_mul)
#endif

                     /* +---------------------+
                        |  b <- a << k mod m  |
                        +---------------------+ */


/* 0 < k < n*HW */
/* void xn(sc_shift)(naturel a, naturel b, longueur k, longueur n)

   b <- a << k mod m, with m = BASE^n + 1 and 32-bit words.

   Step 1: a (n+1 words) is shifted left by k mod 32 bits into a
           temporary x of n+2 words carved out of the stack.
   Step 2: the remaining shift by p = k/32 whole words is combined with
           the reduction: the words of x that end up above BASE^n are
           subtracted from the low part, using BASE^n = -1 mod m.

   Because a is copied to the stack before b is written, nothing here
   requires a and b to be distinct.

   NOTE(review): x is consumed with popl and the stack pointer is not
   restored in this routine; presumably EXIT does it.
   .Lsn_sc_shift_exit is not defined here; presumably EXIT provides it. */
#ifdef have_sn_sc_shift
ENTER(sn_sc_shift)

	movl   arg1,%esi            /* esi -> a (first argument) */

	/* reserve n+2 words on the stack for the shifted copy x */
	movl   arg4,%ecx
	incl   %ecx
	movl   %ecx,%edx            /* edx = n+1 */
	negl   %ecx
	leal   -4(%esp,%ecx,4),%esp /* esp -= 4*(n+2) */
	movl   %esp,%edi            /* edi -> x[0] */

	/* sub-word part of the shift: cl = k mod 32 */
	movl   arg3,%ecx
	andl   $31,%ecx
	jz     .Lsn_sc_shift_copy

	/* walk a from the top word down; eax always holds the word just
	   above the one being loaded, so shld produces x[edx] */
	xorl   %eax,%eax
	.align 4
.Lsn_sc_shift_bits:
	movl   -4(%esi,%edx,4),%ebx
	shldl  %cl,%ebx,%eax
	movl   %eax,(%edi,%edx,4)
	movl   %ebx,%eax
	decl   %edx
	jne    .Lsn_sc_shift_bits
	shll   %cl,%eax
	movl   %eax,(%edi)          /* x[0] = a[0] << cl */
	jmp    .Lsn_sc_shift_shift_done

	/* bit count divisible by 32: plain copy, x[n+1] = 0 */
.Lsn_sc_shift_copy:
	movl   $0,(%edi,%edx,4)
	.align 4
.Lsn_sc_shift_copy1:
	movl   -4(%esi,%edx,4),%eax
	movl   %eax,-4(%edi,%edx,4)
	decl   %edx
	jne    .Lsn_sc_shift_copy1
.Lsn_sc_shift_shift_done:

/*
                reduction modulo BASE^n+1, p = k/32 < n

                    p         n+2
                <-----> <---------------->
               +-------+--------+--------+
         x =   |0 ... 0|    u   |    v   |  = 0..0:u - v mod m
               +-------+--------+--------+
                <--------------> <------>
                     n             p+2

                u = x[0..n-p-1], v = x[n-p..n+1]
*/

	movl  arg2,%esi           /* esi -> b */
	movl  arg3,%ecx
	shrl  $5,%ecx             /* ecx = p = k/32 */
	movl  arg4,%edx
	leal  (%esp,%edx,4),%edi  /* edi -> v[p] = x[n] */
	movl  $0,(%esi,%edx,4)    /* b[n] = 0    */
	subl  %ecx,%edx           /* edx = n-p   */

	/* low words of the result: b[0..p-1] = -v[0..p-1] */
	leal  (%esi,%ecx,4),%esi  /* esi -> b[p] */
	negl   %ecx
	clc
	jecxz .Lsn_sc_sc_shift_vlow_done
	.align 4
.Lsn_sc_sc_shift_vlow:
	movl   $0,%eax            /* mov keeps the running borrow in CF */
	sbbl   (%edi,%ecx,4),%eax
	movl   %eax,(%esi,%ecx,4)
	incl   %ecx
	jne    .Lsn_sc_sc_shift_vlow
.Lsn_sc_sc_shift_vlow_done:

	/* u - top of v; the words of u are popped off the stack in order */
	popl   %eax            /* eax = u[0] */
	sbbl   (%edi),%eax
	movl   %eax,(%esi)     /* b[p] = u[0] - v[p] */
	decl   %edx            /* edx = n-p-1 */
	jne    .Lsn_sc_shift_medium_p
	movl   4(%edi),%eax    /* p = n-1: v[p+1] is handled at the end */
	jmp    .Lsn_sc_shift_uhigh_done

.Lsn_sc_shift_medium_p:
	popl   %eax            /* eax = u[1] */
	sbbl   4(%edi),%eax
	movl   %eax,4(%esi)    /* b[p+1] = u[1] - v[p+1] */
	decl   %edx            /* edx = n-p-2 */
	jz     .Lsn_sc_shift_uhigh1

	/* copy the remaining words of u, still propagating the borrow;
	   incl/decl leave CF untouched */
	movl   %edx,%ecx
	movl   $2,%edx
	.align 4
.Lsn_sc_shift_uhigh:
	popl   %eax
	sbbl   $0,%eax
	movl   %eax,(%esi,%edx,4)
	incl   %edx
	decl   %ecx
	jne    .Lsn_sc_shift_uhigh
.Lsn_sc_shift_uhigh1:
	movl   $0,%eax
.Lsn_sc_shift_uhigh_done:

	/* what is still owed sits at weight BASE^n = -1: add the last word
	   of v (only when p = n-1) and the final borrow back at b[0] */
	movl   arg2,%esi
	adcl   %eax,(%esi)
	jnc    .Lsn_sc_shift_exit
.Lsn_sc_shift_last:
	leal   4(%esi),%esi
	adcl   $0,(%esi)
	jc     .Lsn_sc_shift_last
	
EXIT(sn_sc_shift)
#endif
