// file kernel/n/x86/burnikel.S: Burnikel-Ziegler division
/*-----------------------------------------------------------------------+
 |  Copyright 2005-2006, Michel Quercia (michel.quercia@prepas.org)      |
 |                                                                       |
 |  This file is part of Numerix. Numerix is free software; you can      |
 |  redistribute it and/or modify it under the terms of the GNU Lesser   |
 |  General Public License as published by the Free Software Foundation; |
 |  either version 2.1 of the License, or (at your option) any later     |
 |  version.                                                             |
 |                                                                       |
 |  The Numerix Library is distributed in the hope that it will be       |
 |  useful, but WITHOUT ANY WARRANTY; without even the implied warranty  |
 |  of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU  |
 |  Lesser General Public License for more details.                      |
 |                                                                       |
 |  You should have received a copy of the GNU Lesser General Public     |
 |  License along with the GNU MP Library; see the file COPYING. If not, |
 |  write to the Free Software Foundation, Inc., 59 Temple Place -       |
 |  Suite 330, Boston, MA 02111-1307, USA.                               |
 +-----------------------------------------------------------------------+
 |                                                                       |
 |                     Division de Burnikel et Ziegler                   |
 |                                                                       |
 +-----------------------------------------------------------------------*/


                               # +------------+
                               # |  Division  |
                               # +------------+

# Burnikel-Ziegler division, internal entry point (arguments in registers)
#
# Input:
#   a = natural number of length la      esi = &a, edx = la-lb
#   b = natural number of length lb      ebx = &b, ecx = lb
#   c = natural number of length la-lb   edi = &c
#
# Constraints:
#   two <= lb < la, the most significant bit of b is set,
#   a < BASE^(la-lb)*b
#   a, b, c do not overlap
#
# Output:
#   a <- a mod b
#   c <- floor(a/b)
#
# Clobbered registers:
#   eax,ebx,ecx,edx,esi,edi,ebp <- undefined

#ifdef assembly_sn_burnidiv
#undef L
#define L(x) .Lsn_fburnidiv_##x
        ALIGN_32
#ifdef debug_burnidiv
.Lsn_fburnidiv_buggy:   
#else
.Lsn_fburnidiv:
#endif
        
        # small operands: tail jump to fdiv_n2, taken before any stack
        # frame is set up (lb <= burnidiv_lim or la-lb <= div_small_c_lim)
        cmpl   $burnidiv_lim, %ecx
        jbe    .Lsn_fdiv_n2
        cmpl   $div_small_c_lim, %edx
        jbe    .Lsn_fdiv_n2

        # local variables, addressed from esp once the frame is allocated:
        #   x       = scratch buffer receiving the product c*b0 (offset 28)
        #   a, b, c = pointers to the current slice of a, to b, and to the
        #             current slice of c
        #   n       = number of slices left after the current one
        #   p, q    = lb/2 and (lb+1)/2, the lengths of the two parts of b
        #   r       = length of the current slice of c
        #undef _a_
        #undef _b_
        #undef _c_
        #undef _n_
        #undef _p_
        #undef _q_
        #undef _r_
        #undef _x_
        #define _x_  28(%esp)
        #define _a_  24(%esp)
        #define _b_  20(%esp)
        #define _c_  16(%esp)
        #define _n_  12(%esp)
        #define _p_   8(%esp)
        #define _q_   4(%esp)
        #define _r_    (%esp)

        # split b in two parts
        leal   28(,%ecx,4),%eax         # eax <- 4*lb + 28 bytes
        ALLOCA                          # reserve lb digits + 7 words
        shrl   $1,      %ecx            # ecx <- p = lb/2, CF <- lb mod 2
        movl   %ecx,    _p_             # (movl leaves CF untouched)
        adcl   $0,      %ecx            # ecx <- q = (lb+1)/2
        movl   %ecx,    _q_

        # cut a into slices of q digits
        movl   %edx,    %eax
        movl   %edx,    %ebp            # ebp <- la-lb
        xorl   %edx,    %edx            # edx:eax <- la-lb
        divl   %ecx                     # edx <- r, eax <- n
        testl  %edx,    %edx
        jnz    1f
        movl   %ecx,    %edx            # when r = 0: r <- q, n--
        decl   %eax
1:
        movl   %edx,    _r_
        movl   %eax,    _n_

        movl   %ebp,    %eax
        subl   %edx,    %eax            # eax <- la-lb-r
        leal   (%esi,%eax,4), %esi      # esi <- &a[la-lb-r]
        leal   (%edi,%eax,4), %edi      # edi <- &c[la-lb-r]
        movl   %ebx,    _b_             # save &b

        # loop over the slices, starting with the most significant one
        # on entry here: esi = &a, ebx = &b, edi = &c, edx = r
        ALIGN_4
L(tranche):

        # compare a1 and b1
        movl   %esi,    _a_
        movl   %edi,    _c_
        movl   _p_,     %ecx
        leal   (%esi,%ecx,4), %esi      # esi <- &a1
        leal   (%esi,%edx,4), %edi      # edi <- &a1[r]
        leal   (%ebx,%ecx,4), %ebx      # ebx <- &b1
        movl   _q_,     %ecx
1:
        movl -4(%edi,%ecx,4), %eax      # scan the q top words of a1
        cmpl -4(%ebx,%ecx,4), %eax      # against b1, high word first
        loope  1b                       # stop at the first difference
        jne    L(a1_ok)                 # flags still come from cmpl

        # case a1 = b1: c <- BASE^r - 1 and a1 <- a1 - b1*c
        xorl   %eax,    %eax
        movl   _q_,     %ecx
        cld;   REP(stosl)               # a1 <- a1 - BASE^r*b
        movl   _c_,     %edi
        movl   %edx,    %ecx            # ecx <- r
        movl   $-1,     %eax
        REP(stosl)                      # c <- BASE^r - 1
        movl   _q_,     %ecx            # ecx <- q
        call   .Lsn_finc_1              # a1 <- a1 + b
        # NOTE(review): the next line presumably relies on finc_1 leaving
        # CF = carry, ecx = 0 and esi just past the sum - confirm
        adcl   %ecx,   (%esi)           # store the carry
        jmp    L(div_done)

        # case a1 < b1: c <- floor(a1/b1), a1 <- a1 mod b1
        ALIGN_4
L(a1_ok):
        movl   _q_,     %ecx
        movl   _c_,     %edi
        # esi = &a1, ebx = &b1 and edx = r are still set from the compare;
        # in debug builds this label is the trampoline to the C version
        call   .Lsn_fburnidiv           # perform the division
L(div_done):

        # compute c*b0
        movl   _b_,     %ebx
        movl   _p_,     %ecx
        movl   _c_,     %esi
        movl   _r_,     %edx
        leal   _x_,     %edi
        cmpl   %ecx,    %edx            # when r < p, swap the operands
        jae    1f
        # NOTE(review): presumably ftoommul expects the longer operand
        # in esi/edx - confirm against its header
        xchgl  %esi,    %ebx
        xchgl  %edx,    %ecx
1:
        call   .Lsn_ftoommul            # x <- c*b0

        # a <- a0:r1 - x
        movl   _a_,     %esi
        movl   _p_,     %ecx
        movl   _q_,     %edx
        leal   _x_,     %ebx
        leal  1(%ecx,%edx,1), %edx      # edx <- lb+1
        addl   _r_,     %ecx            # ecx <- p+r
        call   .Lsn_fdec                # CF is tested below as the borrow

        # correct while a < 0
        jnb    L(next)
1:
        movl   _c_,     %esi
2:
        subl   $1,     (%esi)           # c--
        leal   4(%esi), %esi            # next word, flags preserved
        jb     2b                       # propagate the borrow
        movl   _a_,     %esi
        movl   _b_,     %ebx
        movl   _p_,     %ecx
        addl   _q_,     %ecx            # ecx <- lb
        call   .Lsn_finc_1              # a += b
        adcl   %ecx,   (%esi)           # last carry
        jnb    1b                       # no carry out: correct again

        # next slice
L(next):
        movl   _a_,     %esi
        movl   _b_,     %ebx
        movl   _c_,     %edi
        movl   _q_,     %edx
        movl   %edx,    _r_             # every later slice has r = q
        leal   (,%edx,4), %eax
        subl   %eax,    %esi            # a -= q
        subl   %eax,    %edi            # c -= q
        decl   _n_
        jns    L(tranche)               # loop while n >= 0

        # done
        movl   _p_,     %eax
        addl   _q_,     %eax            # eax <- lb
        leal  28(%esp,%eax,4), %esp     # release the stack frame
        ret
        
                              # +---------------+
                              # |  Interface C  |
                              # +---------------+
        

# void xn(burnidiv)(chiffre *a, long lc, chiffre *b, long lb, chiffre *c)
#
# Input:
#   a = natural number of length lc+lb
#   b = natural number of length lb
#   c = natural number of length lc
#
# Constraints:
#   lb >= 2, lc > 0, the most significant bit of b is set,
#   a < BASE^lc*b
#   a, b, c do not overlap
#
# Output:
#   a <- a mod b
#   c <- floor(a/b)
#
# Loads the five C arguments into the registers expected by the internal
# routine and calls it. Debug builds export this entry point under the
# name sn_burnidiv_buggy and call the matching internal label.

#ifdef debug_burnidiv
ENTER(sn_burnidiv_buggy)
#else
ENTER(sn_burnidiv)
#endif

        movl   arg1,    %esi            # esi <- &a
        movl   arg2,    %edx            # edx <- lc (= la-lb)
        movl   arg3,    %ebx            # ebx <- &b
        movl   arg4,    %ecx            # ecx <- lb
        movl   arg5,    %edi            # edi <- &c
#ifdef debug_burnidiv
        call   .Lsn_fburnidiv_buggy     # perform the division
#else
        call   .Lsn_fburnidiv           # perform the division
#endif
        RETURN_WITH_SP
        
#endif /* assembly_sn_burnidiv */

        # case where the assembly version is disabled or being debugged:
        # sn_fburnidiv forwards to the C version
        
#if !defined(assembly_sn_burnidiv) || defined(debug_burnidiv)
        # Trampoline with the register interface of the internal routine
        # (esi = &a, edx = la-lb, ebx = &b, ecx = lb, edi = &c): the
        # registers are pushed as stack arguments, last argument first.
        ALIGN_32
.Lsn_fburnidiv:
        pushl  %edi                     # 5th argument: &c
        pushl  %ecx                     # 4th argument: lb
        pushl  %ebx                     # 3rd argument: &b
        pushl  %edx                     # 2nd argument: lc = la-lb
        pushl  %esi                     # 1st argument: &a
        call   SUBR(sn_burnidiv)
        leal   20(%esp), %esp           # drop the five arguments
        ret
        
#endif /* !defined(assembly_sn_burnidiv) || defined(debug_burnidiv) */
