/*
   This file is part of Numerix.  Numerix is free software; you can
   redistribute it and/or modify it under the terms of the GNU General
   Public License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
   USA 
*/

/* +------------------------------------------------------------------------+
   |                                                                        |
   |                      Entiers de longueur arbitraire                    |
   |                                                                        |
   |                           Racine carre                                |
   |                                                                        |
   +------------------------------------------------------------------------+ */

/* M. Quercia, 31/01/2001 */

#include "long_int.h"
#include "long_int-s.h"

              /* +-----------------------------------+
                 |  c <- 2*trunc(a^1/2), a <- a-c^2  |
                 +-----------------------------------+ */

/* BASE/4 <= a[la-1] < BASE/2, la pair */
#ifndef have_sn_hsqrt
void xn(hsqrt)(naturel a, longueur la, naturel c) {
  longueur i,k;
  ndouble n,q,u,v,c0,c1,c2,ret;
  zdouble s;

  /* Rmq : c2 ne risque pas d'tre utilis avant initialisation */
  /* la premire division se fait avec c0 = 0 et c2 est inutile */
#ifdef useless_init
  c2 = 0;
#endif

  /* calculer la racine du double-mot de tte par Newton */
  a += la-2; c+= la/2-1;
  n = ((ndouble)a[1] << HW) + (ndouble)a[0];
  u = BASE/2;
  do {v = u; u = (u + n/u)/2;} while (v > u);
  a[0] = n - v*v;
  c0 = 0;
  c1 = 2*v;
  c[0] = c1;

  /* calculer les chiffres suivants par division */
  for (k=2, a -= 2, c--; k <= la/2; k++, a -= 2, c--) {

    /* quotient approch q = trunc(a/2c), le vrai quotient est q ou q-1 */
    u  = (((ndouble)a[k]) << HW) + (ndouble)a[k-1];
    q  = u/c1;
    if (q >= BASE) q=BASE-1;
    u  = u - q*c1;
    if (u < BASE) {
      u  = (u << HW) + (ndouble)a[k-2];
      v  = q*c0;
      while (u < v) {q--; v -= u; u=c2;} /* k = 2 => c0 = 0 => c2 inutile */
    }

    /* a <- a - 2qc - q^2, 2c <- 2c + 2q */
    c[0] = q;
    for (ret=0, i=0; i < k; i++) {
      ret += q*((ndouble)c[i]);
      s = (ndouble)a[i] - (ret & (BASE-1));
      ret >>= HW;
      if (s < 0) ret++;
      a[i] = s;
    }
    s = (ndouble)a[k] - ret;
    c[0] = 2*q; if (q >= BASE/2) c[1]++;
    
    /* tant que a < 0 : a <- a+2b-1, q <- q-1 */
    while (s < 0) {
      c[0]--; if (c[0] == BASE-1) c[1]--;
      for (ret=0, i=0; i < k; i++) {
	ret += (ndouble)a[i] + (ndouble)c[i];
	a[i] = ret;
        ret >>= HW;
      }
      s += ret;
      c[0]--;
    }
    a[k] = s;

    /* mise  jour c0,c1,c2 */
    if (k == 2) {c0 = c[0]; c1 = c[1]; c2 = (c1 << HW) + c0;}
  }

}
#endif

           /* +----------------------------------------+
              |  c <- trunc(a^1/2), algorithme en n^2  |
              +----------------------------------------+ */

/* a[la-1] non nul, capacit(buff) >= la+2 */
#ifndef have_sn_sqrt_n2
void xn(sqrt_n2)(naturel a, longueur la, naturel c) {
  ndouble n,u,v;
  naturel buff;
  int sh;

  /* si la <= 2, calcul direct par Newton */
  if (la <= 2) {
    n = (((ndouble)((la == 2) ? a[1] : 0)) << HW) + (ndouble)a[0];
    for (u=1, v=n; v > 0; u <<= 1, v >>= 2);
    do {v = u; u = (u + n/u)/2;} while (v > u);
    c[0] = v;
    return;
  }

  /* dcaler a de sorte que le chiffre de tte ait un rang pair */
  /* et soit compris entre BASE/16 et BASE/4                    */
  for (sh = (la%2)*HW - 2, n=a[la-1]; n<BASE/4; n <<= 2, sh += 2);
  if (sh < 0) sh += 2*HW;
  buff = xn(alloc_tmp)(la+2);
  xn(shl)(a,la,sh,buff);
  la += (sh+2)/HW;

  /* c <- 2*sqrt(a) */
  xn(hsqrt)(buff,la,c);
  xn(free)(buff);

  /* dcaler c */
  xn(shr)(c,la/2,sh/2+1,c);
}
#endif

           /* +-----------------------------------------+
              |  c <- trunc(a^1/2) par division rapide  |
              +-----------------------------------------+ */

/* la > 2*klim, a[la-1] non nul */
#ifndef have_sn_sqrt_k
void xn(sqrt_k)(naturel a, longueur la, naturel c) {
  naturel b,buff;
  ndouble n;
  zdouble ret;
  longueur lc, bloc;
  longueur ib,jb,nb,pos,i,j,k,l;
  int sh;

  /* dcaler a de sorte que le chiffre de tte ait un rang pair */
  /* et soit compris entre BASE/16 et BASE/4                    */
  for (sh = (la%2)*HW - 2, n=a[la-1]; n<BASE/4; n <<= 2, sh += 2);
  if (sh < 0) sh += 2*HW;
  buff = xn(alloc_tmp)(2*la+4);
  xn(shl)(a,la,sh,buff+la+2);
  a = buff+la+2;
  la += (sh+2)/HW;
  lc = la/2;

  /* dtermine la taille des blocs de faon  avoir un nombre de */
  /* blocs lgrement infrieur  une puissance de 2             */
  for (bloc = lc; bloc >= klim; bloc >>= 1);
  bloc++;

  /* racine carre du double bloc de tte */
  a += la-2*bloc; b = c+lc; c = b-bloc;
  xn(hsqrt)(a,2*bloc,c);
  la = bloc;

  /* calculer les blocs suivants par division rapide */

  for (i=bloc; i < lc; ) {

    l = min(bloc,lc-i);
    a -= l; c -= l;
    la = xn(hquo)(a,la+l,b-bloc,bloc,c,l);
    i += l;
    if (i==lc) break;


    /* corrige le reste en doublant  chaque fois le nb. de blocs */
    ib = 2*((i+bloc-1)/bloc);
    for (jb=1, j=bloc; (ib&jb) == 0; jb <<= 1, j <<= 1) {

      if (ib/jb > 4) { /* double produit */
	k = j+l-bloc;
	xn(mul_k)(c,k,b-2*j,j, buff);
	pos = 2*j-bloc;
	if (la+pos < k+j) la=k+j-pos;
	ret = xn(dec)(a-pos,la+pos,buff,j+k);
	while(ret) {
	  xn(dec_1)(c,j,1);
	  while (la+pos < 2*j) a[la++] = BASE-1;
	  ret += xn(inc)(a-pos,la+pos,b-2*j,2*j);
	  la = bloc;
	}
      }

      else {         /* carr */
	k = j+l-bloc;
	xn(sqr_k)(c,k, buff);
	pos = j+k-bloc;
	if (la+pos < 2*k) la=2*k-pos;
	ret = xn(dec)(a-pos,la+pos,buff,2*k);
	xn(inc)(c,k+1,c,k);
	while(ret) {
	  xn(dec_1)(c,k+1,1);
	  while (la+pos < j+k) a[la++] = BASE-1;
	  ret += xn(inc)(a-pos,la+pos,b-j-k,j+k);
	  xn(dec_1)(c,k+1,1);
	  la = bloc;
	}
	break;
      }
    }
  }

  /* dernires corrections, arrte ds que le reste peut absorber */
  /* les retenues ventuelles sans atteindre zro                 */

  for (nb=0, k=bloc; k < lc; nb++, k<<=1);
  for (j=bloc; (nb); nb--, j <<=1) {

    i = j*((lc+j-1)/j);
    l = lc-i+bloc;
    while ((la > l) && a[la-1] == 0) la--;
    if ((la > l+1) || ((la == l+1) && (a[la-1] > nb))) break;
    la = max(la,bloc);
    k = lc-i+j;

    if (2*j < lc) { /* double produit */
      xn(mul_k)(c,k,b-2*j,j, buff);
      pos = 2*j-bloc;
      if (la+pos < k+j) la=k+j-pos;
      ret = xn(dec)(a-pos,la+pos,buff,j+k);
      while(ret) {
	xn(dec_1)(c,j,1);
	while (la+pos < 2*j) a[la++] = BASE-1;
	ret += xn(inc)(a-pos,la+pos,b-2*j,2*j);
	la = bloc;
      }
    }
    
    else {         /* carr */
      xn(sqr_k)(c,k, buff);
      pos = j+k-bloc;
      if (la+pos < 2*k) la=k+j-pos;
      ret = xn(dec)(a-pos,la+pos,buff,2*k);
      xn(inc)(c,k+1,c,k);
      while(ret) {
	xn(dec_1)(c,k+1,1);
	while (la+pos < j+k) a[la++] = BASE-1;
	ret += xn(inc)(a-pos,la+pos,b-j-k,j+k);
	xn(dec_1)(c,k+1,1);
	la = bloc;
      }
    }
  }

  /* si on a court-circuit des corrections dcale la fin de c */
  if (nb) {
    while (2*j < lc) j <<= 1;
    i = j*((lc+j-1)/j);
    k = lc-i+j;
    xn(inc)(c,k+1,c,k);
  }

  /* dcaler 2c */
  xn(shr)(c,lc,sh/2+1,c);
  xn(free)(buff);
}
#endif

                   /* +------------------------+
                      |  racine carre dans Z  |
                      +------------------------+ */

/* capacit(b) >= (la+1)/2+1 */
#ifndef have_sz_sqrt_k
void xz(sqrt_k)(entier *a, entier *c) {
  longueur la = Lg(a), lc=(la+1)/2;

  if (Signe(a)) xn(fatal_err)("\nsquare root of negative number\n");
  if (la == 0) {c->hd = 0; return;}

  if (la >= 2*klim) xn(sqrt_k) (a->val,la,c->val);
  else              xn(sqrt_n2)(a->val,la,c->val);
  make_head(c,lc,0);
}
#endif
