1
/* mpn_sqrtrem -- square root and remainder */
4
Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
6
This file is part of the GNU MP Library.
8
The GNU MP Library is free software; you can redistribute it and/or modify
9
it under the terms of the GNU Lesser General Public License as published by
10
the Free Software Foundation; either version 2.1 of the License, or (at your
11
option) any later version.
13
The GNU MP Library is distributed in the hope that it will be useful, but
14
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16
License for more details.
18
You should have received a copy of the GNU Lesser General Public License
19
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
20
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
25
/* Contributed by Paul Zimmermann.
26
See "Karatsuba Square Root", reference in gmp.texi. */
38
/* Square roots table. Generated by the following program:
40
main(){mpz_t x;int i;mpz_init(x);for(i=64;i<256;i++){mpz_set_ui(x,256*i);
41
mpz_sqrt(x,x);mpz_out_str(0,10,x);printf(",");if(i%16==15)printf("\n");}}
43
static const unsigned char approx_tab[192] =
45
128,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,
46
143,144,144,145,146,147,148,149,150,150,151,152,153,154,155,155,
47
156,157,158,159,160,160,161,162,163,163,164,165,166,167,167,168,
48
169,170,170,171,172,173,173,174,175,176,176,177,178,178,179,180,
49
181,181,182,183,183,184,185,185,186,187,187,188,189,189,190,191,
50
192,192,193,193,194,195,195,196,197,197,198,199,199,200,201,201,
51
202,203,203,204,204,205,206,206,207,208,208,209,209,210,211,211,
52
212,212,213,214,214,215,215,216,217,217,218,218,219,219,220,221,
53
221,222,222,223,224,224,225,225,226,226,227,227,228,229,229,230,
54
230,231,231,232,232,233,234,234,235,235,236,236,237,237,238,238,
55
239,240,240,241,241,242,242,243,243,244,244,245,245,246,246,247,
56
247,248,248,249,249,250,250,251,251,252,252,253,253,254,254,255
59
#define HALF_NAIL (GMP_NAIL_BITS / 2)
61
/* same as mpn_sqrtrem, but for size=1 and {np, 1} normalized */
63
mpn_sqrtrem1 (mp_ptr sp, mp_ptr rp, mp_srcptr np)
65
mp_limb_t np0, s, r, q, u;
68
ASSERT (np[0] >= GMP_NUMB_HIGHBIT / 2);
69
ASSERT (GMP_LIMB_BITS >= 16);
71
/* first compute a 8-bit approximation from the high 8-bits of np[0] */
72
np0 = np[0] << GMP_NAIL_BITS;
73
q = np0 >> (GMP_LIMB_BITS - 8);
74
/* 2^6 = 64 <= q < 256 = 2^8 */
75
s = approx_tab[q - 64]; /* 128 <= s < 255 */
76
r = (np0 >> (GMP_LIMB_BITS - 16)) - s * s; /* r <= 2*s */
85
while (2 * prec < GMP_LIMB_BITS)
87
/* invariant: s has prec bits, and r <= 2*s */
88
r = (r << prec) + (np0 >> (GMP_LIMB_BITS - prec));
94
u = (u << prec) + (np0 >> (GMP_LIMB_BITS - prec));
106
ASSERT (2 * prec == GMP_LIMB_BITS); /* GMP_LIMB_BITS must be a power of 2 */
108
/* normalize back, assuming GMP_NAIL_BITS is even */
109
ASSERT (GMP_NAIL_BITS % 2 == 0);
110
sp[0] = s >> HALF_NAIL;
111
u = s - (sp[0] << HALF_NAIL); /* s mod 2^HALF_NAIL */
112
r += u * ((sp[0] << (HALF_NAIL + 1)) + u);
113
r = r >> GMP_NAIL_BITS;
117
return r != 0 ? 1 : 0;
121
#define Prec (GMP_NUMB_BITS >> 1)
123
/* same as mpn_sqrtrem, but for size=2 and {np, 2} normalized
124
return cc such that {np, 2} = sp[0]^2 + cc*2^GMP_NUMB_BITS + rp[0] */
126
mpn_sqrtrem2 (mp_ptr sp, mp_ptr rp, mp_srcptr np)
128
mp_limb_t qhl, q, u, np0;
131
ASSERT (np[1] >= GMP_NUMB_HIGHBIT / 2);
134
mpn_sqrtrem1 (sp, rp, np + 1);
136
while (rp[0] >= sp[0])
141
/* now rp[0] < sp[0] < 2^Prec */
142
rp[0] = (rp[0] << Prec) + (np0 >> Prec);
146
q += (qhl & 1) << (Prec - 1);
147
qhl >>= 1; /* if qhl=1, necessary q=0 as qhl*2^Prec + q <= 2^Prec */
148
/* now we have (initial rp[0])<<Prec + np0>>Prec = (qhl<<Prec + q) * (2sp[0]) + u */
149
sp[0] = ((sp[0] + qhl) << Prec) + q;
151
rp[0] = ((u << Prec) & GMP_NUMB_MASK) + (np0 & (((mp_limb_t) 1 << Prec) - 1));
152
/* subtract q * q or qhl*2^(2*Prec) from rp */
153
cc -= mpn_sub_1 (rp, rp, 1, q * q) + qhl;
154
/* now subtract 2*q*2^Prec + 2^(2*Prec) if qhl is set */
157
cc += sp[0] != 0 ? mpn_add_1 (rp, rp, 1, sp[0]) : 1;
158
cc += mpn_add_1 (rp, rp, 1, --sp[0]);
164
/* writes in {sp, n} the square root (rounded towards zero) of {np, 2n},
165
and in {np, n} the low n limbs of the remainder, returns the high
166
limb of the remainder (which is 0 or 1).
167
Assumes {np, 2n} is normalized, i.e. np[2n-1] >= B/4
168
where B=2^GMP_NUMB_BITS. */
170
mpn_dc_sqrtrem (mp_ptr sp, mp_ptr np, mp_size_t n)
172
mp_limb_t q; /* carry out of {sp, n} */
173
int c, b; /* carry out of remainder */
176
ASSERT (np[2 * n - 1] >= GMP_NUMB_HIGHBIT / 2);
179
c = mpn_sqrtrem2 (sp, np, np);
184
q = mpn_dc_sqrtrem (sp + l, np + 2 * l, h);
186
mpn_sub_n (np + 2 * l, np + 2 * l, sp + l, h);
187
q += mpn_divrem (sp, 0, np + l, n, sp + l, h);
189
mpn_rshift (sp, sp, l, 1);
190
sp[l - 1] |= (q << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK;
193
c = mpn_add_n (np + l, np + l, sp + l, h);
194
mpn_sqr_n (np + n, sp, l);
195
b = q + mpn_sub_n (np, np, np + n, 2 * l);
196
c -= (l == h) ? b : mpn_sub_1 (np + 2 * l, np + 2 * l, 1, b);
197
q = mpn_add_1 (sp + l, sp + l, h, q);
201
c += mpn_addmul_1 (np, sp, n, 2) + 2 * q;
202
c -= mpn_sub_1 (np, np, n, 1);
203
q -= mpn_sub_1 (sp, sp, n, 1);
212
mpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nn)
214
mp_limb_t *tp, s0[1], cc, high, rl;
221
/* If OP is zero, both results are zero. */
225
ASSERT (np[nn - 1] != 0);
226
ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nn));
227
ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nn + 1) / 2, rp, nn));
228
ASSERT (! MPN_OVERLAP_P (sp, (nn + 1) / 2, np, nn));
231
if (nn == 1 && (high & GMP_NUMB_HIGHBIT))
232
return mpn_sqrtrem1 (sp, rp, np);
233
count_leading_zeros (c, high);
236
c = c / 2; /* we have to shift left by 2c bits to normalize {np, nn} */
237
tn = (nn + 1) / 2; /* 2*tn is the smallest even integer >= nn */
240
if (nn % 2 != 0 || c > 0)
242
tp = TMP_ALLOC_LIMBS (2 * tn);
243
tp[0] = 0; /* needed only when 2*tn > nn, but saves a test */
245
mpn_lshift (tp + 2 * tn - nn, np, nn, 2 * c);
247
MPN_COPY (tp + 2 * tn - nn, np, nn);
248
rl = mpn_dc_sqrtrem (sp, tp, tn);
249
/* We have 2^(2k)*N = S^2 + R where k = c + (2tn-nn)*GMP_NUMB_BITS/2,
250
thus 2^(2k)*N = (S-s0)^2 + 2*S*s0 - s0^2 + R where s0=S mod 2^k */
251
c += (nn % 2) * GMP_NUMB_BITS / 2; /* c now represents k */
252
s0[0] = sp[0] & (((mp_limb_t) 1 << c) - 1); /* S mod 2^k */
253
rl += mpn_addmul_1 (tp, sp, tn, 2 * s0[0]); /* R = R + 2*s0*S */
254
cc = mpn_submul_1 (tp, s0, 1, s0[0]);
255
rl -= (tn > 1) ? mpn_sub_1 (tp + 1, tp + 1, tn - 1, cc) : cc;
256
mpn_rshift (sp, sp, tn, c);
261
if (c < GMP_NUMB_BITS)
269
mpn_rshift (rp, tp, tn, c);
271
MPN_COPY_INCR (rp, tp, tn);
277
rp = TMP_ALLOC_LIMBS (nn);
279
MPN_COPY (rp, np, nn);
280
rn = tn + (rp[tn] = mpn_dc_sqrtrem (sp, rp, tn));
283
MPN_NORMALIZE (rp, rn);