/* mpz_powm(res,base,exp,mod) -- Set RES to (base**exp) mod MOD.

Copyright 1991, 1993, 1994, 1996, 1997, 2000, 2001 Free Software Foundation,
Inc. Contributed by Paul Zimmermann.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
32
/* Set c <- tp/R^n mod m.
33
tp should have space for 2*n+1 limbs; clobber its most significant limb. */
34
#if ! WANT_REDC_GLOBAL
38
/* NOTE(review): this file is a corrupted extraction -- stray original
   line-number tokens (e.g. "44") are interleaved with the code, and the
   interior of this function (return type, braces, declarations of j, cy
   and q) is missing.  Code tokens below are kept byte-identical; only
   comments were added.  Restore from upstream GMP mpz/powm.c before
   compiling. */
/* redc: Montgomery reduction -- per the file-head comment, sets
   cp <- tp/R^n mod mp where R = 2^BITS_PER_MP_LIMB.  Nprim is presumably
   -1/mp[0] mod R (see the modlimb_invert call in mpz_powm) -- TODO
   confirm against upstream.  tp needs 2*n+1 limbs; its most significant
   limb is clobbered as a carry guard. */
redc (mp_ptr cp, mp_srcptr mp, mp_size_t n, mp_limb_t Nprim, mp_ptr tp)
44
tp[2 * n] = 0;		/* carry guard */
46
/* One reduction step per limb: add q*mp so the low limb cancels, then
   propagate the carry into the high half (q's computation is in a line
   missing from this fragment). */
for (j = 0; j < n; j++)
49
cy = mpn_addmul_1 (tp, mp, n, q);
50
mpn_incr_u (tp + n, cy);
55
/* Final subtraction bringing the result below mp (the guarding
   condition is in a line missing from this fragment). */
mpn_sub_n (cp, tp, mp, n);
60
/* Compute t = a mod m, a is defined by (ap,an), m is defined by (mp,mn), and
61
t is defined by (tp,mn). */
63
/* reduce: per the comment above, set t = a mod m, with t = (tp,mn),
   a = (ap,an), m = (mp,mn).  NOTE(review): fragment is missing its
   return type, braces and TMP marker lines -- code kept byte-identical,
   comments only. */
reduce (mp_ptr tp, mp_srcptr ap, mp_size_t an, mp_srcptr mp, mp_size_t mn)
69
/* Scratch for the quotient (an - mn + 1 limbs); allocated here so it
   does not outlive this call -- see the caller's comment about not
   keeping the quotient allocation until function return. */
qp = TMP_ALLOC_LIMBS (an - mn + 1);
71
/* Division writes the remainder (the value we want) directly into tp. */
mpn_tdiv_qr (qp, tp, 0L, ap, an, mp, mn);
77
/* Return the group order of the ring mod m. */
99
/* NOTE(review): lone surviving statement of phi(); the declarations of
   t and d, the loop structure, and the return are in lines missing from
   this extraction -- restore from upstream GMP mpz/powm.c. */
unsigned long int q = t / d;
118
/* average number of calls to redc for an exponent of n bits
119
with the sliding window algorithm of base 2^k: the optimal is
120
obtained for the value of k which minimizes 2^(k-1)+n/(k+1):
123
128 156* 159 171 200 261
124
256 309 307* 316 343 403
125
512 617 607* 610 632 688
126
1024 1231 1204 1195* 1207 1256
127
2048 2461 2399 2366 2360* 2396
128
4096 4918 4787 4707 4665* 4670
132
/* Use REDC instead of usual reduction for sizes < POWM_THRESHOLD. In REDC
133
each modular multiplication costs about 2*n^2 limbs operations, whereas
134
using usual reduction it costs 3*K(n), where K(n) is the cost of a
135
multiplication using Karatsuba, and a division is assumed to cost 2*K(n),
136
for example using Burnikel-Ziegler's algorithm. This gives a theoretical
137
threshold of a*KARATSUBA_SQR_THRESHOLD, with a=(3/2)^(1/(2-ln(3)/ln(2))) ~
139
/* For now, also disable REDC when MOD is even, as the inverse can't handle
140
that. At some point, we might want to make the code faster for that case,
141
perhaps using CRR. */
143
#ifndef POWM_THRESHOLD
144
#define POWM_THRESHOLD ((8 * KARATSUBA_SQR_THRESHOLD) / 3)
147
#define HANDLE_NEGATIVE_EXPONENT 1
148
#undef REDUCE_EXPONENT
152
/* mpz_powm: set r <- (b ** e) mod m using sliding-window binary
   exponentiation with a k-bit window, switching between Montgomery
   (REDC) reduction and plain division per use_redc below.
   NOTE(review): this extraction of the function is heavily corrupted --
   stray original-line-number tokens (e.g. "153") are interleaved with
   the code and most interior lines (braces, declarations, TMP markers,
   error paths, #if/#else structure) are missing.  All code tokens below
   are byte-identical to the extraction; only comment text was added or
   fixed.  Restore the body from upstream GMP mpz/powm.c before
   attempting to compile. */
mpz_powm (mpz_ptr r, mpz_srcptr b, mpz_srcptr e, mpz_srcptr m)
153
#else /* BERKELEY_MP */
154
/* Berkeley MP compatible entry point: same computation, result passed
   last instead of first. */
pow (mpz_srcptr b, mpz_srcptr e, mpz_srcptr m, mpz_ptr r)
155
#endif /* BERKELEY_MP */
157
mp_ptr xp, tp, qp, gp, this_gp;
158
mp_srcptr bp, ep, mp;
159
mp_size_t bn, es, en, mn, xn;
161
unsigned long int enb;
162
mp_size_t i, K, j, l, k;
163
int m_zero_cnt, e_zero_cnt;
166
#if HANDLE_NEGATIVE_EXPONENT
186
/* Exponent is zero, result is 1 mod m, i.e., 1 or 0 depending on if
188
SIZ(r) = (mn == 1 && mp[0] == 1) ? 0 : 1;
190
TMP_FREE (marker);	/* we haven't really allocated anything here */
193
#if HANDLE_NEGATIVE_EXPONENT
194
/* Negative exponent: replace b by its inverse mod m (the failure branch
   for a non-invertible base is in lines missing from this fragment). */
MPZ_TMP_INIT (new_b, mn + 1);
196
if (! mpz_invert (new_b, b, m))
207
/* Reduce exponent by dividing it by phi(m) when m small. */
208
if (mn == 1 && mp[0] < 0x7fffffffL && en * BITS_PER_MP_LIMB > 150)
210
MPZ_TMP_INIT (new_e, 2);
211
mpz_mod_ui (new_e, e, phi (mp[0]));
216
/* REDC requires an odd modulus and only pays off below the threshold --
   see the cost discussion in the comment block above. */
use_redc = mn < POWM_THRESHOLD && mp[0] % 2 != 0;
219
/* invm = -1/m mod 2^BITS_PER_MP_LIMB, must have m odd */
220
modlimb_invert (invm, mp[0]);
225
/* Normalize m (i.e. make its most significant bit set) as required by
226
division functions below. */
227
count_leading_zeros (m_zero_cnt, mp[mn - 1]);
231
new_mp = TMP_ALLOC_LIMBS (mn);
232
mpn_lshift (new_mp, mp, mn, m_zero_cnt);
237
/* Determine optimal value of k, the number of exponent bits we look at
239
count_leading_zeros (e_zero_cnt, PTR(e)[en - 1]);
240
enb = en * BITS_PER_MP_LIMB - e_zero_cnt;	/* number of bits of exponent */
243
/* Grow the window size while it lowers the expected cost (model from
   the table in the comment block above). */
while (2 * enb > K * (2 + k * (3 + k)))
249
tp = TMP_ALLOC_LIMBS (2 * mn + 1);
250
qp = TMP_ALLOC_LIMBS (mn + 1);
252
/* Table of K/2 odd powers of the base, mn limbs each -- heap allocated,
   released with __GMP_FREE_FUNC_LIMBS at the end of the function. */
gp = __GMP_ALLOCATE_FUNC_LIMBS (K / 2 * mn);
254
/* Compute x*R^n where R=2^BITS_PER_MP_LIMB. */
257
/* Handle |b| >= m by computing b mod m. FIXME: It is not strictly necessary
258
for speed or correctness to do this when b and m have the same number of
259
limbs, perhaps remove mpn_cmp call. */
260
if (bn > mn || (bn == mn && mpn_cmp (bp, mp, mn) >= 0))
262
/* Reduce possibly huge base while moving it to gp[0]. Use a function
263
call to reduce, since we don't want the quotient allocation to
264
live until function return. */
267
reduce (tp + mn, bp, bn, mp, mn);	/* b mod m */
269
mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn);	/* unnormalized! */
273
reduce (gp, bp, bn, mp, mn);
278
/* |b| < m. We pad out operands to become mn limbs, which simplifies
279
the rest of the function, but slows things down when the |b| << m. */
283
MPN_COPY (tp + mn, bp, bn);
284
MPN_ZERO (tp + mn + bn, mn - bn);
285
mpn_tdiv_qr (qp, gp, 0L, tp, 2 * mn, mp, mn);
289
MPN_COPY (gp, bp, bn);
290
MPN_ZERO (gp + bn, mn - bn);
294
/* Compute xx^i for odd g < 2^i. */
296
xp = TMP_ALLOC_LIMBS (mn);
297
mpn_sqr_n (tp, gp, mn);
299
redc (xp, mp, mn, invm, tp);	/* xx = x^2*R^n */
301
/* Non-REDC path reduces by division instead. */
mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
303
/* Fill the odd-power table: each entry is the previous entry times the
   squared base. */
for (i = 1; i < K / 2; i++)
305
mpn_mul_n (tp, this_gp, xp, mn);
308
redc (this_gp, mp, mn, invm, tp);	/* g[i] = x^(2i+1)*R^n */
310
mpn_tdiv_qr (qp, this_gp, 0L, tp, 2 * mn, mp, mn);
313
/* Start the real stuff. */
315
i = en - 1;	/* current index */
316
c = ep[i];	/* current limb */
317
sh = BITS_PER_MP_LIMB - e_zero_cnt;	/* significant bits in ep[i] */
318
sh -= k;	/* index of lower bit of ep[i] to take into account */
320
{ /* k-sh extra bits are needed */
325
sh += BITS_PER_MP_LIMB;
332
/* Strip trailing zero bits from the first window so the table lookup
   index (c >> 1) addresses an odd power; the j squarings are applied
   afterwards. */
for (j = 0; c % 2 == 0; j++)
335
MPN_COPY (xp, gp + mn * (c >> 1), mn);
338
mpn_sqr_n (tp, xp, mn);
340
redc (xp, mp, mn, invm, tp);
342
mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
345
/* Main loop over the remaining exponent bits. */
while (i > 0 || sh > 0)
348
l = k;	/* number of bits treated */
356
sh += BITS_PER_MP_LIMB;
361
l += sh;	/* last chunk of bits from e; l < k */
366
c &= ((mp_limb_t) 1 << l) - 1;
368
/* This while loop implements the sliding window improvement--loop while
369
the most significant bit of c is zero, squaring xx as we go. */
370
while ((c >> (l - 1)) == 0 && (i > 0 || sh > 0))
372
mpn_sqr_n (tp, xp, mn);
374
redc (xp, mp, mn, invm, tp);
376
mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
380
c = (c << 1) + ((ep[i] >> sh) & 1);
385
sh = BITS_PER_MP_LIMB - 1;
386
c = (c << 1) + (ep[i] >> sh);
390
/* Replace xx by xx^(2^l)*x^c. */
393
for (j = 0; c % 2 == 0; j++)
396
/* c0 = c * 2^j, i.e. xx^(2^l)*x^c = (A^(2^(l - j))*c)^(2^j) */
400
mpn_sqr_n (tp, xp, mn);
402
redc (xp, mp, mn, invm, tp);
404
mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
406
/* Multiply in the table entry for the odd part of the window. */
mpn_mul_n (tp, xp, gp + mn * (c >> 1), mn);
408
redc (xp, mp, mn, invm, tp);
410
mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
413
j = l;	/* case c=0 */
416
mpn_sqr_n (tp, xp, mn);
418
redc (xp, mp, mn, invm, tp);
420
mpn_tdiv_qr (qp, xp, 0L, tp, 2 * mn, mp, mn);
426
/* Convert back xx to xx/R^n. */
427
MPN_COPY (tp, xp, mn);
428
MPN_ZERO (tp + mn, mn);
429
redc (xp, mp, mn, invm, tp);
430
/* One conditional subtraction brings the REDC result into [0, m). */
if (mpn_cmp (xp, mp, mn) >= 0)
431
mpn_sub_n (xp, xp, mp, mn);
438
/* Undo the earlier normalization of m: shift up, divide by the
   normalized m, shift the remainder back down. */
cy = mpn_lshift (tp, xp, mn, m_zero_cnt);
440
mpn_tdiv_qr (qp, xp, 0L, tp, mn + (cy != 0), mp, mn);
441
mpn_rshift (xp, xp, mn, m_zero_cnt);
445
MPN_NORMALIZE (xp, xn);
447
/* Negative base with odd exponent: nonzero result becomes m - |x|. */
if ((ep[0] & 1) && SIZ(b) < 0 && xn != 0)
449
mp = PTR(m);	/* want original, unnormalized m */
450
mpn_sub (xp, mp, mn, xp, xn);
452
MPN_NORMALIZE (xp, xn);
456
MPN_COPY (PTR(r), xp, xn);
458
__GMP_FREE_FUNC_LIMBS (gp, K / 2 * mn);