1
/*============================================================================
3
Copyright (C) 2007, William Hart
5
This file is part of FLINT.
7
FLINT is free software; you can redistribute it and/or modify
8
it under the terms of the GNU General Public License as published by
9
the Free Software Foundation; either version 2 of the License, or
10
(at your option) any later version.
12
FLINT is distributed in the hope that it will be useful,
13
but WITHOUT ANY WARRANTY; without even the implied warranty of
14
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
GNU General Public License for more details.
17
You should have received a copy of the GNU General Public License
18
along with FLINT; if not, write to the Free Software
19
Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
21
===============================================================================*/
26
#include "mpz_extras.h"
28
#include "mpn_extras.h"
29
#include "F_mpn_mul-tuning.h"
30
#include "memory-manager.h"
31
#include "longlong_wrapper.h"
38
Memory manager to allocate a single mpz_t. It returns a pointer to the mpz_t.
39
mpz_t's should be released in the order they were allocated.
42
#define RESALLOC 100 //allocate this many mpz_t's at once to save on overheads
44
mpz_t** reservoir; // Array of pointers to mpz_t's in the reservoir
45
unsigned long rescount=0; //Next available mpz_t in reservoir
46
unsigned long currentalloc=0; //total number of mpz_t's in reservoir
48
mpz_t* F_mpz_alloc(void)
50
static int initialised = 0;
51
static mpz_t** tempres;
54
//allocate another block of mpz_t's if none are currently allocated, or the reservoir is depleted
55
if (rescount==currentalloc) // need more limb_memp_t's
59
reservoir = (mpz_t**)malloc(RESALLOC*sizeof(mpz_t*)); //allocate space for the array of pointers
60
reservoir[0] = (mpz_t*)malloc(RESALLOC*sizeof(mpz_t)); //allocate space for the mpz_t's
61
for (unsigned long i=0; i<RESALLOC-1; i++)
63
reservoir[i+1]=reservoir[i]+1; //initialise the array
64
mpz_init(*reservoir[i]); //initialise the mpz_t's
66
mpz_init(*reservoir[RESALLOC-1]);
69
currentalloc = RESALLOC;
72
//copy old reservoir into larger one
74
reservoir = (mpz_t**)malloc((currentalloc+RESALLOC)*sizeof(mpz_t*));
75
reservoir[currentalloc] = (mpz_t*)malloc(RESALLOC*sizeof(mpz_t));
76
memcpy(reservoir,tempres,currentalloc*sizeof(mpz_t*));
77
for (unsigned long i=currentalloc; i<RESALLOC+currentalloc-1; i++)
79
reservoir[i+1]=reservoir[i]+1; //initialise the array
80
mpz_init(*reservoir[i]); //initialise the mpz_t's
82
mpz_init(*reservoir[currentalloc+RESALLOC-1]);
84
currentalloc+=RESALLOC;
90
alloc_d = reservoir[rescount];
96
Release a mpz_t back into the reservoir
99
void F_mpz_release(void)
105
sets res to a*b modulo p
107
Does not assume a and b are reduced mod p
110
void F_mpz_mulmod(mpz_t res, mpz_t a, mpz_t b, mpz_t p)
112
mpz_t* temp = F_mpz_alloc();
114
mpz_fdiv_r(*temp,a,p);
116
mpz_mul(res,*temp,res);
117
mpz_fdiv_r(res,res,p);
125
Sets res to a*b modulo p
126
Does not assume a and b are reduced mod p
129
unsigned long F_mpz_mulmod_ui(mpz_t res, mpz_t a, mpz_t b, unsigned long p)
131
unsigned long p1, p2, al, bl;
133
al = mpz_fdiv_r_ui(res, a, p);
134
bl = mpz_fdiv_r_ui(res, b, p);
136
umul_ppmm(p2, p1, al, bl);
138
unsigned long norm, q, r;
140
if (p2 >= p) p2 %= p;
142
#if UDIV_NEEDS_NORMALIZATION
143
count_lead_zeros(norm, p);
144
udiv_qrnnd(q, r, (p2<<norm) + (p1>>(FLINT_BITS-norm)), p1<<norm, p<<norm);
146
udiv_qrnnd(q, r, p2, p1, p);
155
Sets res to the square root of a modulo p for a prime p
156
Returns 0 if a is not a square modulo p
159
int F_mpz_sqrtmod(mpz_t res, mpz_t a, mpz_t p)
162
mpz_t* p1 = F_mpz_alloc();
163
mpz_t* two = F_mpz_alloc();
164
mpz_t* b = F_mpz_alloc();
165
mpz_t* g = F_mpz_alloc();
166
mpz_t* bpow = F_mpz_alloc();
167
mpz_t* gpow = F_mpz_alloc();
168
mpz_t* mk = F_mpz_alloc();
170
if (mpz_kronecker(a,p)!=1)
173
return 0; //return 0 if a is not a square mod p
176
if ((mpz_cmp_ui(a,0)==0)||(mpz_cmp_ui(a,1)==0))
182
if ((mpz_tstbit(p,0)==1)||(mpz_tstbit(p,1)==1))
185
mpz_fdiv_q_2exp(*p1,*p1,2);
186
mpz_powm(res,a,*p1,p);
193
r = mpz_remove(*p1,*p1,*two);
194
mpz_powm(*b,a,*p1,p);
197
if (mpz_ui_kronecker(k,p) == -1) break;
200
mpz_powm(*g,*mk,*p1,p);
201
mpz_add_ui(*p1,*p1,1);
202
mpz_divexact_ui(*p1,*p1,2);
203
mpz_powm(res,a,*p1,p);
204
if (!mpz_cmp_ui(*b,1)) return 1;
206
while (mpz_cmp_ui(*b,1))
209
for (m=1; (m<=r-1) && (mpz_cmp_ui(*bpow,1));m++)
211
mpz_powm_ui(*bpow,*bpow,2,p);
214
for (int i = 1;i<= r-m-1;i++)
216
mpz_powm_ui(*gpow,*gpow,2,p);
218
F_mpz_mulmod(res,res,*gpow,p);
219
mpz_powm_ui(*gpow,*gpow,2,p);
220
F_mpz_mulmod(*b,*b,*gpow,p);
225
F_mpz_release();F_mpz_release();F_mpz_release();F_mpz_release();
226
F_mpz_release();F_mpz_release();F_mpz_release();
232
Computes the square root of a modulo p^k when given z, the square root mod p^(k-1)
235
void __sqrtmodpow(mpz_t res, mpz_t z, mpz_t a, mpz_t pk, mpz_t tempsqpow, mpz_t inv)
238
mpz_invert(inv,inv,pk);
239
mpz_set(tempsqpow,a);
240
mpz_submul(tempsqpow,z,z);
241
mpz_fdiv_r(tempsqpow,tempsqpow,pk);
242
F_mpz_mulmod(tempsqpow,tempsqpow,inv,pk);
243
mpz_add(tempsqpow,tempsqpow,z);
244
mpz_set(res,tempsqpow);
250
Computes the square root of a modulo p^k when given z, the square root mod p^{k-1}
253
void F_mpz_sqrtmodpklift(mpz_t res, mpz_t z, mpz_t a, mpz_t pk)
255
mpz_t* tempsqpow = F_mpz_alloc();
256
mpz_t* inv = F_mpz_alloc();
258
__sqrtmodpow(res, z, a, pk, *tempsqpow, *inv);
260
F_mpz_release();F_mpz_release();
264
computes the square root of a modulo p^k when given the square root modulo p
267
void F_mpz_sqrtmodptopk(mpz_t res, mpz_t sqrt, mpz_t a, mpz_t p, int k)
269
mpz_t* tempsqpow = F_mpz_alloc();
270
mpz_t* inv = F_mpz_alloc();
271
mpz_t* pk = F_mpz_alloc();
275
for (int i = 2; i<=k; i++)
278
__sqrtmodpow(res,res,a,*pk, *tempsqpow, *inv);
281
F_mpz_release();F_mpz_release();F_mpz_release();
285
Computes the square root of a modulo p^k
286
Returns 0 if the square root of a does not exist mod p
289
int F_mpz_sqrtmodpk(mpz_t res, mpz_t a, mpz_t p, int k)
292
mpz_t* sqrtmodp = F_mpz_alloc();
294
exists = F_mpz_sqrtmod(*sqrtmodp,a,p);
295
F_mpz_sqrtmodptopk(res,*sqrtmodp,a,p,k);
304
Find res mod n=n1*n2 such that res = x1 mod n1 and res = x2 mod n2
307
void F_mpz_CRT(mpz_t res, mpz_t n, mpz_t x1, mpz_t x2, mpz_t n1, mpz_t n2)
309
mpz_t* ntemp = F_mpz_alloc();
310
mpz_t* restemp = F_mpz_alloc();
311
mpz_t* chtemp = F_mpz_alloc();
313
mpz_mul(*ntemp,n1,n2);
314
mpz_invert(*restemp,n2,n1);
315
F_mpz_mulmod(*restemp,res,n2,*ntemp);
316
F_mpz_mulmod(*restemp,*restemp,x1,*ntemp);
318
mpz_invert(*chtemp,n1,n2);
319
F_mpz_mulmod(*chtemp,*chtemp,n1,*ntemp);
320
F_mpz_mulmod(*chtemp,*chtemp,x2,*ntemp);
322
mpz_add(res,*restemp,*chtemp);
323
mpz_fdiv_r(res,res,*ntemp);
326
F_mpz_release();F_mpz_release();F_mpz_release();
332
Compute the Montgomery reduced form of a mod m
333
Returns n such that m < 2^n (n will be divisible by FLINT_BITS)
334
Assumes a is already reduced mod m
337
unsigned long F_mpz_mont_red(mpz_t res, mpz_t a, mpz_t m)
339
unsigned long n = mpz_size(m)*FLINT_BITS;
341
mpz_mul_2exp(res, a, n);
342
mpz_mod(res, res, m);
348
Compute the Montgomery multiplication r = a*b mod m assuming a and b are in
349
Montgomery form with respect to 2^n where m < 2^n and R is -m mod 2^n
352
void F_mpz_mont_mul(mpz_t res, mpz_t a, mpz_t b, mpz_t m, mpz_t R, unsigned long n)
359
mpz_fdiv_r_2exp(s, x, n);
361
mpz_fdiv_r_2exp(s, s, n);
363
mpz_add(res, res, x);
364
mpz_fdiv_q_2exp(res, res, n);
366
if (mpz_cmp(res, m) >= 0) mpz_sub(res, res, m);
373
Compute a^exp mod m using Montgomery reduction
374
Requires that m is odd and positive and that exp is positive
377
void F_mpz_expmod_mont(mpz_t res, mpz_t a, mpz_t exp, mpz_t m)
380
unsigned long bits = mpz_sizeinbase(exp, 2);
392
n = F_mpz_mont_red(aRED, a, m);
395
mpz_mul_2exp(temp, temp, n);
396
mpz_invert(R, m, temp);
398
if (mpz_cmp(R, temp) == 0) mpz_sub(R, R, temp);
400
mpz_set(powRED, aRED);
402
gmp_printf("powRED = %Zd\n", powRED);
405
for (unsigned long i = 0; i < bits - 1; i++)
407
if (mpz_tstbit(exp, i))
409
if (flag) F_mpz_mont_mul(res, res, powRED, m, R, n);
412
mpz_set(res, powRED);
416
F_mpz_mont_mul(powRED, powRED, powRED, m, R, n);
418
gmp_printf("powRED = %Zd\n", powRED);
422
if (flag) F_mpz_mont_mul(res, res, powRED, m, R, n);
423
else mpz_set(res, powRED);
426
F_mpz_mont_mul(res, res, temp, m, R, n);
434
void F_mpz_divrem_BZ(mpz_t Q, mpz_t R, mpz_t A, mpz_t B)
436
unsigned long n = mpz_size(B);
437
unsigned long m = mpz_size(A) - n;
448
mpz_fdiv_qr(Q, R, A, B);
452
unsigned long k = m/2;
454
mpz_t * B0 = F_mpz_alloc();
455
mpz_t * B1 = F_mpz_alloc();
456
mpz_t * A0 = F_mpz_alloc();
457
mpz_t * A1 = F_mpz_alloc();
458
mpz_t * Q0 = F_mpz_alloc();
459
mpz_t * Q1 = F_mpz_alloc();
460
mpz_t * R0 = F_mpz_alloc();
461
mpz_t * R1 = F_mpz_alloc();
462
mpz_t * temp = F_mpz_alloc();
463
mpz_t * temp2 = F_mpz_alloc();
464
mpz_t * temp3 = F_mpz_alloc();
467
mpz_fdiv_q_2exp(*B1, B, FLINT_BITS*k);
468
mpz_fdiv_q_2exp(*A1, A, FLINT_BITS*2*k);
470
F_mpz_divrem_BZ(*Q1, *R1, *A1, *B1);
471
mpz_fdiv_r_2exp(*B0, B, FLINT_BITS*k);
472
mpz_fdiv_r_2exp(*A0, A, FLINT_BITS*2*k);
473
mpz_mul_2exp(*temp, *R1, FLINT_BITS*2*k);
474
mpz_add(*temp, *temp, *A0);
475
mpz_mul_2exp(*temp2, *Q1, FLINT_BITS*k);
476
mpz_mul(*temp2, *temp2, *B0);
477
mpz_sub(*temp, *temp, *temp2);
478
mpz_mul_2exp(*temp2, B, FLINT_BITS*k);
480
while (mpz_cmp_ui(*temp, 0) < 0)
482
mpz_sub_ui(*Q1, *Q1, 1);
483
mpz_add(*temp, *temp, *temp2);
485
mpz_fdiv_q_2exp(*temp2, *temp, FLINT_BITS*k);
486
F_mpz_divrem_BZ(*Q0, *R0, *temp2, *B1);
488
mpz_fdiv_r_2exp(*temp2, *temp, FLINT_BITS*k);
489
mpz_mul_2exp(R, *R0, FLINT_BITS*k);
490
mpz_add(R, R, *temp2);
491
mpz_submul(R, *Q0, *B0);
492
while (mpz_cmp_ui(R, 0) < 0)
494
mpz_sub_ui(*Q0, *Q0, 1);
497
mpz_mul_2exp(Q, *Q1, FLINT_BITS*k);
500
F_mpz_release(); F_mpz_release(); F_mpz_release(); F_mpz_release();
501
F_mpz_release(); F_mpz_release(); F_mpz_release(); F_mpz_release();
502
F_mpz_release(); F_mpz_release(); F_mpz_release();
505
void F_mpz_rem_BZ(mpz_t R, mpz_t A, mpz_t B)
507
unsigned long n = mpz_size(B);
508
unsigned long m = mpz_size(A) - n;
522
unsigned long k = m/2;
524
mpz_t * B0 = F_mpz_alloc();
525
mpz_t * B1 = F_mpz_alloc();
526
mpz_t * A0 = F_mpz_alloc();
527
mpz_t * A1 = F_mpz_alloc();
528
mpz_t * Q0 = F_mpz_alloc();
529
mpz_t * Q1 = F_mpz_alloc();
530
mpz_t * R0 = F_mpz_alloc();
531
mpz_t * R1 = F_mpz_alloc();
532
mpz_t * temp = F_mpz_alloc();
533
mpz_t * temp2 = F_mpz_alloc();
534
mpz_t * temp3 = F_mpz_alloc();
537
mpz_fdiv_q_2exp(*B1, B, FLINT_BITS*k);
538
mpz_fdiv_q_2exp(*A1, A, FLINT_BITS*2*k);
540
F_mpz_divrem_BZ(*Q1, *R1, *A1, *B1);
541
mpz_fdiv_r_2exp(*B0, B, FLINT_BITS*k);
542
mpz_fdiv_r_2exp(*A0, A, FLINT_BITS*2*k);
543
mpz_mul_2exp(*temp, *R1, FLINT_BITS*2*k);
544
mpz_add(*temp, *temp, *A0);
545
mpz_mul_2exp(*temp2, *Q1, FLINT_BITS*k);
546
mpz_mul(*temp2, *temp2, *B0);
547
mpz_sub(*temp, *temp, *temp2);
548
mpz_mul_2exp(*temp2, B, FLINT_BITS*k);
550
while (mpz_cmp_ui(*temp, 0) < 0)
552
mpz_sub_ui(*Q1, *Q1, 1);
553
mpz_add(*temp, *temp, *temp2);
555
mpz_fdiv_q_2exp(*temp2, *temp, FLINT_BITS*k);
556
F_mpz_divrem_BZ(*Q0, *R0, *temp2, *B1);
558
mpz_fdiv_r_2exp(*temp2, *temp, FLINT_BITS*k);
559
mpz_mul_2exp(R, *R0, FLINT_BITS*k);
560
mpz_add(R, R, *temp2);
561
mpz_submul(R, *Q0, *B0);
562
while (mpz_cmp_ui(R, 0) < 0)
567
F_mpz_release(); F_mpz_release(); F_mpz_release(); F_mpz_release();
568
F_mpz_release(); F_mpz_release(); F_mpz_release(); F_mpz_release();
569
F_mpz_release(); F_mpz_release(); F_mpz_release();
572
void F_mpz_mulmod_BZ(mpz_t res, mpz_t a, mpz_t b, mpz_t m)
574
mpz_t * temp = F_mpz_alloc();
576
mpz_mul(*temp, a, b);
577
F_mpz_rem_BZ(res, *temp, m);
583
void F_mpz_expmod_BZ(mpz_t res, mpz_t a, mpz_t exp, mpz_t m)
586
unsigned long bits = mpz_sizeinbase(exp, 2);
598
gmp_printf("powRED = %Zd\n", powRED);
601
for (unsigned long i = 0; i < bits - 1; i++)
603
if (mpz_tstbit(exp, i))
605
if (flag) F_mpz_mulmod_BZ(res, res, powRED, m);
608
mpz_set(res, powRED);
612
F_mpz_mulmod_BZ(powRED, powRED, powRED, m);
614
gmp_printf("powRED = %Zd\n", powRED);
618
if (flag) F_mpz_mulmod_BZ(res, res, powRED, m);
619
else mpz_set(res, powRED);
627
Large integer multiplication code
630
void __F_mpz_mul(mpz_t res, mpz_t a, mpz_t b, unsigned long twk)
632
unsigned long sa = mpz_size(a);
633
unsigned long sb = mpz_size(b);
635
if (sa+sb > FLINT_FFT_LIMBS_CROSSOVER)
637
unsigned long s1 = (FLINT_BIT_COUNT(a->_mp_d[sa-1]) + FLINT_BIT_COUNT(b->_mp_d[sb-1]) <= FLINT_BITS);
640
(mp_limb_t*) flint_stack_alloc(sa + sb - s1);
641
__F_mpn_mul(output, a->_mp_d, sa, b->_mp_d, sb, twk);
642
mpz_import(res, sa+sb-s1, -1, sizeof(mp_limb_t), 0, 0, output);
643
if (mpz_sgn(res) != mpz_sgn(a)*mpz_sgn(b)) mpz_neg(res,res);
644
flint_stack_release();
645
} else mpz_mul(res, a, b);
648
void F_mpz_mul(mpz_t res, mpz_t a, mpz_t b)
650
unsigned long sa = mpz_size(a);
651
unsigned long sb = mpz_size(b);
653
if (sa+sb > FLINT_FFT_LIMBS_CROSSOVER)
655
unsigned long s1 = (FLINT_BIT_COUNT(a->_mp_d[sa-1]) + FLINT_BIT_COUNT(b->_mp_d[sb-1]) <= FLINT_BITS);
657
(mp_limb_t*) flint_stack_alloc(sa + sb - s1);
658
F_mpn_mul(output, a->_mp_d, sa, b->_mp_d, sb);
659
mpz_import(res, sa+sb-s1, -1, sizeof(mp_limb_t), 0, 0, output);
660
if (mpz_sgn(res) != mpz_sgn(a)*mpz_sgn(b)) mpz_neg(res,res);
661
flint_stack_release();
662
} else mpz_mul(res, a, b);