1
/* Include file for internal GNU MP types and definitions.
3
THE CONTENTS OF THIS FILE ARE FOR INTERNAL USE AND ARE ALMOST CERTAIN TO
4
BE SUBJECT TO INCOMPATIBLE CHANGES IN FUTURE GNU MP RELEASES.
6
Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1999, 2000 Free Software
9
This file is part of the GNU MP Library.
11
The GNU MP Library is free software; you can redistribute it and/or modify
12
it under the terms of the GNU Lesser General Public License as published by
13
the Free Software Foundation; either version 2.1 of the License, or (at your
14
option) any later version.
16
The GNU MP Library is distributed in the hope that it will be useful, but
17
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
18
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
19
License for more details.
21
You should have received a copy of the GNU Lesser General Public License
22
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
23
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
24
MA 02111-1307, USA. */
27
#include "gmp-mparam.h"
28
/* #include "longlong.h" */
30
/* When using gcc, make sure to use its builtin alloca. */
31
#if ! defined (alloca) && defined (__GNUC__)
32
#define alloca __builtin_alloca
36
/* When using cc, do whatever necessary to allow use of alloca. For many
37
machines, this means including alloca.h. IBM's compilers need a #pragma
38
in "each module that needs to use alloca". */
39
#if ! defined (alloca)
40
/* We need lots of variants for MIPS, to cover all versions and perversions
42
#if defined (__mips) || defined (MIPSEL) || defined (MIPSEB) \
43
|| defined (_MIPSEL) || defined (_MIPSEB) || defined (__sgi) \
44
|| defined (__alpha) || defined (__sparc) || defined (sparc) \
48
* Modification For OpenVMS By Robert Alan Byer <byer@mail.ourservers.net>
49
* OpenVMS DECC v6.0-001 doesn't have <alloca.h>.
51
#if !defined(__DECC) && !defined(__VMS)
62
#if defined (__DECC) && !defined(__VMS)
63
#define alloca(x) __ALLOCA(x)
72
#if ! defined (HAVE_ALLOCA) || USE_STACK_ALLOC
73
#include "stack-alloc.h"
76
#define TMP_ALLOC(x) alloca(x)
81
/* Allocating various types. */
82
#define TMP_ALLOC_TYPE(n,type) ((type *) TMP_ALLOC ((n) * sizeof (type)))
83
#define TMP_ALLOC_LIMBS(n) TMP_ALLOC_TYPE(n,mp_limb_t)
84
#define TMP_ALLOC_MP_PTRS(n) TMP_ALLOC_TYPE(n,mp_ptr)
87
#if ! defined (__GNUC__) /* FIXME: Test for C++ compilers here,
88
__DECC understands __inline */
89
#define inline /* Empty */
92
/* Absolute value of x.  Arguments are fully parenthesized so that an
   expression argument such as ABS(a-b) expands correctly; note x may be
   evaluated twice, so avoid side effects in the argument.  */
#define ABS(x) ((x) >= 0 ? (x) : -(x))
93
/* Smaller of l and o; either argument may be evaluated twice.  */
#define MIN(l,o) ((o) > (l) ? (l) : (o))
94
/* Larger of h and i; either argument may be evaluated twice.  */
#define MAX(h,i) ((i) < (h) ? (h) : (i))
95
/* Element count of a true array.  Only valid on an actual array object,
   not on a pointer (an array function parameter decays to a pointer).  */
#define numberof(x) (sizeof (x) / sizeof ((x)[0]))
97
/* Field access macros. */
98
#define SIZ(x) ((x)->_mp_size)      /* signed limb-count field _mp_size */
99
#define ABSIZ(x) ABS (SIZ (x))      /* absolute value of SIZ */
100
#define PTR(x) ((x)->_mp_d)         /* limb data pointer _mp_d */
101
#define LIMBS(x) ((x)->_mp_d)       /* alias for PTR, same _mp_d field */
102
#define EXP(x) ((x)->_mp_exp)       /* _mp_exp field */
103
#define PREC(x) ((x)->_mp_prec)     /* _mp_prec field */
104
#define ALLOC(x) ((x)->_mp_alloc)   /* allocated limb count _mp_alloc */
106
/* Extra casts because shorts are promoted to ints by "~" and "<<". "-1"
107
rather than "1" in SIGNED_TYPE_MIN avoids warnings from some compilers
108
about arithmetic overflow. */
109
#define UNSIGNED_TYPE_MAX(type) ((type) ~ (type) 0)
110
#define UNSIGNED_TYPE_HIGHBIT(type) ((type) ~ (UNSIGNED_TYPE_MAX(type) >> 1))
111
#define SIGNED_TYPE_MIN(type) (((type) -1) << (8*sizeof(type)-1))
112
#define SIGNED_TYPE_MAX(type) ((type) ~ SIGNED_TYPE_MIN(type))
113
#define SIGNED_TYPE_HIGHBIT(type) SIGNED_TYPE_MIN(type)
115
#define MP_LIMB_T_MAX UNSIGNED_TYPE_MAX (mp_limb_t)
116
#define MP_LIMB_T_HIGHBIT UNSIGNED_TYPE_HIGHBIT (mp_limb_t)
118
#define MP_SIZE_T_MAX SIGNED_TYPE_MAX (mp_size_t)
121
#define ULONG_MAX UNSIGNED_TYPE_MAX (unsigned long)
123
#define ULONG_HIGHBIT UNSIGNED_TYPE_HIGHBIT (unsigned long)
124
#define LONG_HIGHBIT SIGNED_TYPE_HIGHBIT (long)
126
#define LONG_MAX SIGNED_TYPE_MAX (long)
130
#define USHORT_MAX UNSIGNED_TYPE_MAX (unsigned short)
132
#define USHORT_HIGHBIT UNSIGNED_TYPE_HIGHBIT (unsigned short)
133
#define SHORT_HIGHBIT SIGNED_TYPE_HIGHBIT (short)
135
#define SHORT_MAX SIGNED_TYPE_MAX (short)
141
/* Swap the mp_limb_t values x and y in place.  Wrapped in do/while(0)
   so the multi-statement expansion is safe after an unbraced `if'.  */
#define MP_LIMB_T_SWAP(x, y)                    \
  do {                                          \
    mp_limb_t __mp_limb_t_swap__tmp = (x);      \
    (x) = (y);                                  \
    (y) = __mp_limb_t_swap__tmp;                \
  } while (0)
147
/* Swap the mp_size_t values x and y in place (do/while(0) wrapped so it
   is safe as the body of an unbraced `if').  */
#define MP_SIZE_T_SWAP(x, y)                    \
  do {                                          \
    mp_size_t __mp_size_t_swap__tmp = (x);      \
    (x) = (y);                                  \
    (y) = __mp_size_t_swap__tmp;                \
  } while (0)
154
/* Swap the mp_ptr values x and y in place (do/while(0) wrapped).  */
#define MP_PTR_SWAP(x, y)                       \
  do {                                          \
    mp_ptr __mp_ptr_swap__tmp = (x);            \
    (x) = (y);                                  \
    (y) = __mp_ptr_swap__tmp;                   \
  } while (0)
160
/* Swap the mp_srcptr values x and y in place (do/while(0) wrapped).  */
#define MP_SRCPTR_SWAP(x, y)                    \
  do {                                          \
    mp_srcptr __mp_srcptr_swap__tmp = (x);      \
    (x) = (y);                                  \
    (y) = __mp_srcptr_swap__tmp;                \
  } while (0)
167
/* Swap the mpn pointer/size pairs (xp,xs) and (yp,ys) in place.
   Relies on MP_PTR_SWAP and MP_SIZE_T_SWAP defined earlier in this file;
   do/while(0) wrapped for unbraced-`if' safety.  */
#define MPN_PTR_SWAP(xp,xs, yp,ys)              \
  do {                                          \
    MP_PTR_SWAP (xp, yp);                       \
    MP_SIZE_T_SWAP (xs, ys);                    \
  } while (0)
172
/* Swap the mpn srcptr/size pairs (xp,xs) and (yp,ys) in place.
   Relies on MP_SRCPTR_SWAP and MP_SIZE_T_SWAP defined earlier in this
   file; do/while(0) wrapped for unbraced-`if' safety.  */
#define MPN_SRCPTR_SWAP(xp,xs, yp,ys)           \
  do {                                          \
    MP_SRCPTR_SWAP (xp, yp);                    \
    MP_SIZE_T_SWAP (xs, ys);                    \
  } while (0)
178
/* Swap the mpz_ptr values x and y in place (do/while(0) wrapped).  */
#define MPZ_PTR_SWAP(x, y)                      \
  do {                                          \
    mpz_ptr __mpz_ptr_swap__tmp = (x);          \
    (x) = (y);                                  \
    (y) = __mpz_ptr_swap__tmp;                  \
  } while (0)
184
/* Swap the mpz_srcptr values x and y in place (do/while(0) wrapped).  */
#define MPZ_SRCPTR_SWAP(x, y)                   \
  do {                                          \
    mpz_srcptr __mpz_srcptr_swap__tmp = (x);    \
    (x) = (y);                                  \
    (y) = __mpz_srcptr_swap__tmp;               \
  } while (0)
192
#if defined (__cplusplus)
196
/* FIXME: These are purely internal, so do a search and replace to change
197
them to __gmp forms, rather than using these macros. */
198
#define _mp_allocate_func __gmp_allocate_func
199
#define _mp_reallocate_func __gmp_reallocate_func
200
#define _mp_free_func __gmp_free_func
201
#define _mp_default_allocate __gmp_default_allocate
202
#define _mp_default_reallocate __gmp_default_reallocate
203
#define _mp_default_free __gmp_default_free
205
extern void * (*_mp_allocate_func) _PROTO ((size_t));
206
extern void * (*_mp_reallocate_func) _PROTO ((void *, size_t, size_t));
207
extern void (*_mp_free_func) _PROTO ((void *, size_t));
209
void *_mp_default_allocate _PROTO ((size_t));
210
void *_mp_default_reallocate _PROTO ((void *, size_t, size_t));
211
void _mp_default_free _PROTO ((void *, size_t));
213
#define _MP_ALLOCATE_FUNC_TYPE(n,type) \
214
((type *) (*_mp_allocate_func) ((n) * sizeof (type)))
215
#define _MP_ALLOCATE_FUNC_LIMBS(n) _MP_ALLOCATE_FUNC_TYPE(n,mp_limb_t)
217
#define _MP_FREE_FUNC_TYPE(p,n,type) (*_mp_free_func) (p, (n) * sizeof (type))
218
#define _MP_FREE_FUNC_LIMBS(p,n) _MP_FREE_FUNC_TYPE(p,n,mp_limb_t)
221
#if (__STDC__-0) || defined (__cplusplus)
225
#define const /* Empty */
226
#define signed /* Empty */
230
#if defined (__GNUC__) && defined (__i386__)
231
#if 0 /* check that these actually improve things */
232
#define MPN_COPY_INCR(DST, SRC, N) \
233
__asm__ ("cld\n\trep\n\tmovsl" : : \
234
"D" (DST), "S" (SRC), "c" (N) : \
235
"cx", "di", "si", "memory")
236
#define MPN_COPY_DECR(DST, SRC, N) \
237
__asm__ ("std\n\trep\n\tmovsl" : : \
238
"D" ((DST) + (N) - 1), "S" ((SRC) + (N) - 1), "c" (N) : \
239
"cx", "di", "si", "memory")
240
#define MPN_NORMALIZE_NOT_ZERO(P, N) \
242
__asm__ ("std\n\trepe\n\tscasl" : "=c" (N) : \
243
"a" (0), "D" ((P) + (N) - 1), "0" (N) : \
250
#if HAVE_NATIVE_mpn_copyi
251
#define mpn_copyi __MPN(copyi)
252
void mpn_copyi _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
255
/* Remap names of internal mpn functions. */
256
#define __clz_tab __MPN(clz_tab)
257
#define mpn_udiv_w_sdiv __MPN(udiv_w_sdiv)
258
#define mpn_reciprocal __MPN(reciprocal)
260
#define mpn_sb_divrem_mn __MPN(sb_divrem_mn)
261
#define mpn_bz_divrem_n __MPN(bz_divrem_n)
262
/* #define mpn_tdiv_q __MPN(tdiv_q) */
264
#define mpn_kara_mul_n __MPN(kara_mul_n)
265
void mpn_kara_mul_n _PROTO((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr));
267
#define mpn_kara_sqr_n __MPN(kara_sqr_n)
268
void mpn_kara_sqr_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
270
#define mpn_toom3_mul_n __MPN(toom3_mul_n)
271
void mpn_toom3_mul_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t,mp_ptr));
273
#define mpn_toom3_sqr_n __MPN(toom3_sqr_n)
274
void mpn_toom3_sqr_n _PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr));
276
#define mpn_fft_best_k __MPN(fft_best_k)
277
int mpn_fft_best_k _PROTO ((mp_size_t n, int sqr));
279
#define mpn_mul_fft __MPN(mul_fft)
280
void mpn_mul_fft _PROTO ((mp_ptr op, mp_size_t pl,
281
mp_srcptr n, mp_size_t nl,
282
mp_srcptr m, mp_size_t ml,
285
#define mpn_mul_fft_full __MPN(mul_fft_full)
286
void mpn_mul_fft_full _PROTO ((mp_ptr op,
287
mp_srcptr n, mp_size_t nl,
288
mp_srcptr m, mp_size_t ml));
290
#define mpn_fft_next_size __MPN(fft_next_size)
291
mp_size_t mpn_fft_next_size _PROTO ((mp_size_t pl, int k));
293
mp_limb_t mpn_sb_divrem_mn _PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_srcptr, mp_size_t));
294
mp_limb_t mpn_bz_divrem_n _PROTO ((mp_ptr, mp_ptr, mp_srcptr, mp_size_t));
295
/* void mpn_tdiv_q _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)); */
297
/* Copy NLIMBS *limbs* from SRC to DST, NLIMBS==0 allowed. */
298
#ifndef MPN_COPY_INCR
299
#if HAVE_NATIVE_mpn_copyi
300
#define MPN_COPY_INCR(DST, SRC, NLIMBS) mpn_copyi (DST, SRC, NLIMBS)
302
#define MPN_COPY_INCR(DST, SRC, NLIMBS) \
305
for (__i = 0; __i < (NLIMBS); __i++) \
306
(DST)[__i] = (SRC)[__i]; \
311
#if HAVE_NATIVE_mpn_copyd
312
#define mpn_copyd __MPN(copyd)
313
void mpn_copyd _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
316
/* NLIMBS==0 allowed */
317
#ifndef MPN_COPY_DECR
318
#if HAVE_NATIVE_mpn_copyd
319
#define MPN_COPY_DECR(DST, SRC, NLIMBS) mpn_copyd (DST, SRC, NLIMBS)
321
#define MPN_COPY_DECR(DST, SRC, NLIMBS) \
324
for (__i = (NLIMBS) - 1; __i >= 0; __i--) \
325
(DST)[__i] = (SRC)[__i]; \
330
/* Define MPN_COPY for vector computers. Since #pragma cannot be in a macro,
331
rely on function inlining. */
332
#if defined (_CRAY) || defined (__uxp__)
334
_MPN_COPY (d, s, n) mp_ptr d; mp_srcptr s; mp_size_t n;
336
int i; /* Faster for Cray with plain int */
337
#pragma _CRI ivdep /* Cray PVP systems */
338
#pragma loop noalias d,s /* Fujitsu VPP systems */
339
for (i = 0; i < n; i++)
342
#define MPN_COPY _MPN_COPY
346
#define MPN_COPY MPN_COPY_INCR
349
/* Zero NLIMBS *limbs* AT DST. */
351
#define MPN_ZERO(DST, NLIMBS) \
354
for (__i = 0; __i < (NLIMBS); __i++) \
359
#ifndef MPN_NORMALIZE
360
#define MPN_NORMALIZE(DST, NLIMBS) \
364
if ((DST)[(NLIMBS) - 1] != 0) \
370
#ifndef MPN_NORMALIZE_NOT_ZERO
371
#define MPN_NORMALIZE_NOT_ZERO(DST, NLIMBS) \
375
if ((DST)[(NLIMBS) - 1] != 0) \
382
/* Strip least significant zero limbs from ptr,size by incrementing ptr and
383
decrementing size. The number in ptr,size must be non-zero, ie. size!=0
384
and somewhere a non-zero limb. */
385
#define MPN_STRIP_LOW_ZEROS_NOT_ZERO(ptr, size) \
388
ASSERT ((size) != 0); \
389
while ((ptr)[0] == 0) \
393
ASSERT (size >= 0); \
398
/* Initialize X of type mpz_t with space for NLIMBS limbs. X should be a
399
temporary variable; it will be automatically cleared out at function
400
return. We use __x here to make it possible to accept both mpz_ptr and
402
#define MPZ_TMP_INIT(X, NLIMBS) \
405
__x->_mp_alloc = (NLIMBS); \
406
__x->_mp_d = (mp_ptr) TMP_ALLOC ((NLIMBS) * BYTES_PER_MP_LIMB); \
409
/* Realloc for an mpz_t WHAT if it has less than NEEDED limbs. */
410
/* Grow the mpz_t WHAT to at least NEEDED limbs via _mpz_realloc, doing
   nothing when the current allocation already suffices.  Wrapped in
   do/while(0) so the embedded `if' cannot capture a caller's `else'.  */
#define MPZ_REALLOC(what,needed)                \
  do {                                          \
    if ((needed) > ALLOC (what))                \
      _mpz_realloc (what, needed);              \
  } while (0)
416
/* If KARATSUBA_MUL_THRESHOLD is not already defined, define it to a
417
value which is good on most machines. */
418
#ifndef KARATSUBA_MUL_THRESHOLD
419
#define KARATSUBA_MUL_THRESHOLD 32
422
/* If TOOM3_MUL_THRESHOLD is not already defined, define it to a
423
value which is good on most machines. */
424
#ifndef TOOM3_MUL_THRESHOLD
425
#define TOOM3_MUL_THRESHOLD 256
428
#ifndef KARATSUBA_SQR_THRESHOLD
429
#define KARATSUBA_SQR_THRESHOLD (2*KARATSUBA_MUL_THRESHOLD)
432
#ifndef TOOM3_SQR_THRESHOLD
433
#define TOOM3_SQR_THRESHOLD (2*TOOM3_MUL_THRESHOLD)
436
/* First k to use for an FFT modF multiply. A modF FFT is an order
437
log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba,
438
whereas k=4 is 1.33 which is faster than toom3 at 1.485. */
439
#define FFT_FIRST_K 4
441
/* Threshold at which FFT should be used to do a modF NxN -> N multiply. */
442
#ifndef FFT_MODF_MUL_THRESHOLD
443
#define FFT_MODF_MUL_THRESHOLD (TOOM3_MUL_THRESHOLD * 3)
445
#ifndef FFT_MODF_SQR_THRESHOLD
446
#define FFT_MODF_SQR_THRESHOLD (TOOM3_SQR_THRESHOLD * 3)
449
/* Threshold at which FFT should be used to do an NxN -> 2N multiply. This
450
will be a size where FFT is using k=7 or k=8, since an FFT-k used for an
451
NxN->2N multiply and not recursing into itself is an order
452
log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which
453
is the first better than toom3. */
454
#ifndef FFT_MUL_THRESHOLD
455
#define FFT_MUL_THRESHOLD (FFT_MODF_MUL_THRESHOLD * 10)
457
#ifndef FFT_SQR_THRESHOLD
458
#define FFT_SQR_THRESHOLD (FFT_MODF_SQR_THRESHOLD * 10)
461
/* Table of thresholds for successive modF FFT "k"s. The first entry is
462
where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
463
etc. See mpn_fft_best_k(). */
464
#ifndef FFT_MUL_TABLE
465
#define FFT_MUL_TABLE \
466
{ TOOM3_MUL_THRESHOLD * 4, /* k=5 */ \
467
TOOM3_MUL_THRESHOLD * 8, /* k=6 */ \
468
TOOM3_MUL_THRESHOLD * 16, /* k=7 */ \
469
TOOM3_MUL_THRESHOLD * 32, /* k=8 */ \
470
TOOM3_MUL_THRESHOLD * 96, /* k=9 */ \
471
TOOM3_MUL_THRESHOLD * 288, /* k=10 */ \
474
#ifndef FFT_SQR_TABLE
475
#define FFT_SQR_TABLE \
476
{ TOOM3_SQR_THRESHOLD * 4, /* k=5 */ \
477
TOOM3_SQR_THRESHOLD * 8, /* k=6 */ \
478
TOOM3_SQR_THRESHOLD * 16, /* k=7 */ \
479
TOOM3_SQR_THRESHOLD * 32, /* k=8 */ \
480
TOOM3_SQR_THRESHOLD * 96, /* k=9 */ \
481
TOOM3_SQR_THRESHOLD * 288, /* k=10 */ \
485
#ifndef FFT_TABLE_ATTRS
486
#define FFT_TABLE_ATTRS static const
489
#define MPN_FFT_TABLE_SIZE 16
492
/* Return non-zero if xp,xsize and yp,ysize overlap.
493
If xp+xsize<=yp there's no overlap, or if yp+ysize<=xp there's no
494
overlap. If both these are false, there's an overlap. */
495
#define MPN_OVERLAP_P(xp, xsize, yp, ysize) \
496
((xp) + (xsize) > (yp) && (yp) + (ysize) > (xp))
499
/* ASSERT() is a private assertion checking scheme, similar to <assert.h>.
500
ASSERT() does the check only if WANT_ASSERT is selected, ASSERT_ALWAYS()
501
does it always. Generally assertions are meant for development, but
502
might help when looking for a problem later too.
504
ASSERT_NOCARRY() uses ASSERT() to check the expression is zero, but if
505
assertion checking is disabled, the expression is still evaluated. This
506
is meant for use with routines like mpn_add_n() where the return value
507
represents a carry or whatever that shouldn't occur. For example,
508
ASSERT_NOCARRY (mpn_add_n (rp, s1p, s2p, size)); */
511
#define ASSERT_LINE __LINE__
513
#define ASSERT_LINE -1
517
#define ASSERT_FILE __FILE__
519
#define ASSERT_FILE ""
522
int __gmp_assert_fail _PROTO((const char *filename, int linenum,
526
#define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, #expr)
528
#define ASSERT_FAIL(expr) __gmp_assert_fail (ASSERT_FILE, ASSERT_LINE, "expr")
532
#define CAST_TO_VOID (void)
537
#define ASSERT_ALWAYS(expr) ((expr) ? 0 : ASSERT_FAIL (expr))
540
#define ASSERT(expr) ASSERT_ALWAYS (expr)
541
#define ASSERT_NOCARRY(expr) ASSERT_ALWAYS ((expr) == 0)
544
#define ASSERT(expr) (CAST_TO_VOID 0)
545
#define ASSERT_NOCARRY(expr) (expr)
549
#if HAVE_NATIVE_mpn_com_n
550
#define mpn_com_n __MPN(com_n)
551
void mpn_com_n _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
553
#define mpn_com_n(d,s,n) \
557
mp_srcptr __s = (s); \
558
mp_size_t __n = (n); \
566
#define MPN_LOGOPS_N_INLINE(d,s1,s2,n,dop,op,s2op) \
570
mp_srcptr __s1 = (s1); \
571
mp_srcptr __s2 = (s2); \
572
mp_size_t __n = (n); \
574
*__d++ = dop (*__s1++ op s2op *__s2++); \
579
#if HAVE_NATIVE_mpn_and_n
580
#define mpn_and_n __MPN(and_n)
581
void mpn_and_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
583
#define mpn_and_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,&, )
586
#if HAVE_NATIVE_mpn_andn_n
587
#define mpn_andn_n __MPN(andn_n)
588
void mpn_andn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
590
#define mpn_andn_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,&,~)
593
#if HAVE_NATIVE_mpn_nand_n
594
#define mpn_nand_n __MPN(nand_n)
595
void mpn_nand_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
597
#define mpn_nand_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,&, )
600
#if HAVE_NATIVE_mpn_ior_n
601
#define mpn_ior_n __MPN(ior_n)
602
void mpn_ior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
604
#define mpn_ior_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,|, )
607
#if HAVE_NATIVE_mpn_iorn_n
608
#define mpn_iorn_n __MPN(iorn_n)
609
void mpn_iorn_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
611
#define mpn_iorn_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,|,~)
614
#if HAVE_NATIVE_mpn_nior_n
615
#define mpn_nior_n __MPN(nior_n)
616
void mpn_nior_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
618
#define mpn_nior_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,|, )
621
#if HAVE_NATIVE_mpn_xor_n
622
#define mpn_xor_n __MPN(xor_n)
623
void mpn_xor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
625
#define mpn_xor_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n, ,^, )
628
#if HAVE_NATIVE_mpn_xnor_n
629
#define mpn_xnor_n __MPN(xnor_n)
630
void mpn_xnor_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t));
632
#define mpn_xnor_n(d,s1,s2,n) MPN_LOGOPS_N_INLINE(d,s1,s2,n,~,^, )
635
/* Structure for conversion between internal binary format and
636
strings in base 2..36. */
639
/* Number of digits in the conversion base that always fits in an mp_limb_t.
640
For example, for base 10 on a machine where a mp_limb_t has 32 bits this
641
is 9, since 10**9 is the largest number that fits into a mp_limb_t. */
644
/* log(2)/log(conversion_base) */
645
double chars_per_bit_exactly;
647
/* base**chars_per_limb, i.e. the biggest number that fits a word, built by
648
factors of base. Exception: For 2, 4, 8, etc, big_base is log2(base),
649
i.e. the number of bits used to represent each digit in the base. */
652
/* A BITS_PER_MP_LIMB bit approximation to 1/big_base, represented as a
653
fixed-point number. Instead of dividing by big_base an application can
654
choose to multiply by big_base_inverted. */
655
mp_limb_t big_base_inverted;
658
#define __mp_bases __MPN(mp_bases)
659
extern const struct bases __mp_bases[];
660
extern mp_size_t __gmp_default_fp_limb_precision;
662
#if defined (__i386__)
663
#define TARGET_REGISTER_STARVED 1
665
#define TARGET_REGISTER_STARVED 0
668
/* Use a library function for invert_limb, if available. */
669
#if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
670
#define mpn_invert_limb __MPN(invert_limb)
671
mp_limb_t mpn_invert_limb _PROTO ((mp_limb_t));
672
#define invert_limb(invxl,xl) (invxl = __MPN(invert_limb) (xl))
676
#define invert_limb(invxl,xl) \
680
invxl = ~(mp_limb_t) 0; \
682
udiv_qrnnd (invxl, dummy, -xl, 0, xl); \
686
/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
687
limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
688
If this would yield overflow, DI should be the largest possible number
689
(i.e., only ones). For correct operation, the most significant bit of D
690
has to be set. Put the quotient in Q and the remainder in R. */
691
#define udiv_qrnnd_preinv(q, r, nh, nl, d, di) \
693
mp_limb_t _q, _ql, _r; \
694
mp_limb_t _xh, _xl; \
695
umul_ppmm (_q, _ql, (nh), (di)); \
696
_q += (nh); /* DI is 2**BITS_PER_MP_LIMB too small */\
697
umul_ppmm (_xh, _xl, _q, (d)); \
698
sub_ddmmss (_xh, _r, (nh), (nl), _xh, _xl); \
701
sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \
705
sub_ddmmss (_xh, _r, _xh, _r, 0, (d)); \
717
/* Like udiv_qrnnd_preinv, but for any value D. DNORM is D shifted left
718
so that its most significant bit is set. LGUP is ceil(log2(D)). */
719
#define udiv_qrnnd_preinv2gen(q, r, nh, nl, d, di, dnorm, lgup) \
721
mp_limb_t _n2, _n10, _n1, _nadj, _q1; \
722
mp_limb_t _xh, _xl; \
723
_n2 = ((nh) << (BITS_PER_MP_LIMB - (lgup))) + ((nl) >> 1 >> (l - 1));\
724
_n10 = (nl) << (BITS_PER_MP_LIMB - (lgup)); \
725
_n1 = ((mp_limb_signed_t) _n10 >> (BITS_PER_MP_LIMB - 1)); \
726
_nadj = _n10 + (_n1 & (dnorm)); \
727
umul_ppmm (_xh, _xl, di, _n2 - _n1); \
728
add_ssaaaa (_xh, _xl, _xh, _xl, 0, _nadj); \
729
_q1 = ~(_n2 + _xh); \
730
umul_ppmm (_xh, _xl, _q1, d); \
731
add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \
733
(r) = _xl + ((d) & _xh); \
736
/* Exactly like udiv_qrnnd_preinv, but branch-free. It is not clear which
738
#define udiv_qrnnd_preinv2norm(q, r, nh, nl, d, di) \
740
mp_limb_t _n2, _n10, _n1, _nadj, _q1; \
741
mp_limb_t _xh, _xl; \
744
_n1 = ((mp_limb_signed_t) _n10 >> (BITS_PER_MP_LIMB - 1)); \
745
_nadj = _n10 + (_n1 & (d)); \
746
umul_ppmm (_xh, _xl, di, _n2 - _n1); \
747
add_ssaaaa (_xh, _xl, _xh, _xl, 0, _nadj); \
748
_q1 = ~(_n2 + _xh); \
749
umul_ppmm (_xh, _xl, _q1, d); \
750
add_ssaaaa (_xh, _xl, _xh, _xl, nh, nl); \
752
(r) = _xl + ((d) & _xh); \
757
/* modlimb_invert() sets "inv" to the multiplicative inverse of "n" modulo
758
2^BITS_PER_MP_LIMB, ie. so that inv*n == 1 mod 2^BITS_PER_MP_LIMB.
759
"n" must be odd (otherwise such an inverse doesn't exist).
761
This is not to be confused with invert_limb(), which is completely
764
The table lookup gives an inverse with the low 8 bits valid, and each
765
multiply step doubles the number of bits. See Jebelean's exact division
766
paper, end of section 4 (reference in gmp.texi). */
768
#define modlimb_invert_table __gmp_modlimb_invert_table
769
extern const unsigned char modlimb_invert_table[128];
771
#if BITS_PER_MP_LIMB <= 32
772
#define modlimb_invert(inv,n) \
774
mp_limb_t __n = (n); \
776
ASSERT ((__n & 1) == 1); \
777
__inv = modlimb_invert_table[(__n&0xFF)/2]; /* 8 */ \
778
__inv = 2 * __inv - __inv * __inv * __n; /* 16 */ \
779
__inv = 2 * __inv - __inv * __inv * __n; /* 32 */ \
780
ASSERT (__inv * __n == 1); \
785
#if BITS_PER_MP_LIMB > 32 && BITS_PER_MP_LIMB <= 64
786
#define modlimb_invert(inv,n) \
788
mp_limb_t __n = (n); \
790
ASSERT ((__n & 1) == 1); \
791
__inv = modlimb_invert_table[(__n&0xFF)/2]; /* 8 */ \
792
__inv = 2 * __inv - __inv * __inv * __n; /* 16 */ \
793
__inv = 2 * __inv - __inv * __inv * __n; /* 32 */ \
794
__inv = 2 * __inv - __inv * __inv * __n; /* 64 */ \
795
ASSERT (__inv * __n == 1); \
801
/* The `mode' attribute was introduced in GCC 2.2, but we can only distinguish
802
between GCC 2 releases from 2.5, since __GNUC_MINOR__ wasn't introduced
804
#if (__GNUC__ - 0 > 2 || defined (__GNUC_MINOR__)) && ! defined (__APPLE_CC__)
805
/* Define stuff for longlong.h. */
806
typedef unsigned int UQItype __attribute__ ((mode (QI)));
807
typedef int SItype __attribute__ ((mode (SI)));
808
typedef unsigned int USItype __attribute__ ((mode (SI)));
809
typedef int DItype __attribute__ ((mode (DI)));
810
typedef unsigned int UDItype __attribute__ ((mode (DI)));
812
typedef unsigned char UQItype;
814
typedef unsigned long USItype;
815
#if defined _LONGLONG || defined _LONG_LONG_LIMB
816
typedef long long int DItype;
817
typedef unsigned long long int UDItype;
818
#else /* Assume `long' gives us a wide enough type. Needed for hppa2.0w. */
819
typedef long int DItype;
820
typedef unsigned long int UDItype;
824
typedef mp_limb_t UWtype;
825
typedef unsigned int UHWtype;
826
#define W_TYPE_SIZE BITS_PER_MP_LIMB
828
/* Define ieee_double_extract and _GMP_IEEE_FLOATS. */
830
#if (defined (__arm__) && (defined (__ARMWEL__) || defined (__linux__)))
831
/* Special case for little endian ARM since floats remain in big-endian. */
832
#define _GMP_IEEE_FLOATS 1
833
union ieee_double_extract
837
unsigned int manh:20;
840
unsigned int manl:32;
845
#if defined (_LITTLE_ENDIAN) || defined (__LITTLE_ENDIAN__) \
846
|| defined (__alpha) \
847
|| defined (__clipper__) \
848
|| defined (__cris) \
849
|| defined (__i386__) \
850
|| defined (__i860__) \
851
|| defined (__i960__) \
852
|| defined (MIPSEL) || defined (_MIPSEL) \
853
|| defined (__ns32000__) \
854
|| defined (__WINNT) || defined (_WIN32)
855
#define _GMP_IEEE_FLOATS 1
856
union ieee_double_extract
860
unsigned int manl:32;
861
unsigned int manh:20;
867
#else /* Need this as an #else since the tests aren't made exclusive. */
868
#if defined (_BIG_ENDIAN) || defined (__BIG_ENDIAN__) \
869
|| defined (__a29k__) || defined (_AM29K) \
870
|| defined (__arm__) \
871
|| (defined (__convex__) && defined (_IEEE_FLOAT_)) \
872
|| defined (_CRAYMPP) \
873
|| defined (__i370__) || defined (__mvs__) \
874
|| defined (__mc68000__) || defined (__mc68020__) || defined (__m68k__)\
875
|| defined(mc68020) \
876
|| defined (__m88000__) \
877
|| defined (MIPSEB) || defined (_MIPSEB) \
878
|| defined (__hppa) || defined (__hppa__) \
879
|| defined (__pyr__) \
880
|| defined (__ibm032__) \
881
|| defined (_IBMR2) || defined (_ARCH_PPC) \
882
|| defined (__sh__) \
883
|| defined (__sparc) || defined (sparc) \
884
|| defined (__we32k__)
885
#define _GMP_IEEE_FLOATS 1
886
union ieee_double_extract
892
unsigned int manh:20;
893
unsigned int manl:32;
901
/* Using "(2.0 * ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 1)))" doesn't work on
902
SunOS 4.1.4 native /usr/ucb/cc (K&R), it comes out as -4294967296.0,
903
presumably due to treating the mp_limb_t constant as signed rather than
905
#define MP_BASE_AS_DOUBLE (4.0 * ((mp_limb_t) 1 << (BITS_PER_MP_LIMB - 2)))
906
#if BITS_PER_MP_LIMB == 64
907
#define LIMBS_PER_DOUBLE 2
909
#define LIMBS_PER_DOUBLE 3
912
double __gmp_scale2 _PROTO ((double, int));
913
int __gmp_extract_double _PROTO ((mp_ptr, double));
915
extern int __gmp_junk;
916
extern const int __gmp_0;
917
#define GMP_ERROR(code) (gmp_errno |= (code), __gmp_junk = 10/__gmp_0)
918
#define DIVIDE_BY_ZERO GMP_ERROR(GMP_ERROR_DIVISION_BY_ZERO)
919
#define SQRT_OF_NEGATIVE GMP_ERROR(GMP_ERROR_SQRT_OF_NEGATIVE)
921
#if defined _LONG_LONG_LIMB
922
#if defined (__STDC__)
923
#define CNST_LIMB(C) C##LL
925
#define CNST_LIMB(C) C/**/LL
927
#else /* not _LONG_LONG_LIMB */
928
#if defined (__STDC__)
929
#define CNST_LIMB(C) C##L
931
#define CNST_LIMB(C) C/**/L
933
#endif /* _LONG_LONG_LIMB */
935
/*** Stuff used by mpn/generic/prefsqr.c and mpn/generic/next_prime.c ***/
936
#if BITS_PER_MP_LIMB == 32
937
#define PP 0xC0CFD797L /* 3 x 5 x 7 x 11 x 13 x ... x 29 */
938
#define PP_INVERTED 0x53E5645CL
939
#define PP_MAXPRIME 29
940
#define PP_MASK 0x208A28A8L
943
#if BITS_PER_MP_LIMB == 64
944
#define PP CNST_LIMB(0xE221F97C30E94E1D) /* 3 x 5 x 7 x 11 x 13 x ... x 53 */
945
#define PP_INVERTED CNST_LIMB(0x21CFE6CFC938B36B)
946
#define PP_MAXPRIME 53
947
#define PP_MASK CNST_LIMB(0x208A20A08A28A8)
951
/* BIT1 means a result value in bit 1 (second least significant bit), with a
952
zero bit representing +1 and a one bit representing -1. Bits other than
955
JACOBI_TWOS_U_BIT1 and JACOBI_RECIP_UU_BIT1 are used in mpn_jacobi_base
956
and their speed is important. Expressions are used rather than
957
conditionals to accumulate sign changes, which effectively means XORs
958
instead of conditional JUMPs. */
960
/* (a/0), with a signed; is 1 if a=+/-1, 0 otherwise */
961
#define JACOBI_S0(a) \
962
(((a) == 1) | ((a) == -1))
964
/* (a/0), with a unsigned; is 1 if a=+/-1, 0 otherwise */
965
#define JACOBI_U0(a) \
968
/* (a/0), with a an mpz_t; is 1 if a=+/-1, 0 otherwise
969
An mpz_t always has at least one limb of allocated space, so the fetch of
970
the low limb is valid. */
971
#define JACOBI_Z0(a) \
972
(((SIZ(a) == 1) | (SIZ(a) == -1)) & (PTR(a)[0] == 1))
974
/* Convert a bit1 to +1 or -1. */
975
#define JACOBI_BIT1_TO_PN(result_bit1) \
976
(1 - ((result_bit1) & 2))
978
/* (2/b), with b unsigned and odd;
979
is (-1)^((b^2-1)/8) which is 1 if b==1,7mod8 or -1 if b==3,5mod8 and
980
hence obtained from (b>>1)^b */
981
#define JACOBI_TWO_U_BIT1(b) \
982
(ASSERT (b & 1), (((b) >> 1) ^ (b)))
984
/* (2/b)^twos, with b unsigned and odd */
985
#define JACOBI_TWOS_U_BIT1(twos, b) \
986
(((twos) << 1) & JACOBI_TWO_U_BIT1 (b))
988
/* (2/b)^twos, with b unsigned and odd */
989
#define JACOBI_TWOS_U(twos, b) \
990
(JACOBI_BIT1_TO_PN (JACOBI_TWOS_U_BIT1 (twos, b)))
992
/* (a/b) effect due to sign of a: signed/unsigned, b odd;
993
is (-1)^((b-1)/2) if a<0, or +1 if a>=0 */
994
#define JACOBI_ASGN_SU_BIT1(a, b) \
995
((((a) < 0) << 1) & (b))
997
/* (a/b) effect due to sign of b: signed/mpz;
998
is -1 if a and b both negative, +1 otherwise */
999
#define JACOBI_BSGN_SZ_BIT1(a, b) \
1000
((((a) < 0) & (SIZ(b) < 0)) << 1)
1002
/* (a/b) effect due to sign of b: mpz/signed */
1003
#define JACOBI_BSGN_ZS_BIT1(a, b) \
1004
JACOBI_BSGN_SZ_BIT1(b, a)
1006
/* (a/b) reciprocity to switch to (b/a), a,b both unsigned and odd.
1007
Is (-1)^((a-1)*(b-1)/4), which means +1 if either a,b==1mod4 or -1 if
1008
both a,b==3mod4, achieved in bit 1 by a&b. No ASSERT()s about a,b odd
1009
because this is used in a couple of places with only bit 1 of a or b
1011
#define JACOBI_RECIP_UU_BIT1(a, b) \
1015
/* For testing and debugging. */
1016
#define MPZ_CHECK_FORMAT(z) \
1017
(ASSERT_ALWAYS (SIZ(z) == 0 || PTR(z)[ABSIZ(z) - 1] != 0), \
1018
ASSERT_ALWAYS (ALLOC(z) >= ABSIZ(z)))
1019
#define MPZ_PROVOKE_REALLOC(z) \
1020
do { ALLOC(z) = ABSIZ(z); } while (0)
1023
#if TUNE_PROGRAM_BUILD
1024
/* Some extras wanted when recompiling some .c files for use by the tune
1025
program. Not part of a normal build. */
1027
extern mp_size_t mul_threshold[];
1028
extern mp_size_t fft_modf_mul_threshold;
1029
extern mp_size_t sqr_threshold[];
1030
extern mp_size_t fft_modf_sqr_threshold;
1031
extern mp_size_t bz_threshold[];
1032
extern mp_size_t fib_threshold[];
1033
extern mp_size_t powm_threshold[];
1034
extern mp_size_t gcd_accel_threshold[];
1035
extern mp_size_t gcdext_threshold[];
1037
#undef KARATSUBA_MUL_THRESHOLD
1038
#undef TOOM3_MUL_THRESHOLD
1039
#undef FFT_MUL_TABLE
1040
#undef FFT_MUL_THRESHOLD
1041
#undef FFT_MODF_MUL_THRESHOLD
1042
#undef KARATSUBA_SQR_THRESHOLD
1043
#undef TOOM3_SQR_THRESHOLD
1044
#undef FFT_SQR_TABLE
1045
#undef FFT_SQR_THRESHOLD
1046
#undef FFT_MODF_SQR_THRESHOLD
1048
#undef FIB_THRESHOLD
1049
#undef POWM_THRESHOLD
1050
#undef GCD_ACCEL_THRESHOLD
1051
#undef GCDEXT_THRESHOLD
1053
#define KARATSUBA_MUL_THRESHOLD mul_threshold[0]
1054
#define TOOM3_MUL_THRESHOLD mul_threshold[1]
1055
#define FFT_MUL_TABLE 0
1056
#define FFT_MUL_THRESHOLD mul_threshold[2]
1057
#define FFT_MODF_MUL_THRESHOLD fft_modf_mul_threshold
1058
#define KARATSUBA_SQR_THRESHOLD sqr_threshold[0]
1059
#define TOOM3_SQR_THRESHOLD sqr_threshold[1]
1060
#define FFT_SQR_TABLE 0
1061
#define FFT_SQR_THRESHOLD sqr_threshold[2]
1062
#define FFT_MODF_SQR_THRESHOLD fft_modf_sqr_threshold
1063
#define BZ_THRESHOLD bz_threshold[0]
1064
#define FIB_THRESHOLD fib_threshold[0]
1065
#define POWM_THRESHOLD powm_threshold[0]
1066
#define GCD_ACCEL_THRESHOLD gcd_accel_threshold[0]
1067
#define GCDEXT_THRESHOLD gcdext_threshold[0]
1069
#define TOOM3_MUL_THRESHOLD_LIMIT 700
1071
#undef FFT_TABLE_ATTRS
1072
#define FFT_TABLE_ATTRS
1073
extern mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE];
1075
#endif /* TUNE_PROGRAM_BUILD */
1077
#if defined (__cplusplus)