6
6
THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
9
Copyright 1991, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002 Free Software
9
Copyright 1991, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003, 2005
10
Free Software Foundation, Inc.
12
12
This file is part of the GNU MP Library.
24
24
You should have received a copy of the GNU Lesser General Public License
25
25
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
26
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
27
MA 02111-1307, USA. */
26
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
27
MA 02110-1301, USA. */
30
30
#include "gmp-impl.h"
33
#ifndef MUL_BASECASE_MAX_UN
34
#define MUL_BASECASE_MAX_UN 500
32
37
/* Multiply the natural numbers u (pointed to by UP, with UN limbs) and v
33
38
(pointed to by VP, with VN limbs), and store the result at PRODP. The
34
39
result is UN + VN limbs. Return the most significant limb of the result.
41
46
2. PRODP != UP and PRODP != VP, i.e. the destination must be distinct from
42
47
the multiplier and the multiplicand. */
45
mpn_sqr_n (mp_ptr prodp,
46
mp_srcptr up, mp_size_t un)
49
ASSERT (! MPN_OVERLAP_P (prodp, 2*un, up, un));
51
/* FIXME: Can this be removed? */
55
if (BELOW_THRESHOLD (un, SQR_BASECASE_THRESHOLD))
56
{ /* mul_basecase is faster than sqr_basecase on small sizes sometimes */
57
mpn_mul_basecase (prodp, up, un, up, un);
59
else if (BELOW_THRESHOLD (un, SQR_KARATSUBA_THRESHOLD))
60
{ /* plain schoolbook multiplication */
61
mpn_sqr_basecase (prodp, up, un);
63
else if (BELOW_THRESHOLD (un, SQR_TOOM3_THRESHOLD))
64
{ /* karatsuba multiplication */
68
tspace = TMP_ALLOC_LIMBS (MPN_KARA_SQR_N_TSIZE (un));
69
mpn_kara_sqr_n (prodp, up, un, tspace);
72
#if WANT_FFT || TUNE_PROGRAM_BUILD
73
else if (BELOW_THRESHOLD (un, SQR_FFT_THRESHOLD))
77
{ /* Toom3 multiplication.
78
Use workspace from the heap, as stack may be limited. Since n is
79
at least MUL_TOOM3_THRESHOLD, the multiplication will take much
80
longer than malloc()/free(). */
83
tsize = MPN_TOOM3_SQR_N_TSIZE (un);
84
tspace = __GMP_ALLOCATE_FUNC_LIMBS (tsize);
85
mpn_toom3_sqr_n (prodp, up, un, tspace);
86
__GMP_FREE_FUNC_LIMBS (tspace, tsize);
88
#if WANT_FFT || TUNE_PROGRAM_BUILD
91
/* schoenhage multiplication */
92
mpn_mul_fft_full (prodp, up, un, up, un);
98
50
mpn_mul (mp_ptr prodp,
99
51
mp_srcptr up, mp_size_t un,
116
68
if (vn < MUL_KARATSUBA_THRESHOLD)
117
{ /* long multiplication */
118
mpn_mul_basecase (prodp, up, un, vp, vn);
69
{ /* plain schoolbook multiplication */
70
if (un <= MUL_BASECASE_MAX_UN)
71
mpn_mul_basecase (prodp, up, un, vp, vn);
74
/* We have un >> MUL_BASECASE_MAX_UN > vn. For better memory
75
locality, split up[] into MUL_BASECASE_MAX_UN pieces and multiply
76
these pieces with the vp[] operand. After each such partial
77
multiplication (but the last) we copy the most significant vn
78
limbs into a temporary buffer since that part would otherwise be
79
overwritten by the next multiplication. After the next
80
multiplication, we add it back. This illustrates the situation:
83
| |<------- un ------->|
84
_____________________|
86
/XX__________________/ |
87
_____________________ |
89
/XX__________________/ |
90
_____________________ |
92
/____________________/ |
93
==================================================================
95
The parts marked with X are the parts whose sums are copied into
96
the temporary buffer. */
98
mp_limb_t tp[MUL_KARATSUBA_THRESHOLD_LIMIT];
100
ASSERT (MUL_KARATSUBA_THRESHOLD <= MUL_KARATSUBA_THRESHOLD_LIMIT);
102
mpn_mul_basecase (prodp, up, MUL_BASECASE_MAX_UN, vp, vn);
103
prodp += MUL_BASECASE_MAX_UN;
104
MPN_COPY (tp, prodp, vn); /* preserve high triangle */
105
up += MUL_BASECASE_MAX_UN;
106
un -= MUL_BASECASE_MAX_UN;
107
while (un > MUL_BASECASE_MAX_UN)
109
mpn_mul_basecase (prodp, up, MUL_BASECASE_MAX_UN, vp, vn);
110
cy = mpn_add_n (prodp, prodp, tp, vn); /* add back preserved triangle */
111
mpn_incr_u (prodp + vn, cy); /* safe? */
112
prodp += MUL_BASECASE_MAX_UN;
113
MPN_COPY (tp, prodp, vn); /* preserve high triangle */
114
up += MUL_BASECASE_MAX_UN;
115
un -= MUL_BASECASE_MAX_UN;
119
mpn_mul_basecase (prodp, up, un, vp, vn);
123
ASSERT_ALWAYS (un > 0);
124
mpn_mul_basecase (prodp, vp, vn, up, un);
126
cy = mpn_add_n (prodp, prodp, tp, vn); /* add back preserved triangle */
127
mpn_incr_u (prodp + vn, cy); /* safe? */
129
return prodp[un + vn - 1];
132
if (ABOVE_THRESHOLD (vn, MUL_FFT_THRESHOLD))
134
mpn_mul_fft_full (prodp, up, un, vp, vn);
119
135
return prodp[un + vn - 1];