1
/* TomsFastMath, a fast ISO C bignum library.
3
* This project is meant to fill in where LibTomMath
4
* falls short. That is speed ;-)
6
* This project is public domain and free for all purposes.
8
* Tom St Denis, tomstdenis@gmail.com
15
#include "bignum_fast.h"
17
#if defined(TFM_PRESCOTT) && defined(TFM_SSE2)
22
/* these are the combas. Worship them. */
24
/* Generic x86 optimized code */
26
/* anything you need at the start */
29
/* clear the chaining variables */
33
/* forward the carry to the next digit */
34
#define COMBA_FORWARD \
35
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
37
/* store the first sum */
38
#define COMBA_STORE(x) \
41
/* store the second sum [carry] */
42
#define COMBA_STORE2(x) \
45
/* anything you need at the end */
48
/* this should multiply i and j */
49
#define MULADD(i, j) \
51
"movl %6,%%eax \n\t" \
53
"addl %%eax,%0 \n\t" \
54
"adcl %%edx,%1 \n\t" \
56
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","%edx","cc");
58
#elif defined(TFM_X86_64)
59
/* x86-64 optimized */
61
/* anything you need at the start */
64
/* clear the chaining variables */
68
/* forward the carry to the next digit */
69
#define COMBA_FORWARD \
70
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
72
/* store the first sum */
73
#define COMBA_STORE(x) \
76
/* store the second sum [carry] */
77
#define COMBA_STORE2(x) \
80
/* anything you need at the end */
83
/* this should multiply i and j */
84
#define MULADD(i, j) \
86
"movq %6,%%rax \n\t" \
88
"addq %%rax,%0 \n\t" \
89
"adcq %%rdx,%1 \n\t" \
91
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) :"%rax","%rdx","cc");
93
#elif defined(TFM_SSE2)
94
/* use SSE2 optimizations */
96
/* anything you need at the start */
99
/* clear the chaining variables */
100
#define COMBA_CLEAR \
103
/* forward the carry to the next digit */
104
#define COMBA_FORWARD \
105
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
107
/* store the first sum */
108
#define COMBA_STORE(x) \
111
/* store the second sum [carry] */
112
#define COMBA_STORE2(x) \
115
/* anything you need at the end */
119
/* this should multiply i and j */
120
#define MULADD(i, j) \
122
"movd %6,%%mm0 \n\t" \
123
"movd %7,%%mm1 \n\t" \
124
"pmuludq %%mm1,%%mm0\n\t" \
125
"movd %%mm0,%%eax \n\t" \
126
"psrlq $32,%%mm0 \n\t" \
127
"addl %%eax,%0 \n\t" \
128
"movd %%mm0,%%eax \n\t" \
129
"adcl %%eax,%1 \n\t" \
131
:"=r"(c0), "=r"(c1), "=r"(c2): "0"(c0), "1"(c1), "2"(c2), "m"(i), "m"(j) :"%eax","cc");
133
#elif defined(TFM_ARM)
138
#define COMBA_CLEAR \
141
#define COMBA_FORWARD \
142
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
144
#define COMBA_STORE(x) \
147
#define COMBA_STORE2(x) \
152
#define MULADD(i, j) \
154
" UMULL r0,r1,%6,%7 \n\t" \
155
" ADDS %0,%0,r0 \n\t" \
156
" ADCS %1,%1,r1 \n\t" \
157
" ADC %2,%2,#0 \n\t" \
158
:"=r"(c0), "=r"(c1), "=r"(c2) : "0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j) : "r0", "r1", "cc");
160
#elif defined(TFM_PPC32)
165
#define COMBA_CLEAR \
168
#define COMBA_FORWARD \
169
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
171
#define COMBA_STORE(x) \
174
#define COMBA_STORE2(x) \
179
/* untested: will mulhwu change the flags? Docs say no */
180
#define MULADD(i, j) \
182
" mullw 16,%6,%7 \n\t" \
183
" addc %0,%0,16 \n\t" \
184
" mulhwu 16,%6,%7 \n\t" \
185
" adde %1,%1,16 \n\t" \
186
" addze %2,%2 \n\t" \
187
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"16");
189
#elif defined(TFM_PPC64)
194
#define COMBA_CLEAR \
197
#define COMBA_FORWARD \
198
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
200
#define COMBA_STORE(x) \
203
#define COMBA_STORE2(x) \
208
/* untested: will mulhdu change the flags? Docs say no */
209
#define MULADD(i, j) \
211
" mulld r16,%6,%7 \n\t" \
212
" addc %0,%0,16 \n\t" \
213
" mulhdu r16,%6,%7 \n\t" \
214
" adde %1,%1,16 \n\t" \
215
" addze %2,%2 \n\t" \
216
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r16");
218
#elif defined(TFM_AVR32)
224
#define COMBA_CLEAR \
227
#define COMBA_FORWARD \
228
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
230
#define COMBA_STORE(x) \
233
#define COMBA_STORE2(x) \
238
#define MULADD(i, j) \
240
" mulu.d r2,%6,%7 \n\t"\
242
" adc %1,%1,r3 \n\t"\
244
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"r2","r3");
246
#elif defined(TFM_MIPS)
250
#define COMBA_CLEAR \
253
#define COMBA_FORWARD \
254
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
256
#define COMBA_STORE(x) \
259
#define COMBA_STORE2(x) \
264
#define MULADD(i, j) \
266
" multu %6,%7 \n\t" \
269
" addu %0,%0,$12 \n\t" \
270
" sltu $12,%0,$12 \n\t" \
271
" addu %1,%1,$13 \n\t" \
272
" sltu $13,%1,$13 \n\t" \
273
" addu %1,%1,$12 \n\t" \
274
" sltu $12,%1,$12 \n\t" \
275
" addu %2,%2,$13 \n\t" \
276
" addu %2,%2,$12 \n\t" \
277
:"=r"(c0), "=r"(c1), "=r"(c2):"0"(c0), "1"(c1), "2"(c2), "r"(i), "r"(j):"$12","$13");
284
#define COMBA_CLEAR \
287
#define COMBA_FORWARD \
288
do { c0 = c1; c1 = c2; c2 = 0; } while (0);
290
#define COMBA_STORE(x) \
293
#define COMBA_STORE2(x) \
298
#define MULADD(i, j) \
300
t = (fp_word)c0 + ((fp_word)i) * ((fp_word)j); c0 = t; \
301
t = (fp_word)c1 + (t >> DIGIT_BIT); c1 = t; c2 += t >> DIGIT_BIT; \
308
/* generic PxQ multiplier */
309
void fp_mul_comba(fp_int *A, fp_int *B, fp_int *C)
311
int ix, iy, iz, tx, ty, pa;
312
fp_digit c0, c1, c2, *tmpx, *tmpy;
318
/* get size of output and trim */
319
pa = A->used + B->used;
324
if (A == C || B == C) {
332
for (ix = 0; ix < pa; ix++) {
333
/* get offsets into the two bignums */
334
ty = MIN(ix, B->used-1);
337
/* setup temp aliases */
341
/* this is the number of times the loop will iterrate, essentially its
342
while (tx++ < a->used && ty-- >= 0) { ... }
344
iy = MIN(A->used-tx, ty+1);
348
for (iz = 0; iz < iy; ++iz) {
349
MULADD(*tmpx++, *tmpy--);
353
COMBA_STORE(dst->dp[ix]);
358
dst->sign = A->sign ^ B->sign;
365
/* $Source: /cvs/libtom/tomsfastmath/src/mul/fp_mul_comba.c,v $ */
366
/* $Revision: 1.4 $ */
367
/* $Date: 2007/03/14 23:47:42 $ */