/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.

Copyright 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003
Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with this file; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
21
/* You have to define the following before including this file:

   UWtype -- An unsigned type, default type for operations (typically a "word")
   UHWtype -- An unsigned type, at least half the size of UWtype.
   UDWtype -- An unsigned type, at least twice as large as a UWtype
   W_TYPE_SIZE -- size in bits of UWtype

   SItype, USItype -- Signed and unsigned 32 bit types.
   DItype, UDItype -- Signed and unsigned 64 bit types.

   On a 32 bit machine UWtype should typically be USItype;
   on a 64 bit machine, UWtype should typically be UDItype.  */
35
/* Bits in a quarter of a word.  */
#define __BITS4 (W_TYPE_SIZE / 4)
/* 2^(W_TYPE_SIZE/2): the base in which a word splits into two half-words.  */
#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
/* Low half-word of t.  */
#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
/* High half-word of t.  */
#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
40
/* This is used to make sure no undesirable sharing between different libraries
   that use this file takes place.  The "__" prefix keeps the generated names
   out of the application namespace.  */
#define __MPN(x) __##x
47
#if (__STDC__-0) || defined (__cplusplus)
54
/* Define auxiliary asm macros.
56
1) umul_ppmm(high_prod, low_prod, multipler, multiplicand) multiplies two
57
UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype
58
word product in HIGH_PROD and LOW_PROD.
60
2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
61
UDWtype product. This is just a variant of umul_ppmm.
63
3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
64
denominator) divides a UDWtype, composed by the UWtype integers
65
HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
66
in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less
67
than DENOMINATOR for correct operation. If, in addition, the most
68
significant bit of DENOMINATOR must be 1, then the pre-processor symbol
69
UDIV_NEEDS_NORMALIZATION is defined to 1.
71
4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
72
denominator). Like udiv_qrnnd but the numbers are signed. The quotient
75
5) count_leading_zeros(count, x) counts the number of zero-bits from the
76
msb to the first non-zero bit in the UWtype X. This is the number of
77
steps X needs to be shifted left to set the msb. Undefined for X == 0,
78
unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
80
6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
81
from the least significant end.
83
7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
84
high_addend_2, low_addend_2) adds two UWtype integers, composed by
85
HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
86
respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow
87
(i.e. carry out) is not stored anywhere, and is lost.
89
8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
90
high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
91
composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and
92
LOW_SUBTRAHEND_2 respectively. The result is placed in HIGH_DIFFERENCE
93
and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere,
96
If any of these macros are left undefined for a particular CPU,
102
For add_ssaaaa the two high and two low addends can both commute, but
103
unfortunately gcc only supports one "%" commutative in each asm block.
104
This has always been so but is only documented in recent versions
105
(eg. pre-release 3.3). Having two or more "%"s can cause an internal
106
compiler error in certain rare circumstances.
108
Apparently it was only the last "%" that was ever actually respected, so
109
the code has been updated to leave just that. Clearly there's a free
110
choice whether high or low should get it, if there's a reason to favour
111
one over the other. Also obviously when the constraints on the two
operands are identical there's no benefit to the reloader in any "%" at
all.  */
117
/* The CPUs come in alphabetical order below.
119
Please add support for more CPUs here, or improve the current support
120
for the CPUs below! */
122
/* FIXME: The macros using external routines like __MPN(count_leading_zeros)
123
don't need to be under !NO_ASM */
124
#if ! defined (NO_ASM)
126
#if defined (__alpha) && W_TYPE_SIZE == 64
127
/* Most alpha-based machines, except Cray systems. */
128
#if defined (__GNUC__)
129
#define umul_ppmm(ph, pl, m0, m1) \
131
UDItype __m0 = (m0), __m1 = (m1); \
132
__asm__ ("umulh %r1,%2,%0" \
134
: "%rJ" (m0), "rI" (m1)); \
135
(pl) = __m0 * __m1; \
138
#else /* ! __GNUC__ */
139
#include <machine/builtins.h>
140
#define umul_ppmm(ph, pl, m0, m1) \
142
UDItype __m0 = (m0), __m1 = (m1); \
143
(ph) = __UMULH (m0, m1); \
144
(pl) = __m0 * __m1; \
147
#ifndef LONGLONG_STANDALONE
148
#define udiv_qrnnd(q, r, n1, n0, d) \
150
__di = __MPN(invert_limb) (d); \
151
udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \
153
#define UDIV_PREINV_ALWAYS 1
154
#define UDIV_NEEDS_NORMALIZATION 1
155
#define UDIV_TIME 220
156
#endif /* LONGLONG_STANDALONE */
157
/* clz_tab is required by mpn/alpha/cntlz.asm, and that file is built for
158
all alphas, even though ev67 and ev68 don't need it. */
159
#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
160
#if defined (__GNUC__) && (HAVE_HOST_CPU_alphaev67 || HAVE_HOST_CPU_alphaev68)
/* ev67/ev68 have direct count-leading/trailing-zeros instructions.  */
#define count_leading_zeros(COUNT,X) \
  __asm__("ctlz %1,%0" : "=r"(COUNT) : "r"(X))
#define count_trailing_zeros(COUNT,X) \
  __asm__("cttz %1,%0" : "=r"(COUNT) : "r"(X))
165
#else /* ! (ev67 || ev68) */
166
#ifndef LONGLONG_STANDALONE
167
#if HAVE_ATTRIBUTE_CONST
168
long __MPN(count_leading_zeros) _PROTO ((UDItype)) __attribute__ ((const));
170
long __MPN(count_leading_zeros) _PROTO ((UDItype));
172
#define count_leading_zeros(count, x) \
173
((count) = __MPN(count_leading_zeros) (x))
174
#endif /* LONGLONG_STANDALONE */
175
#endif /* ! (ev67 || ev68) */
178
#if defined (_CRAY) && W_TYPE_SIZE == 64
179
#include <intrinsics.h>
180
#define UDIV_PREINV_ALWAYS 1
181
#define UDIV_NEEDS_NORMALIZATION 1
182
#define UDIV_TIME 220
183
long __MPN(count_leading_zeros) _PROTO ((UDItype));
184
#define count_leading_zeros(count, x) \
185
((count) = _leadz ((UWtype) (x)))
186
#if defined (_CRAYIEEE) /* I.e., Cray T90/ieee, T3D, and T3E */
187
#define umul_ppmm(ph, pl, m0, m1) \
189
UDItype __m0 = (m0), __m1 = (m1); \
190
(ph) = _int_mult_upper (m0, m1); \
191
(pl) = __m0 * __m1; \
193
#ifndef LONGLONG_STANDALONE
194
#define udiv_qrnnd(q, r, n1, n0, d) \
196
__di = __MPN(invert_limb) (d); \
197
udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \
199
#endif /* LONGLONG_STANDALONE */
200
#endif /* _CRAYIEEE */
203
#if defined (__hppa) && W_TYPE_SIZE == 64
204
/* These macros are for ABI=2.0w. In ABI=2.0n they can't be used, since GCC
205
(3.2) puts longlong into two adjacent 32-bit registers. Presumably this
206
is just a case of no direct support for 2.0n but treating it like 1.0. */
207
#if defined (__GNUC__) && ! defined (_LONG_LONG_LIMB)
208
/* Two-word add/subtract; the carry/borrow from the low word is consumed by
   the "add,dc" / "sub,db" forms on the high word.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add %4,%5,%1\n\tadd,dc %2,%3,%0" \
	   : "=r" (sh), "=&r" (sl) \
	   : "rM" (ah), "rM" (bh), "%rM" (al), "rM" (bl))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub %4,%5,%1\n\tsub,db %2,%3,%0" \
	   : "=r" (sh), "=&r" (sl) \
	   : "rM" (ah), "rM" (bh), "rM" (al), "rM" (bl))
217
/* We put the result pointer parameter last here, since it makes passing
218
of the other parameters more efficient. */
219
#ifndef LONGLONG_STANDALONE
220
#define umul_ppmm(wh, wl, u, v) \
223
(wh) = __MPN(umul_ppmm) (u, v, &__p0); \
226
extern UWtype __MPN(umul_ppmm) _PROTO ((UWtype, UWtype, UWtype *));
227
#define udiv_qrnnd(q, r, n1, n0, d) \
229
(q) = __MPN(udiv_qrnnd) (n1, n0, d, &__r); \
232
extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype, UWtype, UWtype, UWtype *));
235
#endif /* LONGLONG_STANDALONE */
238
#if defined (__ia64) && W_TYPE_SIZE == 64
239
#if defined (__GNUC__)
240
#define umul_ppmm(ph, pl, m0, m1) \
242
UDItype __m0 = (m0), __m1 = (m1); \
243
__asm__ ("xma.hu %0 = %1, %2, f0" \
245
: "f" (m0), "f" (m1)); \
246
(pl) = __m0 * __m1; \
249
#define count_leading_zeros(count, x) \
251
UWtype _x = (x), _y, _a, _c; \
252
__asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x)); \
253
__asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y)); \
254
_c = (_a - 1) << 3; \
261
(count) = W_TYPE_SIZE - 1 - _c; \
264
#ifndef LONGLONG_STANDALONE
265
#define udiv_qrnnd(q, r, n1, n0, d) \
267
__di = __MPN(invert_limb) (d); \
268
udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \
270
#define UDIV_PREINV_ALWAYS 1
271
#define UDIV_NEEDS_NORMALIZATION 1
273
#define UDIV_TIME 220
277
#if defined (__GNUC__)
279
/* We sometimes need to clobber "cc" with gcc2, but that would not be
280
understood by gcc1. Use cpp to avoid major code duplication. */
283
#define __AND_CLOBBER_CC
284
#else /* __GNUC__ >= 2 */
285
#define __CLOBBER_CC : "cc"
286
#define __AND_CLOBBER_CC , "cc"
287
#endif /* __GNUC__ < 2 */
289
#if (defined (__a29k__) || defined (_AM29K)) && W_TYPE_SIZE == 32
290
/* a29k two-word add/subtract using the carry flag (addc/subc).  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add %1,%4,%5\n\taddc %0,%2,%3" \
	   : "=r" (sh), "=&r" (sl) \
	   : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub %1,%4,%5\n\tsubc %0,%2,%3" \
	   : "=r" (sh), "=&r" (sl) \
	   : "r" (ah), "rI" (bh), "r" (al), "rI" (bl))
298
#define umul_ppmm(xh, xl, m0, m1) \
300
USItype __m0 = (m0), __m1 = (m1); \
301
__asm__ ("multiplu %0,%1,%2" \
303
: "r" (__m0), "r" (__m1)); \
304
__asm__ ("multmu %0,%1,%2" \
306
: "r" (__m0), "r" (__m1)); \
308
/* Divide the two-word value n1:n0 by d; quotient to q, remainder to r.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("dividu %0,%3,%4" \
	   : "=r" (q), "=q" (r) \
	   : "1" (n1), "r" (n0), "r" (d))
312
#define count_leading_zeros(count, x) \
313
__asm__ ("clz %0,%1" \
316
#define COUNT_LEADING_ZEROS_0 32
317
#endif /* __a29k__ */
319
#if defined (__arc__)
320
/* ARC two-word add/subtract; the ".f" suffix sets the flags that the
   following adc/sbc consumes.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add.f\t%1, %4, %5\n\tadc\t%0, %2, %3" \
	   : "=r" ((USItype) (sh)), \
	     "=&r" ((USItype) (sl)) \
	   : "r" ((USItype) (ah)), \
	     "rIJ" ((USItype) (bh)), \
	     "%r" ((USItype) (al)), \
	     "rIJ" ((USItype) (bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub.f\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
	   : "=r" ((USItype) (sh)), \
	     "=&r" ((USItype) (sl)) \
	   : "r" ((USItype) (ah)), \
	     "rIJ" ((USItype) (bh)), \
	     "r" ((USItype) (al)), \
	     "rIJ" ((USItype) (bl)))
338
#if defined (__arm__) && W_TYPE_SIZE == 32
339
/* ARM two-word add: "adds" sets carry, "adc" consumes it.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3" \
	   : "=r" (sh), "=&r" (sl) \
	   : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
343
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
345
if (__builtin_constant_p (al)) \
347
if (__builtin_constant_p (ah)) \
348
__asm__ ("rsbs\t%1, %5, %4\n\trsc\t%0, %3, %2" \
349
: "=r" (sh), "=&r" (sl) \
350
: "rI" (ah), "r" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
352
__asm__ ("rsbs\t%1, %5, %4\n\tsbc\t%0, %2, %3" \
353
: "=r" (sh), "=&r" (sl) \
354
: "r" (ah), "rI" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
356
else if (__builtin_constant_p (ah)) \
358
if (__builtin_constant_p (bl)) \
359
__asm__ ("subs\t%1, %4, %5\n\trsc\t%0, %3, %2" \
360
: "=r" (sh), "=&r" (sl) \
361
: "rI" (ah), "r" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
363
__asm__ ("rsbs\t%1, %5, %4\n\trsc\t%0, %3, %2" \
364
: "=r" (sh), "=&r" (sl) \
365
: "rI" (ah), "r" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
367
else if (__builtin_constant_p (bl)) \
369
if (__builtin_constant_p (bh)) \
370
__asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
371
: "=r" (sh), "=&r" (sl) \
372
: "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
374
__asm__ ("subs\t%1, %4, %5\n\trsc\t%0, %3, %2" \
375
: "=r" (sh), "=&r" (sl) \
376
: "rI" (ah), "r" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
378
else /* only bh might be a constant */ \
379
__asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
380
: "=r" (sh), "=&r" (sl) \
381
: "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC);\
383
#if 1 || defined (__arm_m__) /* `M' series has widening multiply support */
384
/* 32x32->64 widening multiplies via the ARM umull/smull instructions.  */
#define umul_ppmm(xh, xl, a, b) \
  __asm__ ("umull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b))
#define smul_ppmm(xh, xl, a, b) \
  __asm__ ("smull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b))
389
#ifndef LONGLONG_STANDALONE
390
#define udiv_qrnnd(q, r, n1, n0, d) \
392
__di = __MPN(invert_limb) (d); \
393
udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \
395
#define UDIV_PREINV_ALWAYS 1
396
#define UDIV_NEEDS_NORMALIZATION 1
398
#endif /* LONGLONG_STANDALONE */
400
#define umul_ppmm(xh, xl, a, b) \
401
__asm__ ("%@ Inlined umul_ppmm\n" \
402
" mov %|r0, %2, lsr #16\n" \
403
" mov %|r2, %3, lsr #16\n" \
404
" bic %|r1, %2, %|r0, lsl #16\n" \
405
" bic %|r2, %3, %|r2, lsl #16\n" \
406
" mul %1, %|r1, %|r2\n" \
407
" mul %|r2, %|r0, %|r2\n" \
408
" mul %|r1, %0, %|r1\n" \
409
" mul %0, %|r0, %0\n" \
410
" adds %|r1, %|r2, %|r1\n" \
411
" addcs %0, %0, #65536\n" \
412
" adds %1, %1, %|r1, lsl #16\n" \
413
" adc %0, %0, %|r1, lsr #16" \
414
: "=&r" (xh), "=r" (xl) \
418
#ifndef LONGLONG_STANDALONE
419
#define udiv_qrnnd(q, r, n1, n0, d) \
421
(q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \
424
extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
425
#define UDIV_TIME 200
426
#endif /* LONGLONG_STANDALONE */
430
#if defined (__clipper__) && W_TYPE_SIZE == 32
431
#define umul_ppmm(w1, w0, u, v) \
432
({union {UDItype __ll; \
433
struct {USItype __l, __h;} __i; \
435
__asm__ ("mulwux %2,%0" \
437
: "%0" ((USItype)(u)), "r" ((USItype)(v))); \
438
(w1) = __x.__i.__h; (w0) = __x.__i.__l;})
439
#define smul_ppmm(w1, w0, u, v) \
440
({union {DItype __ll; \
441
struct {SItype __l, __h;} __i; \
443
__asm__ ("mulwx %2,%0" \
445
: "%0" ((SItype)(u)), "r" ((SItype)(v))); \
446
(w1) = __x.__i.__h; (w0) = __x.__i.__l;})
447
#define __umulsidi3(u, v) \
449
__asm__ ("mulwux %2,%0" \
450
: "=r" (__w) : "%0" ((USItype)(u)), "r" ((USItype)(v))); \
452
#endif /* __clipper__ */
454
/* Fujitsu vector computers. */
455
#if defined (__uxp__) && W_TYPE_SIZE == 32
456
#define umul_ppmm(ph, pl, u, v) \
458
union {UDItype __ll; \
459
struct {USItype __h, __l;} __i; \
461
__asm__ ("mult.lu %1,%2,%0" : "=r" (__x.__ll) : "%r" (u), "rK" (v));\
462
(ph) = __x.__i.__h; \
463
(pl) = __x.__i.__l; \
465
#define smul_ppmm(ph, pl, u, v) \
467
union {UDItype __ll; \
468
struct {USItype __h, __l;} __i; \
470
__asm__ ("mult.l %1,%2,%0" : "=r" (__x.__ll) : "%r" (u), "rK" (v)); \
471
(ph) = __x.__i.__h; \
472
(pl) = __x.__i.__l; \
476
#if defined (__gmicro__) && W_TYPE_SIZE == 32
477
/* Gmicro (TRON) primitives.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add.w %5,%1\n\taddx %3,%0" \
	   : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \
	   : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
	     "%1" ((USItype)(al)), "g" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub.w %5,%1\n\tsubx %3,%0" \
	   : "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \
	   : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
	     "1" ((USItype)(al)), "g" ((USItype)(bl)))
/* 32x32->64 multiply; high word to ph, low word to pl.  */
#define umul_ppmm(ph, pl, m0, m1) \
  __asm__ ("mulx %3,%0,%1" \
	   : "=g" ((USItype)(ph)), "=r" ((USItype)(pl)) \
	   : "%0" ((USItype)(m0)), "g" ((USItype)(m1)))
/* Divide nh:nl by d; quotient to q, remainder to r.  */
#define udiv_qrnnd(q, r, nh, nl, d) \
  __asm__ ("divx %4,%0,%1" \
	   : "=g" ((USItype)(q)), "=r" ((USItype)(r)) \
	   : "1" ((USItype)(nh)), "0" ((USItype)(nl)), "g" ((USItype)(d)))
#define count_leading_zeros(count, x) \
  __asm__ ("bsch/1 %1,%0" \
	   : "=g" (count) : "g" ((USItype)(x)), "0" ((USItype)0))
500
#if defined (__hppa) && W_TYPE_SIZE == 32
501
/* 32-bit HPPA two-word add/subtract with carry/borrow (addc/subb).  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0" \
	   : "=r" (sh), "=&r" (sl) \
	   : "rM" (ah), "rM" (bh), "%rM" (al), "rM" (bl))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0" \
	   : "=r" (sh), "=&r" (sl) \
	   : "rM" (ah), "rM" (bh), "rM" (al), "rM" (bl))
509
#if defined (_PA_RISC1_1)
510
#define umul_ppmm(wh, wl, u, v) \
512
union {UDItype __ll; \
513
struct {USItype __h, __l;} __i; \
515
__asm__ ("xmpyu %1,%2,%0" : "=*f" (__x.__ll) : "*f" (u), "*f" (v)); \
516
(wh) = __x.__i.__h; \
517
(wl) = __x.__i.__l; \
525
#ifndef LONGLONG_STANDALONE
526
#define udiv_qrnnd(q, r, n1, n0, d) \
528
(q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \
531
extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
532
#endif /* LONGLONG_STANDALONE */
533
#define count_leading_zeros(count, x) \
538
" extru,= %1,15,16,%%r0 ; Bits 31..16 zero?\n" \
539
" extru,tr %1,15,16,%1 ; No. Shift down, skip add.\n" \
540
" ldo 16(%0),%0 ; Yes. Perform add.\n" \
541
" extru,= %1,23,8,%%r0 ; Bits 15..8 zero?\n" \
542
" extru,tr %1,23,8,%1 ; No. Shift down, skip add.\n" \
543
" ldo 8(%0),%0 ; Yes. Perform add.\n" \
544
" extru,= %1,27,4,%%r0 ; Bits 7..4 zero?\n" \
545
" extru,tr %1,27,4,%1 ; No. Shift down, skip add.\n" \
546
" ldo 4(%0),%0 ; Yes. Perform add.\n" \
547
" extru,= %1,29,2,%%r0 ; Bits 3..2 zero?\n" \
548
" extru,tr %1,29,2,%1 ; No. Shift down, skip add.\n" \
549
" ldo 2(%0),%0 ; Yes. Perform add.\n" \
550
" extru %1,30,1,%1 ; Extract bit 1.\n" \
551
" sub %0,%1,%0 ; Subtract it.\n" \
552
: "=r" (count), "=r" (__tmp) : "1" (x)); \
556
#if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32
557
#define smul_ppmm(xh, xl, m0, m1) \
559
union {DItype __ll; \
560
struct {USItype __h, __l;} __i; \
562
__asm__ ("lr %N0,%1\n\tmr %0,%2" \
564
: "r" (m0), "r" (m1)); \
565
(xh) = __x.__i.__h; (xl) = __x.__i.__l; \
567
#define sdiv_qrnnd(q, r, n1, n0, d) \
569
union {DItype __ll; \
570
struct {USItype __h, __l;} __i; \
572
__x.__i.__h = n1; __x.__i.__l = n0; \
573
__asm__ ("dr %0,%2" \
575
: "0" (__x.__ll), "r" (d)); \
576
(q) = __x.__i.__l; (r) = __x.__i.__h; \
580
#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
581
/* x86 two-word add/subtract; adcl/sbbl propagate the carry/borrow.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addl %5,%1\n\tadcl %3,%0" \
	   : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
	   : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
	     "%1" ((USItype)(al)), "g" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subl %5,%1\n\tsbbl %3,%0" \
	   : "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
	   : "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
	     "1" ((USItype)(al)), "g" ((USItype)(bl)))
591
/* 32x32->64 multiply; one-operand "mull" leaves the product in edx:eax,
   matching the "=a"/"=d" outputs.
   NOTE(review): the asm mnemonic line was missing from this copy and has
   been restored as "mull %3" -- verify against the GMP longlong.h master.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mull %3" \
	   : "=a" (w0), "=d" (w1) \
	   : "%0" ((USItype)(u)), "rm" ((USItype)(v)))
595
/* Divide n1:n0 (edx:eax) by dx; "divl" yields quotient in eax, remainder
   in edx.  n1 must be less than dx for a representable quotient.  */
#define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding "=d" */\
  __asm__ ("divl %4" /* stringification in K&R C */ \
	   : "=a" (q), "=d" (r) \
	   : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "rm" ((USItype)(dx)))
600
/* P5 bsrl takes between 10 and 72 cycles depending where the most
601
significant 1 bit is, hence the use of the alternatives below. bsfl is
602
slow too, between 18 and 42 depending where the least significant 1 bit
603
is. The faster count_leading_zeros are pressed into service via the
604
generic count_trailing_zeros at the end of the file. */
606
#if HAVE_HOST_CPU_i586 || HAVE_HOST_CPU_pentium
608
/* The following should be a fixed 14 cycles or so. Some scheduling
609
opportunities should be available between the float load/store too. This
610
is used (with "n&-n" to get trailing zeros) in gcc 3 for __builtin_ffs
611
and is apparently suggested by the Intel optimizing manual (don't know
612
exactly where). gcc 2.95 or up will be best for this, so the "double" is
613
correctly aligned on the stack. */
615
#define count_leading_zeros(c,n) \
622
__u.d = (UWtype) (n); \
623
(c) = 0x3FF + 31 - (__u.a[1] >> 20); \
625
#define COUNT_LEADING_ZEROS_0 (0x3FF + 31)
627
#else /* ! pentium */
628
#if HAVE_HOST_CPU_pentiummmx
630
/* The following should be a fixed 14 or 15 cycles, but possibly plus an L1
631
cache miss reading from __clz_tab. It's favoured over the float above so
632
as to avoid mixing MMX and x87, since the penalty for switching between
633
the two is about 100 cycles.
635
The asm block sets __shift to -3 if the high 24 bits are clear, -2 for
636
16, -1 for 8, or 0 otherwise. This could be written equivalently as
637
follows, but as of gcc 2.95.2 it results in conditional jumps.
639
__shift = -(__n < 0x1000000);
640
__shift -= (__n < 0x10000);
641
__shift -= (__n < 0x100);
643
The middle two sbbl and cmpl's pair, and with luck something gcc
644
generates might pair with the first cmpl and the last sbbl. The "32+1"
645
constant could be folded into __clz_tab[], but it doesn't seem worth
646
making a different table just for that. */
648
#define count_leading_zeros(c,n) \
652
__asm__ ("cmpl $0x1000000, %1\n" \
654
"cmpl $0x10000, %1\n" \
656
"cmpl $0x100, %1\n" \
658
: "=&r" (__shift) : "r" (__n)); \
659
__shift = __shift*8 + 24 + 1; \
660
(c) = 32 + 1 - __shift - __clz_tab[__n >> __shift]; \
663
#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
664
#define COUNT_LEADING_ZEROS_0 31 /* n==0 indistinguishable from n==1 */
666
#else /* !pentiummmx */
667
/* On P6, gcc prior to 3.0 generates a partial register stall for
668
__cbtmp^31, due to using "xorb $31" instead of "xorl $31", the former
669
being 1 code byte smaller. "31-__cbtmp" is a workaround, probably at the
670
cost of one extra instruction. Do this for "i386" too, since that means
673
&& (HAVE_HOST_CPU_i386 \
674
|| HAVE_HOST_CPU_i686 \
675
|| HAVE_HOST_CPU_pentiumpro \
676
|| HAVE_HOST_CPU_pentium2 \
677
|| HAVE_HOST_CPU_pentium3)
678
#define count_leading_zeros(count, x) \
682
__asm__ ("bsrl %1,%0" : "=r" (__cbtmp) : "rm" ((USItype)(x))); \
683
(count) = 31 - __cbtmp; \
686
#define count_leading_zeros(count, x) \
690
__asm__ ("bsrl %1,%0" : "=r" (__cbtmp) : "rm" ((USItype)(x))); \
691
(count) = __cbtmp ^ 31; \
695
#define count_trailing_zeros(count, x) \
698
__asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x))); \
700
#endif /* ! pentiummmx */
701
#endif /* ! pentium */
711
#if defined (__x86_64__) && W_TYPE_SIZE == 64
712
/* x86-64 two-word add/subtract; adcq/sbbq propagate the carry/borrow.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addq %5,%1\n\tadcq %3,%0" \
	   : "=r" ((UDItype)(sh)), "=&r" ((UDItype)(sl)) \
	   : "0" ((UDItype)(ah)), "g" ((UDItype)(bh)), \
	     "%1" ((UDItype)(al)), "g" ((UDItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subq %5,%1\n\tsbbq %3,%0" \
	   : "=r" ((UDItype)(sh)), "=&r" ((UDItype)(sl)) \
	   : "0" ((UDItype)(ah)), "g" ((UDItype)(bh)), \
	     "1" ((UDItype)(al)), "g" ((UDItype)(bl)))
722
/* 64x64->128 multiply; one-operand "mulq" leaves the product in rdx:rax,
   matching the "=a"/"=d" outputs.
   NOTE(review): the asm mnemonic line was missing from this copy and has
   been restored as "mulq %3" -- verify against the GMP longlong.h master.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mulq %3" \
	   : "=a" (w0), "=d" (w1) \
	   : "%0" ((UDItype)(u)), "rm" ((UDItype)(v)))
726
/* Divide n1:n0 (rdx:rax) by dx; "divq" yields quotient in rax, remainder
   in rdx.  n1 must be less than dx for a representable quotient.  */
#define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding "=d" */\
  __asm__ ("divq %4" /* stringification in K&R C */ \
	   : "=a" (q), "=d" (r) \
	   : "0" ((UDItype)(n0)), "1" ((UDItype)(n1)), "rm" ((UDItype)(dx)))
730
#define count_leading_zeros(count, x) \
734
__asm__ ("bsrq %1,%0" : "=r" (__cbtmp) : "rm" ((UDItype)(x))); \
735
(count) = __cbtmp ^ 63; \
737
/* bsfq destination must be a 64-bit register, "%q0" forces this in case
738
count is only an int. */
739
#define count_trailing_zeros(count, x) \
742
__asm__ ("bsfq %1,%q0" : "=r" (count) : "rm" ((UDItype)(x))); \
746
#if defined (__i860__) && W_TYPE_SIZE == 32
747
#define rshift_rhlc(r,h,l,c) \
748
__asm__ ("shr %3,r0,r0\;shrd %1,%2,%0" \
749
"=r" (r) : "r" (h), "r" (l), "rn" (c))
752
#if defined (__i960__) && W_TYPE_SIZE == 32
753
/* i960 two-word add/subtract; the leading "cmpo" presets the condition
   code so the first addc/subc sees a known carry/borrow.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("cmpo 1,0\;addc %5,%4,%1\;addc %3,%2,%0" \
	   : "=r" (sh), "=&r" (sl) \
	   : "dI" (ah), "dI" (bh), "%dI" (al), "dI" (bl))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("cmpo 0,0\;subc %5,%4,%1\;subc %3,%2,%0" \
	   : "=r" (sh), "=&r" (sl) \
	   : "dI" (ah), "dI" (bh), "dI" (al), "dI" (bl))
761
#define umul_ppmm(w1, w0, u, v) \
762
({union {UDItype __ll; \
763
struct {USItype __l, __h;} __i; \
765
__asm__ ("emul %2,%1,%0" \
766
: "=d" (__x.__ll) : "%dI" (u), "dI" (v)); \
767
(w1) = __x.__i.__h; (w0) = __x.__i.__l;})
768
#define __umulsidi3(u, v) \
770
__asm__ ("emul %2,%1,%0" : "=d" (__w) : "%dI" (u), "dI" (v)); \
772
#define udiv_qrnnd(q, r, nh, nl, d) \
774
union {UDItype __ll; \
775
struct {USItype __l, __h;} __i; \
777
__nn.__i.__h = (nh); __nn.__i.__l = (nl); \
778
__asm__ ("ediv %d,%n,%0" \
779
: "=d" (__rq.__ll) : "dI" (__nn.__ll), "dI" (d)); \
780
(r) = __rq.__i.__l; (q) = __rq.__i.__h; \
782
#define count_leading_zeros(count, x) \
785
__asm__ ("scanbit %1,%0" : "=r" (__cbtmp) : "r" (x)); \
786
(count) = __cbtmp ^ 31; \
788
#define COUNT_LEADING_ZEROS_0 (-32) /* sic */
789
#if defined (__i960mx) /* what is the proper symbol to test??? */
790
#define rshift_rhlc(r,h,l,c) \
792
union {UDItype __ll; \
793
struct {USItype __l, __h;} __i; \
795
__nn.__i.__h = (h); __nn.__i.__l = (l); \
796
__asm__ ("shre %2,%1,%0" : "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \
801
#if (defined (__mc68000__) || defined (__mc68020__) || defined(mc68020) \
802
|| defined (__m68k__) || defined (__mc5200__) || defined (__mc5206e__) \
803
|| defined (__mc5307__)) && W_TYPE_SIZE == 32
804
/* m68k two-word add/subtract; addx/subx propagate the extend bit.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0" \
	   : "=d" ((USItype)(sh)), "=&d" ((USItype)(sl)) \
	   : "0" ((USItype)(ah)), "d" ((USItype)(bh)), \
	     "%1" ((USItype)(al)), "g" ((USItype)(bl)))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0" \
	   : "=d" ((USItype)(sh)), "=&d" ((USItype)(sl)) \
	   : "0" ((USItype)(ah)), "d" ((USItype)(bh)), \
	     "1" ((USItype)(al)), "g" ((USItype)(bl)))
814
/* The '020, '030, '040 and CPU32 have 32x32->64 and 64/32->32q-32r. */
815
#if defined (__mc68020__) || defined(mc68020) \
816
|| defined (__mc68030__) || defined (mc68030) \
817
|| defined (__mc68040__) || defined (mc68040) \
818
|| defined (__mcpu32__) || defined (mcpu32) \
819
|| defined (__NeXT__)
820
/* 32x32->64 multiply using the '020+ mulu.l with a register pair result.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mulu%.l %3,%1:%0" \
	   : "=d" ((USItype)(w0)), "=d" ((USItype)(w1)) \
	   : "%0" ((USItype)(u)), "dmi" ((USItype)(v)))
825
/* 64/32->32q-32r unsigned divide of n1:n0 by d via divu.l.  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("divu%.l %4,%1:%0" \
	   : "=d" ((USItype)(q)), "=d" ((USItype)(r)) \
	   : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d)))
830
/* Signed counterpart of udiv_qrnnd, via divs.l.  */
#define sdiv_qrnnd(q, r, n1, n0, d) \
  __asm__ ("divs%.l %4,%1:%0" \
	   : "=d" ((USItype)(q)), "=d" ((USItype)(r)) \
	   : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d)))
834
#else /* for other 68k family members use 16x16->32 multiplication */
835
#define umul_ppmm(xh, xl, a, b) \
836
do { USItype __umul_tmp1, __umul_tmp2; \
837
__asm__ ("| Inlined umul_ppmm\n" \
850
" add%.l %#0x10000,%0\n" \
851
"1: move%.l %2,%3\n" \
858
" | End inlined umul_ppmm" \
859
: "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \
860
"=d" (__umul_tmp1), "=&d" (__umul_tmp2) \
861
: "%2" ((USItype)(a)), "d" ((USItype)(b))); \
863
#define UMUL_TIME 100
864
#define UDIV_TIME 400
865
#endif /* not mc68020 */
866
/* The '020, '030, '040 and '060 have bitfield insns.
867
GCC 3.4 defines __mc68020__ when in CPU32 mode, check for __mcpu32__ to
868
exclude bfffo on that chip (bitfield insns not available). */
869
#if (defined (__mc68020__) || defined (mc68020) \
870
|| defined (__mc68030__) || defined (mc68030) \
871
|| defined (__mc68040__) || defined (mc68040) \
872
|| defined (__mc68060__) || defined (mc68060) \
873
|| defined (__NeXT__)) \
874
&& ! defined (__mcpu32__)
875
/* Count leading zeros with the bitfield find-first-one instruction.  */
#define count_leading_zeros(count, x) \
  __asm__ ("bfffo %1{%b2:%b2},%0" \
	   : "=d" ((USItype) (count)) \
	   : "od" ((USItype) (x)), "n" (0))
#define COUNT_LEADING_ZEROS_0 32
883
#if defined (__m88000__) && W_TYPE_SIZE == 32
884
/* m88000 two-word add/subtract; ".co" generates carry out, ".ci" consumes
   carry in.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3" \
	   : "=r" (sh), "=&r" (sl) \
	   : "rJ" (ah), "rJ" (bh), "%rJ" (al), "rJ" (bl))
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3" \
	   : "=r" (sh), "=&r" (sl) \
	   : "rJ" (ah), "rJ" (bh), "rJ" (al), "rJ" (bl))
892
#define count_leading_zeros(count, x) \
895
__asm__ ("ff1 %0,%1" : "=r" (__cbtmp) : "r" (x)); \
896
(count) = __cbtmp ^ 31; \
898
#define COUNT_LEADING_ZEROS_0 63 /* sic */
899
#if defined (__m88110__)
900
#define umul_ppmm(wh, wl, u, v) \
902
union {UDItype __ll; \
903
struct {USItype __h, __l;} __i; \
905
__asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \
906
(wh) = __x.__i.__h; \
907
(wl) = __x.__i.__l; \
909
#define udiv_qrnnd(q, r, n1, n0, d) \
910
({union {UDItype __ll; \
911
struct {USItype __h, __l;} __i; \
913
__x.__i.__h = (n1); __x.__i.__l = (n0); \
914
__asm__ ("divu.d %0,%1,%2" \
915
: "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \
916
(r) = (n0) - __q.__l * (d); (q) = __q.__l; })
921
#define UDIV_TIME 150
922
#endif /* __m88110__ */
923
#endif /* __m88000__ */
925
#if defined (__mips) && W_TYPE_SIZE == 32
926
#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
927
/* 32x32->64 multiply; "=l"/"=h" bind the LO/HI result registers directly
   (usable on gcc >= 2.7 per the surrounding conditional).  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("multu %2,%3" : "=l" (w0), "=h" (w1) : "d" (u), "d" (v))
930
/* Older-gcc variant: move the product out of LO/HI explicitly.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("multu %2,%3\n\tmflo %0\n\tmfhi %1" \
	   : "=d" (w0), "=d" (w1) : "d" (u), "d" (v))
935
#define UDIV_TIME 100
938
#if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64
939
#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
940
/* 64x64->128 multiply; "=l"/"=h" bind the LO/HI result registers directly
   (usable on gcc >= 2.7 per the surrounding conditional).  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("dmultu %2,%3" : "=l" (w0), "=h" (w1) : "d" (u), "d" (v))
943
/* Older-gcc variant: move the 128-bit product out of LO/HI explicitly.  */
#define umul_ppmm(w1, w0, u, v) \
  __asm__ ("dmultu %2,%3\n\tmflo %0\n\tmfhi %1" \
	   : "=d" (w0), "=d" (w1) : "d" (u), "d" (v))
948
#define UDIV_TIME 140
951
#if defined (__ns32000__) && W_TYPE_SIZE == 32
952
#define umul_ppmm(w1, w0, u, v) \
953
({union {UDItype __ll; \
954
struct {USItype __l, __h;} __i; \
956
__asm__ ("meid %2,%0" \
958
: "%0" ((USItype)(u)), "g" ((USItype)(v))); \
959
(w1) = __x.__i.__h; (w0) = __x.__i.__l;})
960
#define __umulsidi3(u, v) \
962
__asm__ ("meid %2,%0" \
964
: "%0" ((USItype)(u)), "g" ((USItype)(v))); \
966
#define udiv_qrnnd(q, r, n1, n0, d) \
967
({union {UDItype __ll; \
968
struct {USItype __l, __h;} __i; \
970
__x.__i.__h = (n1); __x.__i.__l = (n0); \
971
__asm__ ("deid %2,%0" \
973
: "0" (__x.__ll), "g" ((USItype)(d))); \
974
(r) = __x.__i.__l; (q) = __x.__i.__h; })
975
#define count_trailing_zeros(count,x) \
977
__asm__ ("ffsd %2,%0" \
978
: "=r" ((USItype) (count)) \
979
: "0" ((USItype) 0), "r" ((USItype) (x))); \
981
#endif /* __ns32000__ */
983
/* FIXME: We should test _IBMR2 here when we add assembly support for the
984
system vendor compilers. */
985
#if (defined (_ARCH_PPC) /* AIX */ \
986
|| defined (_ARCH_PWR) /* AIX */ \
987
|| defined (__powerpc__) /* gcc */ \
988
|| defined (__POWERPC__) /* BEOS */ \
989
|| defined (__ppc__) /* Darwin */ \
990
|| (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \
991
|| (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \
992
&& CPU_FAMILY == PPC) \
993
) && W_TYPE_SIZE == 32
994
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
996
if (__builtin_constant_p (bh) && (bh) == 0) \
997
__asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
998
: "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
999
else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
1000
__asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
1001
: "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
1003
__asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
1004
: "=r" (sh), "=&r" (sl) \
1005
: "r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
1007
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1009
if (__builtin_constant_p (ah) && (ah) == 0) \
1010
__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
1011
: "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
1012
else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \
1013
__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
1014
: "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
1015
else if (__builtin_constant_p (bh) && (bh) == 0) \
1016
__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
1017
: "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
1018
else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \
1019
__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
1020
: "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
1022
__asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
1023
: "=r" (sh), "=&r" (sl) \
1024
: "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
1026
#define count_leading_zeros(count, x) \
1027
__asm__ ("{cntlz|cntlzw} %0,%1" : "=r" (count) : "r" (x))
1028
#define COUNT_LEADING_ZEROS_0 32
1029
#if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \
1030
|| defined (__ppc__) \
1031
|| (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \
1032
|| (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \
1033
&& CPU_FAMILY == PPC)
1034
#define umul_ppmm(ph, pl, m0, m1) \
1036
USItype __m0 = (m0), __m1 = (m1); \
1037
__asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \
1038
(pl) = __m0 * __m1; \
1040
#define UMUL_TIME 15
1041
#define smul_ppmm(ph, pl, m0, m1) \
1043
SItype __m0 = (m0), __m1 = (m1); \
1044
__asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \
1045
(pl) = __m0 * __m1; \
1047
#define SMUL_TIME 14
1048
#define UDIV_TIME 120
1051
#define smul_ppmm(xh, xl, m0, m1) \
1052
__asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1))
1054
#define sdiv_qrnnd(q, r, nh, nl, d) \
1055
__asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
1056
#define UDIV_TIME 100
1058
#endif /* 32-bit POWER architecture variants. */
1060
/* We should test _IBMR2 here when we add assembly support for the system
1061
vendor compilers. */
1062
#if (defined (_ARCH_PPC) || defined (__powerpc__)) && W_TYPE_SIZE == 64
1063
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1065
if (__builtin_constant_p (bh) && (bh) == 0) \
1066
__asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
1067
: "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
1068
else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
1069
__asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
1070
: "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
1072
__asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
1073
: "=r" (sh), "=&r" (sl) \
1074
: "r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \
1076
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1078
if (__builtin_constant_p (ah) && (ah) == 0) \
1079
__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
1080
: "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
1081
else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \
1082
__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
1083
: "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
1084
else if (__builtin_constant_p (bh) && (bh) == 0) \
1085
__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
1086
: "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
1087
else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \
1088
__asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
1089
: "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
1091
__asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
1092
: "=r" (sh), "=&r" (sl) \
1093
: "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \
1095
#define count_leading_zeros(count, x) \
1096
__asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
1097
#define COUNT_LEADING_ZEROS_0 64
1098
#define umul_ppmm(ph, pl, m0, m1) \
1100
UDItype __m0 = (m0), __m1 = (m1); \
1101
__asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \
1102
(pl) = __m0 * __m1; \
1104
#define UMUL_TIME 15
1105
#define smul_ppmm(ph, pl, m0, m1) \
1107
DItype __m0 = (m0), __m1 = (m1); \
1108
__asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1)); \
1109
(pl) = __m0 * __m1; \
1111
#define SMUL_TIME 14 /* ??? */
1112
#define UDIV_TIME 120 /* ??? */
1113
#endif /* 64-bit PowerPC. */
1115
#if defined (__pyr__) && W_TYPE_SIZE == 32
1116
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1117
__asm__ ("addw %5,%1\n\taddwc %3,%0" \
1118
: "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
1119
: "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
1120
"%1" ((USItype)(al)), "g" ((USItype)(bl)))
1121
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1122
__asm__ ("subw %5,%1\n\tsubwb %3,%0" \
1123
: "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
1124
: "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
1125
"1" ((USItype)(al)), "g" ((USItype)(bl)))
1126
/* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP. */
1127
#define umul_ppmm(w1, w0, u, v) \
1128
({union {UDItype __ll; \
1129
struct {USItype __h, __l;} __i; \
1131
__asm__ ("movw %1,%R0\n\tuemul %2,%0" \
1132
: "=&r" (__x.__ll) \
1133
: "g" ((USItype) (u)), "g" ((USItype)(v))); \
1134
(w1) = __x.__i.__h; (w0) = __x.__i.__l;})
1135
#endif /* __pyr__ */
1137
#if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
1138
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1139
__asm__ ("a %1,%5\n\tae %0,%3" \
1140
: "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
1141
: "0" ((USItype)(ah)), "r" ((USItype)(bh)), \
1142
"%1" ((USItype)(al)), "r" ((USItype)(bl)))
1143
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1144
__asm__ ("s %1,%5\n\tse %0,%3" \
1145
: "=r" ((USItype)(sh)), "=&r" ((USItype)(sl)) \
1146
: "0" ((USItype)(ah)), "r" ((USItype)(bh)), \
1147
"1" ((USItype)(al)), "r" ((USItype)(bl)))
1148
#define smul_ppmm(ph, pl, m0, m1) \
1170
: "=r" ((USItype)(ph)), "=r" ((USItype)(pl)) \
1171
: "%r" ((USItype)(m0)), "r" ((USItype)(m1)) \
1173
#define UMUL_TIME 20
1174
#define UDIV_TIME 200
1175
#define count_leading_zeros(count, x) \
1177
if ((x) >= 0x10000) \
1178
__asm__ ("clz %0,%1" \
1179
: "=r" ((USItype)(count)) : "r" ((USItype)(x) >> 16)); \
1182
__asm__ ("clz %0,%1" \
1183
: "=r" ((USItype)(count)) : "r" ((USItype)(x))); \
1187
#endif /* RT/ROMP */
1189
#if defined (__sh2__) && W_TYPE_SIZE == 32
1190
#define umul_ppmm(w1, w0, u, v) \
1191
__asm__ ("dmulu.l %2,%3\n\tsts macl,%1\n\tsts mach,%0" \
1192
: "=r" (w1), "=r" (w0) : "r" (u), "r" (v) : "macl", "mach")
1196
#if defined (__sparc__) && W_TYPE_SIZE == 32
1197
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1198
__asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0" \
1199
: "=r" (sh), "=&r" (sl) \
1200
: "rJ" (ah), "rI" (bh),"%rJ" (al), "rI" (bl) \
1202
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1203
__asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0" \
1204
: "=r" (sh), "=&r" (sl) \
1205
: "rJ" (ah), "rI" (bh), "rJ" (al), "rI" (bl) \
1207
/* FIXME: When gcc -mcpu=v9 is used on solaris, gcc/config/sol2-sld-64.h
1208
doesn't define anything to indicate that to us, it only sets __sparcv8. */
1209
#if defined (__sparc_v9__) || defined (__sparcv9)
1210
/* Perhaps we should use floating-point operations here? */
1212
/* Triggers a bug making mpz/tests/t-gcd.c fail.
1213
Perhaps we simply need explicitly zero-extend the inputs? */
1214
#define umul_ppmm(w1, w0, u, v) \
1215
__asm__ ("mulx %2,%3,%%g1; srl %%g1,0,%1; srlx %%g1,32,%0" : \
1216
"=r" (w1), "=r" (w0) : "r" (u), "r" (v) : "g1")
1218
/* Use v8 umul until above bug is fixed. */
1219
#define umul_ppmm(w1, w0, u, v) \
1220
__asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
1222
/* Use a plain v8 divide for v9. */
1223
#define udiv_qrnnd(q, r, n1, n0, d) \
1226
__asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
1227
: "=r" (__q) : "r" (n1), "r" (n0), "r" (d)); \
1228
(r) = (n0) - __q * (d); \
1232
#if defined (__sparc_v8__) /* gcc normal */ \
1233
|| defined (__sparcv8) /* gcc solaris */
1234
/* Don't match immediate range because, 1) it is not often useful,
1235
2) the 'I' flag thinks of the range as a 13 bit signed interval,
1236
while we want to match a 13 bit interval, sign extended to 32 bits,
1237
but INTERPRETED AS UNSIGNED. */
1238
#define umul_ppmm(w1, w0, u, v) \
1239
__asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
1242
#if HAVE_HOST_CPU_supersparc
1243
#define UDIV_TIME 60 /* SuperSPARC timing */
1245
/* Don't use this on SuperSPARC because its udiv only handles 53 bit
1246
dividends and will trap to the kernel for the rest. */
1247
#define udiv_qrnnd(q, r, n1, n0, d) \
1250
__asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
1251
: "=r" (__q) : "r" (n1), "r" (n0), "r" (d)); \
1252
(r) = (n0) - __q * (d); \
1255
#define UDIV_TIME 25
1256
#endif /* HAVE_HOST_CPU_supersparc */
1258
#else /* ! __sparc_v8__ */
1259
#if defined (__sparclite__)
1260
/* This has hardware multiply but not divide. It also has two additional
1261
instructions scan (ffs from high bit) and divscc. */
1262
#define umul_ppmm(w1, w0, u, v) \
1263
__asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
1265
#define udiv_qrnnd(q, r, n1, n0, d) \
1266
__asm__ ("! Inlined udiv_qrnnd\n" \
1267
" wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \
1269
" divscc %3,%4,%%g1\n" \
1270
" divscc %%g1,%4,%%g1\n" \
1271
" divscc %%g1,%4,%%g1\n" \
1272
" divscc %%g1,%4,%%g1\n" \
1273
" divscc %%g1,%4,%%g1\n" \
1274
" divscc %%g1,%4,%%g1\n" \
1275
" divscc %%g1,%4,%%g1\n" \
1276
" divscc %%g1,%4,%%g1\n" \
1277
" divscc %%g1,%4,%%g1\n" \
1278
" divscc %%g1,%4,%%g1\n" \
1279
" divscc %%g1,%4,%%g1\n" \
1280
" divscc %%g1,%4,%%g1\n" \
1281
" divscc %%g1,%4,%%g1\n" \
1282
" divscc %%g1,%4,%%g1\n" \
1283
" divscc %%g1,%4,%%g1\n" \
1284
" divscc %%g1,%4,%%g1\n" \
1285
" divscc %%g1,%4,%%g1\n" \
1286
" divscc %%g1,%4,%%g1\n" \
1287
" divscc %%g1,%4,%%g1\n" \
1288
" divscc %%g1,%4,%%g1\n" \
1289
" divscc %%g1,%4,%%g1\n" \
1290
" divscc %%g1,%4,%%g1\n" \
1291
" divscc %%g1,%4,%%g1\n" \
1292
" divscc %%g1,%4,%%g1\n" \
1293
" divscc %%g1,%4,%%g1\n" \
1294
" divscc %%g1,%4,%%g1\n" \
1295
" divscc %%g1,%4,%%g1\n" \
1296
" divscc %%g1,%4,%%g1\n" \
1297
" divscc %%g1,%4,%%g1\n" \
1298
" divscc %%g1,%4,%%g1\n" \
1299
" divscc %%g1,%4,%%g1\n" \
1300
" divscc %%g1,%4,%0\n" \
1304
"1: ! End of inline udiv_qrnnd" \
1305
: "=r" (q), "=r" (r) : "r" (n1), "r" (n0), "rI" (d) \
1306
: "%g1" __AND_CLOBBER_CC)
1307
#define UDIV_TIME 37
1308
#define count_leading_zeros(count, x) \
1309
__asm__ ("scan %1,1,%0" : "=r" (count) : "r" (x))
1310
/* Early sparclites return 63 for an argument of 0, but they warn that future
1311
implementations might change this. Therefore, leave COUNT_LEADING_ZEROS_0
1313
#endif /* __sparclite__ */
1314
#endif /* __sparc_v8__ */
1315
#endif /* __sparc_v9__ */
1316
/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */
1318
#define umul_ppmm(w1, w0, u, v) \
1319
__asm__ ("! Inlined umul_ppmm\n" \
1320
" wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n" \
1321
" sra %3,31,%%g2 ! Don't move this insn\n" \
1322
" and %2,%%g2,%%g2 ! Don't move this insn\n" \
1323
" andcc %%g0,0,%%g1 ! Don't move this insn\n" \
1324
" mulscc %%g1,%3,%%g1\n" \
1325
" mulscc %%g1,%3,%%g1\n" \
1326
" mulscc %%g1,%3,%%g1\n" \
1327
" mulscc %%g1,%3,%%g1\n" \
1328
" mulscc %%g1,%3,%%g1\n" \
1329
" mulscc %%g1,%3,%%g1\n" \
1330
" mulscc %%g1,%3,%%g1\n" \
1331
" mulscc %%g1,%3,%%g1\n" \
1332
" mulscc %%g1,%3,%%g1\n" \
1333
" mulscc %%g1,%3,%%g1\n" \
1334
" mulscc %%g1,%3,%%g1\n" \
1335
" mulscc %%g1,%3,%%g1\n" \
1336
" mulscc %%g1,%3,%%g1\n" \
1337
" mulscc %%g1,%3,%%g1\n" \
1338
" mulscc %%g1,%3,%%g1\n" \
1339
" mulscc %%g1,%3,%%g1\n" \
1340
" mulscc %%g1,%3,%%g1\n" \
1341
" mulscc %%g1,%3,%%g1\n" \
1342
" mulscc %%g1,%3,%%g1\n" \
1343
" mulscc %%g1,%3,%%g1\n" \
1344
" mulscc %%g1,%3,%%g1\n" \
1345
" mulscc %%g1,%3,%%g1\n" \
1346
" mulscc %%g1,%3,%%g1\n" \
1347
" mulscc %%g1,%3,%%g1\n" \
1348
" mulscc %%g1,%3,%%g1\n" \
1349
" mulscc %%g1,%3,%%g1\n" \
1350
" mulscc %%g1,%3,%%g1\n" \
1351
" mulscc %%g1,%3,%%g1\n" \
1352
" mulscc %%g1,%3,%%g1\n" \
1353
" mulscc %%g1,%3,%%g1\n" \
1354
" mulscc %%g1,%3,%%g1\n" \
1355
" mulscc %%g1,%3,%%g1\n" \
1356
" mulscc %%g1,0,%%g1\n" \
1357
" add %%g1,%%g2,%0\n" \
1359
: "=r" (w1), "=r" (w0) : "%rI" (u), "r" (v) \
1360
: "%g1", "%g2" __AND_CLOBBER_CC)
1361
#define UMUL_TIME 39 /* 39 instructions */
1364
#ifndef LONGLONG_STANDALONE
1365
#define udiv_qrnnd(q, r, n1, n0, d) \
1367
(q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \
1370
extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
1372
#define UDIV_TIME 140
1374
#endif /* LONGLONG_STANDALONE */
1375
#endif /* udiv_qrnnd */
1376
#endif /* __sparc__ */
1378
#if defined (__sparc__) && W_TYPE_SIZE == 64
1379
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1381
"addcc %r4,%5,%1\n" \
1382
" addccc %r6,%7,%%g0\n" \
1384
: "=r" (sh), "=&r" (sl) \
1385
: "rJ" (ah), "rI" (bh), "%rJ" (al), "rI" (bl), \
1386
"%rJ" ((al) >> 32), "rI" ((bl) >> 32) \
1388
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1390
"subcc %r4,%5,%1\n" \
1391
" subccc %r6,%7,%%g0\n" \
1393
: "=r" (sh), "=&r" (sl) \
1394
: "rJ" (ah), "rI" (bh), "rJ" (al), "rI" (bl), \
1395
"rJ" ((al) >> 32), "rI" ((bl) >> 32) \
1399
#if defined (__vax__) && W_TYPE_SIZE == 32
1400
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1401
__asm__ ("addl2 %5,%1\n\tadwc %3,%0" \
1402
: "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \
1403
: "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
1404
"%1" ((USItype)(al)), "g" ((USItype)(bl)))
1405
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1406
__asm__ ("subl2 %5,%1\n\tsbwc %3,%0" \
1407
: "=g" ((USItype)(sh)), "=&g" ((USItype)(sl)) \
1408
: "0" ((USItype)(ah)), "g" ((USItype)(bh)), \
1409
"1" ((USItype)(al)), "g" ((USItype)(bl)))
1410
#define smul_ppmm(xh, xl, m0, m1) \
1412
union {UDItype __ll; \
1413
struct {USItype __l, __h;} __i; \
1415
USItype __m0 = (m0), __m1 = (m1); \
1416
__asm__ ("emul %1,%2,$0,%0" \
1417
: "=g" (__x.__ll) : "g" (__m0), "g" (__m1)); \
1418
(xh) = __x.__i.__h; (xl) = __x.__i.__l; \
1420
#define sdiv_qrnnd(q, r, n1, n0, d) \
1422
union {DItype __ll; \
1423
struct {SItype __l, __h;} __i; \
1425
__x.__i.__h = n1; __x.__i.__l = n0; \
1426
__asm__ ("ediv %3,%2,%0,%1" \
1427
: "=g" (q), "=g" (r) : "g" (__x.__ll), "g" (d)); \
1430
/* FIXME: This instruction appears to be unimplemented on some systems (vax
1432
#define count_trailing_zeros(count,x) \
1434
__asm__ ("ffs 0, 31, %1, %0" \
1435
: "=g" ((USItype) (count)) \
1436
: "g" ((USItype) (x))); \
1439
#endif /* __vax__ */
1441
#if defined (__z8000__) && W_TYPE_SIZE == 16
1442
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1443
__asm__ ("add %H1,%H5\n\tadc %H0,%H3" \
1444
: "=r" ((unsigned int)(sh)), "=&r" ((unsigned int)(sl)) \
1445
: "0" ((unsigned int)(ah)), "r" ((unsigned int)(bh)), \
1446
"%1" ((unsigned int)(al)), "rQR" ((unsigned int)(bl)))
1447
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1448
__asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \
1449
: "=r" ((unsigned int)(sh)), "=&r" ((unsigned int)(sl)) \
1450
: "0" ((unsigned int)(ah)), "r" ((unsigned int)(bh)), \
1451
"1" ((unsigned int)(al)), "rQR" ((unsigned int)(bl)))
1452
#define umul_ppmm(xh, xl, m0, m1) \
1454
union {long int __ll; \
1455
struct {unsigned int __h, __l;} __i; \
1457
unsigned int __m0 = (m0), __m1 = (m1); \
1458
__asm__ ("mult %S0,%H3" \
1459
: "=r" (__x.__i.__h), "=r" (__x.__i.__l) \
1460
: "%1" (m0), "rQR" (m1)); \
1461
(xh) = __x.__i.__h; (xl) = __x.__i.__l; \
1462
(xh) += ((((signed int) __m0 >> 15) & __m1) \
1463
+ (((signed int) __m1 >> 15) & __m0)); \
1465
#endif /* __z8000__ */
1467
#endif /* __GNUC__ */
1472
#if !defined (umul_ppmm) && defined (__umulsidi3)
1473
#define umul_ppmm(ph, pl, m0, m1) \
1475
UDWtype __ll = __umulsidi3 (m0, m1); \
1476
ph = (UWtype) (__ll >> W_TYPE_SIZE); \
1477
pl = (UWtype) __ll; \
1481
#if !defined (__umulsidi3)
1482
#define __umulsidi3(u, v) \
1483
({UWtype __hi, __lo; \
1484
umul_ppmm (__hi, __lo, u, v); \
1485
((UDWtype) __hi << W_TYPE_SIZE) | __lo; })
1489
/* Note the prototypes are under !define(umul_ppmm) etc too, since the HPPA
1490
versions above are different and we don't want to conflict. */
1492
#if ! defined (umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm
1493
#define mpn_umul_ppmm __MPN(umul_ppmm)
1494
extern mp_limb_t mpn_umul_ppmm _PROTO ((mp_limb_t *, mp_limb_t, mp_limb_t));
1495
#define umul_ppmm(wh, wl, u, v) \
1497
mp_limb_t __umul_ppmm__p0; \
1498
(wh) = __MPN(umul_ppmm) (&__umul_ppmm__p0, \
1499
(mp_limb_t) (u), (mp_limb_t) (v)); \
1500
(wl) = __umul_ppmm__p0; \
1504
#if ! defined (udiv_qrnnd) && HAVE_NATIVE_mpn_udiv_qrnnd
1505
#define mpn_udiv_qrnnd __MPN(udiv_qrnnd)
1506
extern mp_limb_t mpn_udiv_qrnnd _PROTO ((mp_limb_t *,
1507
mp_limb_t, mp_limb_t, mp_limb_t));
1508
#define udiv_qrnnd(q, r, n1, n0, d) \
1510
mp_limb_t __udiv_qrnnd__r; \
1511
(q) = mpn_udiv_qrnnd (&__udiv_qrnnd__r, \
1512
(mp_limb_t) (n1), (mp_limb_t) (n0), (mp_limb_t) d); \
1513
(r) = __udiv_qrnnd__r; \
1518
/* If this machine has no inline assembler, use C macros. */
1520
#if !defined (add_ssaaaa)
1521
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
1524
__x = (al) + (bl); \
1525
(sh) = (ah) + (bh) + (__x < (al)); \
1530
#if !defined (sub_ddmmss)
1531
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
1534
__x = (al) - (bl); \
1535
(sh) = (ah) - (bh) - (__x > (al)); \
1540
/* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of
1542
#if !defined (umul_ppmm) && defined (smul_ppmm)
1543
#define umul_ppmm(w1, w0, u, v) \
1546
UWtype __xm0 = (u), __xm1 = (v); \
1547
smul_ppmm (__w1, w0, __xm0, __xm1); \
1548
(w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \
1549
+ (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \
1553
/* If we still don't have umul_ppmm, define it using plain C. */
1554
#if !defined (umul_ppmm)
1555
#define umul_ppmm(w1, w0, u, v) \
1557
UWtype __x0, __x1, __x2, __x3; \
1558
UHWtype __ul, __vl, __uh, __vh; \
1559
UWtype __u = (u), __v = (v); \
1561
__ul = __ll_lowpart (__u); \
1562
__uh = __ll_highpart (__u); \
1563
__vl = __ll_lowpart (__v); \
1564
__vh = __ll_highpart (__v); \
1566
__x0 = (UWtype) __ul * __vl; \
1567
__x1 = (UWtype) __ul * __vh; \
1568
__x2 = (UWtype) __uh * __vl; \
1569
__x3 = (UWtype) __uh * __vh; \
1571
__x1 += __ll_highpart (__x0);/* this can't give carry */ \
1572
__x1 += __x2; /* but this indeed can */ \
1573
if (__x1 < __x2) /* did we get it? */ \
1574
__x3 += __ll_B; /* yes, add it in the proper pos. */ \
1576
(w1) = __x3 + __ll_highpart (__x1); \
1577
(w0) = (__x1 << W_TYPE_SIZE/2) + __ll_lowpart (__x0); \
1581
/* If we don't have smul_ppmm, define it using umul_ppmm (which surely will
1582
exist in one form or another. */
1583
#if !defined (smul_ppmm)
1584
#define smul_ppmm(w1, w0, u, v) \
1587
UWtype __xm0 = (u), __xm1 = (v); \
1588
umul_ppmm (__w1, w0, __xm0, __xm1); \
1589
(w1) = __w1 - (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) \
1590
- (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); \
1594
/* Define this unconditionally, so it can be used for debugging. */
1595
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
1597
UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
1599
ASSERT ((d) != 0); \
1600
ASSERT ((n1) < (d)); \
1602
__d1 = __ll_highpart (d); \
1603
__d0 = __ll_lowpart (d); \
1605
__q1 = (n1) / __d1; \
1606
__r1 = (n1) - __q1 * __d1; \
1607
__m = (UWtype) __q1 * __d0; \
1608
__r1 = __r1 * __ll_B | __ll_highpart (n0); \
1611
__q1--, __r1 += (d); \
1612
if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
1614
__q1--, __r1 += (d); \
1618
__q0 = __r1 / __d1; \
1619
__r0 = __r1 - __q0 * __d1; \
1620
__m = (UWtype) __q0 * __d0; \
1621
__r0 = __r0 * __ll_B | __ll_lowpart (n0); \
1624
__q0--, __r0 += (d); \
1627
__q0--, __r0 += (d); \
1631
(q) = (UWtype) __q1 * __ll_B | __q0; \
1635
/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
1636
__udiv_w_sdiv (defined in libgcc or elsewhere). */
1637
#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
1638
#define udiv_qrnnd(q, r, nh, nl, d) \
1641
(q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \
1646
/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */
1647
#if !defined (udiv_qrnnd)
1648
#define UDIV_NEEDS_NORMALIZATION 1
1649
#define udiv_qrnnd __udiv_qrnnd_c
1652
#if !defined (count_leading_zeros)
1653
#define count_leading_zeros(count, x) \
1655
UWtype __xr = (x); \
1658
if (W_TYPE_SIZE == 32) \
1660
__a = __xr < ((UWtype) 1 << 2*__BITS4) \
1661
? (__xr < ((UWtype) 1 << __BITS4) ? 1 : __BITS4 + 1) \
1662
: (__xr < ((UWtype) 1 << 3*__BITS4) ? 2*__BITS4 + 1 \
1667
for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \
1668
if (((__xr >> __a) & 0xff) != 0) \
1673
(count) = W_TYPE_SIZE + 1 - __a - __clz_tab[__xr >> __a]; \
1675
/* This version gives a well-defined value for zero. */
1676
#define COUNT_LEADING_ZEROS_0 (W_TYPE_SIZE - 1)
1677
#define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
1680
#ifdef COUNT_LEADING_ZEROS_NEED_CLZ_TAB
1681
extern const unsigned char __GMP_DECLSPEC __clz_tab[128];
1684
#if !defined (count_trailing_zeros)
1685
/* Define count_trailing_zeros using count_leading_zeros. The latter might be
1686
defined in asm, but if it is not, the C version above is good enough. */
1687
#define count_trailing_zeros(count, x) \
1689
UWtype __ctz_x = (x); \
1691
ASSERT (__ctz_x != 0); \
1692
count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x); \
1693
(count) = W_TYPE_SIZE - 1 - __ctz_c; \
1697
#ifndef UDIV_NEEDS_NORMALIZATION
1698
#define UDIV_NEEDS_NORMALIZATION 0
1701
/* Whether udiv_qrnnd is actually implemented with udiv_qrnnd_preinv, and
1702
that hence the latter should always be used. */
1703
#ifndef UDIV_PREINV_ALWAYS
1704
#define UDIV_PREINV_ALWAYS 0
1707
/* Give defaults for UMUL_TIME and UDIV_TIME. */
1713
#define UDIV_TIME UMUL_TIME