1
/* mpn_divrem_1 -- mpn by limb division.

Copyright 1991, 1993, 1994, 1996, 1998, 1999, 2000, 2002, 2003 Free Software
Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
28
/* The size where udiv_qrnnd_preinv should be used rather than udiv_qrnnd,
   meaning the quotient size where that should happen, the quotient size
   being how many udiv divisions will be done.

   The default is to use preinv always; CPUs where this doesn't suit have
   tuned thresholds. Note in particular that preinv should certainly be
   used if that's the only division available (USE_PREINV_ALWAYS). */
36
/* Default thresholds.  A value of 0 means no quotient size is ever "below
   threshold", so the multiply-by-inverse (udiv_qrnnd_preinv) code is always
   selected.  Each default applies only when the macro has not already been
   defined before this point, so a previously supplied tuned value wins.  */
#ifndef DIVREM_1_NORM_THRESHOLD
#define DIVREM_1_NORM_THRESHOLD  0
#endif
#ifndef DIVREM_1_UNNORM_THRESHOLD
#define DIVREM_1_UNNORM_THRESHOLD  0
#endif
45
/* If the cpu only has multiply-by-inverse division (eg. alpha), then NORM
   and UNNORM thresholds are 0 and only the inversion code is included.

   If multiply-by-inverse is never viable, then NORM and UNNORM thresholds
   will be MP_SIZE_T_MAX and only the plain division code is included.

   Otherwise mul-by-inverse is better than plain division above some
   threshold, and best results are obtained by having code for both present.

   The main reason for separating the norm and unnorm cases is that not all
   CPUs give zero for "n0 >> GMP_LIMB_BITS" (a shift by the full limb width
   is undefined in C), which would arise in the unnorm code used on an
   already normalized divisor.

   If UDIV_NEEDS_NORMALIZATION is false then plain division uses the same
   non-shifting code for both the norm and unnorm cases, though with
   different criteria for skipping a division, and with different thresholds
   of course. And in fact if inversion is never viable, then that simple
   non-shifting division would be all that's left.

   The NORM and UNNORM thresholds might not differ much, but if there's
   going to be separate code for norm and unnorm then it makes sense to have
   separate thresholds. One thing that's possible is that the
   mul-by-inverse might be better only for normalized divisors, due to that
   case not needing variable bit shifts.

   Notice that the thresholds are tested after the decision to possibly skip
   one divide step, so they're based on the actual number of divisions done.

   For the unnorm case, it would be possible to call mpn_lshift to adjust
   the dividend all in one go (into the quotient space say), rather than
   limb-by-limb in the loop. This might help if mpn_lshift is a lot faster
   than what the compiler can generate for EXTRACT. But this is left to CPU
   specific implementations to consider, especially since EXTRACT isn't on
   the dependent chain. */
81
mpn_divrem_1 (mp_ptr qp, mp_size_t qxn,
82
mp_srcptr up, mp_size_t un, mp_limb_t d)
92
/* FIXME: What's the correct overlap rule when qxn!=0? */
93
ASSERT (MPN_SAME_OR_SEPARATE_P (qp+qxn, up, un));
101
qp += (n - 1); /* Make qp point at most significant quotient limb */
103
if ((d & GMP_LIMB_HIGHBIT) != 0)
107
/* High quotient limb is 0 or 1, skip a divide step. */
109
r = up[un - 1] << GMP_NAIL_BITS;
118
if (BELOW_THRESHOLD (n, DIVREM_1_NORM_THRESHOLD))
121
for (i = un - 1; i >= 0; i--)
123
n0 = up[i] << GMP_NAIL_BITS;
124
udiv_qrnnd (*qp, r, r, n0, d);
128
for (i = qxn - 1; i >= 0; i--)
130
udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
138
/* Multiply-by-inverse, divisor already normalized. */
140
invert_limb (dinv, d);
142
for (i = un - 1; i >= 0; i--)
144
n0 = up[i] << GMP_NAIL_BITS;
145
udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv);
149
for (i = qxn - 1; i >= 0; i--)
151
udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);
160
/* Most significant bit of divisor == 0. */
163
/* Skip a division if high < divisor (the high quotient limb is then 0).
   Testing here, before normalizing, still skips as often as possible. */
167
n1 = up[un - 1] << GMP_NAIL_BITS;
170
r = n1 >> GMP_NAIL_BITS;
179
if (! UDIV_NEEDS_NORMALIZATION
180
&& BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
183
count_leading_zeros (norm, d);
187
if (UDIV_NEEDS_NORMALIZATION
188
&& BELOW_THRESHOLD (n, DIVREM_1_UNNORM_THRESHOLD))
192
n1 = up[un - 1] << GMP_NAIL_BITS;
193
r |= (n1 >> (GMP_LIMB_BITS - norm));
194
for (i = un - 2; i >= 0; i--)
196
n0 = up[i] << GMP_NAIL_BITS;
197
udiv_qrnnd (*qp, r, r,
198
(n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm)),
204
udiv_qrnnd (*qp, r, r, n1 << norm, d);
208
for (i = qxn - 1; i >= 0; i--)
210
udiv_qrnnd (*qp, r, r, CNST_LIMB(0), d);
219
invert_limb (dinv, d);
222
n1 = up[un - 1] << GMP_NAIL_BITS;
223
r |= (n1 >> (GMP_LIMB_BITS - norm));
224
for (i = un - 2; i >= 0; i--)
226
n0 = up[i] << GMP_NAIL_BITS;
227
udiv_qrnnd_preinv (*qp, r, r,
228
((n1 << norm) | (n0 >> (GMP_NUMB_BITS - norm))),
234
udiv_qrnnd_preinv (*qp, r, r, n1 << norm, d, dinv);
238
for (i = qxn - 1; i >= 0; i--)
240
udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv);