1
/* mpn_tdiv_qr -- Divide the numerator (np,nn) by the denominator (dp,dn) and
2
write the nn-dn+1 quotient limbs at qp and the dn remainder limbs at rp. If
3
qxn is non-zero, generate that many fraction limbs and append them after the
4
other quotient limbs, and update the remainder accordningly. The input
5
operands are unaffected.
8
1. The most significant limb of of the divisor must be non-zero.
9
2. No argument overlap is permitted. (??? relax this ???)
10
3. nn >= dn, even if qxn is non-zero. (??? relax this ???)
12
The time complexity of this is O(qn*qn+M(dn,qn)), where M(m,n) is the time
13
complexity of multiplication.
15
Copyright (C) 1997, 2000 Free Software Foundation, Inc.
17
This file is part of the GNU MP Library.
19
The GNU MP Library is free software; you can redistribute it and/or modify
20
it under the terms of the GNU Lesser General Public License as published by
21
the Free Software Foundation; either version 2.1 of the License, or (at your
22
option) any later version.
24
The GNU MP Library is distributed in the hope that it will be useful, but
25
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
26
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
27
License for more details.
29
You should have received a copy of the GNU Lesser General Public License
30
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
31
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
32
MA 02111-1307, USA. */
40
#define BZ_THRESHOLD (7 * KARATSUBA_MUL_THRESHOLD)
43
/* Extract the middle limb from ((h,,l) << cnt) */
44
#define SHL(h,l,cnt) \
45
((h << cnt) | ((l >> 1) >> ((~cnt) & (BITS_PER_MP_LIMB - 1))))
49
mpn_tdiv_qr (mp_ptr qp, mp_ptr rp, mp_size_t qxn,
50
mp_srcptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn)
52
mpn_tdiv_qr (qp, rp, qxn, np, nn, dp, dn)
64
2. pass allocated storage in additional parameter?
76
rp[0] = mpn_divmod_1 (qp, np, nn, dp[0]);
87
count_leading_zeros (cnt, dp[dn - 1]);
90
d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
91
mpn_lshift (d2p, dp, dn, cnt);
92
n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
93
cy = mpn_lshift (n2p, np, nn, cnt);
95
qhl = mpn_divrem_2 (qp, 0L, n2p, nn + (cy != 0), d2p);
97
qp[nn - 2] = qhl; /* always store nn-dn+1 quotient limbs */
102
n2p = (mp_ptr) TMP_ALLOC (nn * BYTES_PER_MP_LIMB);
103
MPN_COPY (n2p, np, nn);
104
qhl = mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
105
qp[nn - 2] = qhl; /* always store nn-dn+1 quotient limbs */
109
mpn_rshift (rp, n2p, dn, cnt);
111
MPN_COPY (rp, n2p, dn);
121
adjust = np[nn - 1] >= dp[dn - 1]; /* conservative tests for quotient size */
122
if (nn + adjust >= 2 * dn)
127
count_leading_zeros (cnt, dp[dn - 1]);
129
qp[nn - dn] = 0; /* zero high quotient limb */
130
if (cnt != 0) /* normalize divisor if needed */
132
d2p = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
133
mpn_lshift (d2p, dp, dn, cnt);
134
n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
135
cy = mpn_lshift (n2p, np, nn, cnt);
142
n2p = (mp_ptr) TMP_ALLOC ((nn + 1) * BYTES_PER_MP_LIMB);
143
MPN_COPY (n2p, np, nn);
149
mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
150
else if (dn < BZ_THRESHOLD)
151
mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
154
/* Perform 2*dn / dn limb divisions as long as the limbs
156
mp_ptr q2p = qp + nn - 2 * dn;
158
mpn_bz_divrem_n (q2p, n2p, d2p, dn);
163
q2p -= dn; n2p -= dn;
164
c = mpn_bz_divrem_n (q2p, n2p, d2p, dn);
165
ASSERT_ALWAYS (c == 0);
172
/* In theory, we could fall out to the cute code below
173
since we now have exactly the situation that code
174
is designed to handle. We botch this badly and call
175
the basic mpn_sb_divrem_mn! */
177
mpn_divrem_2 (qp, 0L, n2p, nn, d2p);
179
mpn_sb_divrem_mn (qp, n2p, nn, d2p, dn);
185
mpn_rshift (rp, n2p, dn, cnt);
187
MPN_COPY (rp, n2p, dn);
192
/* When we come here, the numerator/partial remainder is less
193
than twice the size of the denominator. */
198
Divide a numerator N with nn limbs by a denominator D with dn
199
limbs forming a quotient of nn-dn+1 limbs. When qn is small
200
compared to dn, conventional division algorithms perform poorly.
201
We want an algorithm that has an expected running time that is
202
dependent only on qn. It is assumed that the most significant
203
limb of the numerator is smaller than the most significant limb
206
Algorithm (very informally stated):
208
1) Divide the 2 x qn most significant limbs from the numerator
209
by the qn most significant limbs from the denominator. Call
210
the result qest. This is either the correct quotient, but
211
might be 1 or 2 too large. Compute the remainder from the
212
division. (This step is implemented by a mpn_divrem call.)
214
2) Is the most significant limb from the remainder < p, where p
215
is the product of the most significant limb from the quotient
216
and the next(d). (Next(d) denotes the next ignored limb from
217
the denominator.) If it is, decrement qest, and adjust the
218
remainder accordingly.
220
3) Is the remainder >= qest? If it is, qest is the desired
221
quotient. The algorithm terminates.
223
4) Subtract qest x next(d) from the remainder. If there is
224
borrow out, decrement qest, and adjust the remainder
227
5) Skip one word from the denominator (i.e., let next(d) denote
228
the next less significant limb. */
235
mp_limb_t quotient_too_large;
239
qp[qn] = 0; /* zero high quotient limb */
240
qn += adjust; /* qn cannot become bigger */
244
MPN_COPY (rp, np, dn);
249
in = dn - qn; /* (at least partially) ignored # of limbs in ops */
250
/* Normalize denominator by shifting it to the left such that its
251
most significant bit is set. Then shift the numerator the same
252
amount, to mathematically preserve quotient. */
253
count_leading_zeros (cnt, dp[dn - 1]);
256
d2p = (mp_ptr) TMP_ALLOC (qn * BYTES_PER_MP_LIMB);
258
mpn_lshift (d2p, dp + in, qn, cnt);
259
d2p[0] |= dp[in - 1] >> (BITS_PER_MP_LIMB - cnt);
261
n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
262
cy = mpn_lshift (n2p, np + nn - 2 * qn, 2 * qn, cnt);
270
n2p[0] |= np[nn - 2 * qn - 1] >> (BITS_PER_MP_LIMB - cnt);
275
d2p = (mp_ptr) dp + in;
277
n2p = (mp_ptr) TMP_ALLOC ((2 * qn + 1) * BYTES_PER_MP_LIMB);
278
MPN_COPY (n2p, np + nn - 2 * qn, 2 * qn);
286
/* Get an approximate quotient using the extracted operands. */
290
mp_limb_t gcc272bug_n1, gcc272bug_n0, gcc272bug_d0;
291
/* Due to a gcc 2.7.2.3 reload pass bug, we have to use some
292
temps here. This doesn't hurt code quality on any machines
293
so we do it unconditionally. */
294
gcc272bug_n1 = n2p[1];
295
gcc272bug_n0 = n2p[0];
296
gcc272bug_d0 = d2p[0];
297
udiv_qrnnd (q0, r0, gcc272bug_n1, gcc272bug_n0, gcc272bug_d0);
302
mpn_divrem_2 (qp, 0L, n2p, 4L, d2p);
303
else if (qn < BZ_THRESHOLD)
304
mpn_sb_divrem_mn (qp, n2p, qn * 2, d2p, qn);
306
mpn_bz_divrem_n (qp, n2p, d2p, qn);
309
/* Multiply the first ignored divisor limb by the most significant
310
quotient limb. If that product is > the partial remainder's
311
most significant limb, we know the quotient is too large. This
312
test quickly catches most cases where the quotient is too large;
313
it catches all cases where the quotient is 2 too large. */
323
x = SHL (dp[in - 1], dl, cnt);
324
umul_ppmm (h, l, x, qp[qn - 1]);
330
mpn_decr_u (qp, (mp_limb_t) 1);
331
cy = mpn_add_n (n2p, n2p, d2p, qn);
334
/* The partial remainder is safely large. */
341
quotient_too_large = 0;
346
/* Append partially used numerator limb to partial remainder. */
347
cy1 = mpn_lshift (n2p, n2p, rn, BITS_PER_MP_LIMB - cnt);
348
n2p[0] |= np[in - 1] & (~(mp_limb_t) 0 >> cnt);
350
/* Update partial remainder with partially used divisor limb. */
351
cy2 = mpn_submul_1 (n2p, qp, qn, dp[in - 1] & (~(mp_limb_t) 0 >> cnt));
362
quotient_too_large = (cy1 < cy2);
367
/* True: partial remainder now is neutral, i.e., it is not shifted up. */
369
tp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
375
MPN_COPY (rp, n2p, rn);
380
mpn_mul (tp, qp, qn, dp, in);
383
mpn_mul (tp, dp, in, qp, qn);
385
cy = mpn_sub (n2p, n2p, rn, tp + in, qn);
386
MPN_COPY (rp + in, n2p, dn - in);
387
quotient_too_large |= cy;
388
cy = mpn_sub_n (rp, np, tp, in);
389
cy = mpn_sub_1 (rp + in, rp + in, rn, cy);
390
quotient_too_large |= cy;
392
if (quotient_too_large)
394
mpn_decr_u (qp, (mp_limb_t) 1);
395
mpn_add_n (rp, rp, dp, dn);