2
* Copyright (c) 2005 Josef Cejka
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions
9
* - Redistributions of source code must retain the above copyright
10
* notice, this list of conditions and the following disclaimer.
11
* - Redistributions in binary form must reproduce the above copyright
12
* notice, this list of conditions and the following disclaimer in the
13
* documentation and/or other materials provided with the distribution.
14
* - The name of the author may not be used to endorse or promote products
15
* derived from this software without specific prior written permission.
17
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
/** @addtogroup softfloat
38
#include<comparison.h>
43
float32 divFloat32(float32 a, float32 b)
46
int32_t aexp, bexp, cexp;
47
uint64_t afrac, bfrac, cfrac;
49
result.parts.sign = a.parts.sign ^ b.parts.sign;
51
if (isFloat32NaN(a)) {
52
if (isFloat32SigNaN(a)) {
59
if (isFloat32NaN(b)) {
60
if (isFloat32SigNaN(b)) {
67
if (isFloat32Infinity(a)) {
68
if (isFloat32Infinity(b)) {
70
result.binary = FLOAT32_NAN;
74
result.parts.exp = a.parts.exp;
75
result.parts.fraction = a.parts.fraction;
79
if (isFloat32Infinity(b)) {
80
if (isFloat32Zero(a)) {
83
result.parts.fraction = 0;
88
result.parts.fraction = 0;
92
if (isFloat32Zero(b)) {
93
if (isFloat32Zero(a)) {
95
result.binary = FLOAT32_NAN;
98
/* FIXME: division by zero */
100
result.parts.fraction = 0;
105
afrac = a.parts.fraction;
107
bfrac = b.parts.fraction;
110
/* denormalized numbers */
113
result.parts.exp = 0;
114
result.parts.fraction = 0;
120
/* afrac is nonzero => it must stop */
121
while (! (afrac & FLOAT32_HIDDEN_BIT_MASK) ) {
129
/* bfrac is nonzero => it must stop */
130
while (! (bfrac & FLOAT32_HIDDEN_BIT_MASK) ) {
136
afrac = (afrac | FLOAT32_HIDDEN_BIT_MASK ) << (32 - FLOAT32_FRACTION_SIZE - 1 );
137
bfrac = (bfrac | FLOAT32_HIDDEN_BIT_MASK ) << (32 - FLOAT32_FRACTION_SIZE );
139
if ( bfrac <= (afrac << 1) ) {
144
cexp = aexp - bexp + FLOAT32_BIAS - 2;
146
cfrac = (afrac << 32) / bfrac;
147
if (( cfrac & 0x3F ) == 0) {
148
cfrac |= ( bfrac * cfrac != afrac << 32 );
153
/* find first nonzero digit and shift result and detect possibly underflow */
154
while ((cexp > 0) && (cfrac) && (!(cfrac & (FLOAT32_HIDDEN_BIT_MASK << 7 )))) {
157
/* TODO: fix underflow */
160
cfrac += (0x1 << 6); /* FIXME: 7 is not sure*/
162
if (cfrac & (FLOAT32_HIDDEN_BIT_MASK << 7)) {
168
if (cexp >= FLOAT32_MAX_EXPONENT ) {
169
/* FIXME: overflow, return infinity */
170
result.parts.exp = FLOAT32_MAX_EXPONENT;
171
result.parts.fraction = 0;
176
/* FIXME: underflow */
177
result.parts.exp = 0;
178
if ((cexp + FLOAT32_FRACTION_SIZE) < 0) {
179
result.parts.fraction = 0;
189
result.parts.exp = (uint32_t)cexp;
192
result.parts.fraction = ((cfrac >> 6) & (~FLOAT32_HIDDEN_BIT_MASK));
197
float64 divFloat64(float64 a, float64 b)
200
int64_t aexp, bexp, cexp;
201
uint64_t afrac, bfrac, cfrac;
202
uint64_t remlo, remhi;
204
result.parts.sign = a.parts.sign ^ b.parts.sign;
206
if (isFloat64NaN(a)) {
208
if (isFloat64SigNaN(b)) {
213
if (isFloat64SigNaN(a)) {
220
if (isFloat64NaN(b)) {
221
if (isFloat64SigNaN(b)) {
228
if (isFloat64Infinity(a)) {
229
if (isFloat64Infinity(b) || isFloat64Zero(b)) {
230
/*FIXME: inf / inf */
231
result.binary = FLOAT64_NAN;
235
result.parts.exp = a.parts.exp;
236
result.parts.fraction = a.parts.fraction;
240
if (isFloat64Infinity(b)) {
241
if (isFloat64Zero(a)) {
243
result.parts.exp = 0;
244
result.parts.fraction = 0;
247
/* FIXME: num / inf*/
248
result.parts.exp = 0;
249
result.parts.fraction = 0;
253
if (isFloat64Zero(b)) {
254
if (isFloat64Zero(a)) {
256
result.binary = FLOAT64_NAN;
259
/* FIXME: division by zero */
260
result.parts.exp = 0;
261
result.parts.fraction = 0;
266
afrac = a.parts.fraction;
268
bfrac = b.parts.fraction;
271
/* denormalized numbers */
274
result.parts.exp = 0;
275
result.parts.fraction = 0;
281
/* afrac is nonzero => it must stop */
282
while (! (afrac & FLOAT64_HIDDEN_BIT_MASK) ) {
290
/* bfrac is nonzero => it must stop */
291
while (! (bfrac & FLOAT64_HIDDEN_BIT_MASK) ) {
297
afrac = (afrac | FLOAT64_HIDDEN_BIT_MASK ) << (64 - FLOAT64_FRACTION_SIZE - 2 );
298
bfrac = (bfrac | FLOAT64_HIDDEN_BIT_MASK ) << (64 - FLOAT64_FRACTION_SIZE - 1);
300
if ( bfrac <= (afrac << 1) ) {
305
cexp = aexp - bexp + FLOAT64_BIAS - 2;
307
cfrac = divFloat64estim(afrac, bfrac);
309
if (( cfrac & 0x1FF ) <= 2) { /*FIXME:?? */
310
mul64integers( bfrac, cfrac, &remlo, &remhi);
311
/* (__u128)afrac << 64 - ( ((__u128)remhi<<64) + (__u128)remlo )*/
312
remhi = afrac - remhi - ( remlo > 0);
315
while ((int64_t) remhi < 0) {
318
remhi += ( remlo < bfrac );
320
cfrac |= ( remlo != 0 );
323
/* round and shift */
324
result = finishFloat64(cexp, cfrac, result.parts.sign);
329
uint64_t divFloat64estim(uint64_t a, uint64_t b)
332
uint64_t remhi, remlo;
336
return 0xFFFFFFFFFFFFFFFFull;
340
result = ((bhi << 32) <= a) ?( 0xFFFFFFFFull << 32) : ( a / bhi) << 32;
341
mul64integers(b, result, &remlo, &remhi);
343
remhi = a - remhi - (remlo > 0);
347
while ( (int64_t) remhi < 0 ) {
348
result -= 0x1ll << 32;
350
remhi += bhi + ( remlo < b );
352
remhi = (remhi << 32) | (remlo >> 32);
353
if (( bhi << 32) <= remhi) {
354
result |= 0xFFFFFFFF;
356
result |= remhi / bhi;