2
* Copyright (c) 2005 Josef Cejka
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions
9
* - Redistributions of source code must retain the above copyright
10
* notice, this list of conditions and the following disclaimer.
11
* - Redistributions in binary form must reproduce the above copyright
12
* notice, this list of conditions and the following disclaimer in the
13
* documentation and/or other materials provided with the distribution.
14
* - The name of the author may not be used to endorse or promote products
15
* derived from this software without specific prior written permission.
17
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
/** @addtogroup softfloat
37
#include<comparison.h>
40
/** Multiply two 32 bit float numbers
43
float32 mulFloat32(float32 a, float32 b)
46
uint64_t frac1, frac2;
49
result.parts.sign = a.parts.sign ^ b.parts.sign;
51
if (isFloat32NaN(a) || isFloat32NaN(b) ) {
52
/* TODO: fix SigNaNs */
53
if (isFloat32SigNaN(a)) {
54
result.parts.fraction = a.parts.fraction;
55
result.parts.exp = a.parts.exp;
58
if (isFloat32SigNaN(b)) { /* TODO: fix SigNaN */
59
result.parts.fraction = b.parts.fraction;
60
result.parts.exp = b.parts.exp;
63
/* set NaN as result */
64
result.binary = FLOAT32_NAN;
68
if (isFloat32Infinity(a)) {
69
if (isFloat32Zero(b)) {
70
/* FIXME: zero * infinity */
71
result.binary = FLOAT32_NAN;
74
result.parts.fraction = a.parts.fraction;
75
result.parts.exp = a.parts.exp;
79
if (isFloat32Infinity(b)) {
80
if (isFloat32Zero(a)) {
81
/* FIXME: zero * infinity */
82
result.binary = FLOAT32_NAN;
85
result.parts.fraction = b.parts.fraction;
86
result.parts.exp = b.parts.exp;
90
/* exp is signed so we can easy detect underflow */
91
exp = a.parts.exp + b.parts.exp;
94
if (exp >= FLOAT32_MAX_EXPONENT) {
96
/* set infinity as result */
97
result.binary = FLOAT32_INF;
98
result.parts.sign = a.parts.sign ^ b.parts.sign;
103
/* FIXME: underflow */
104
/* return signed zero */
105
result.parts.fraction = 0x0;
106
result.parts.exp = 0x0;
110
frac1 = a.parts.fraction;
111
if (a.parts.exp > 0) {
112
frac1 |= FLOAT32_HIDDEN_BIT_MASK;
117
frac2 = b.parts.fraction;
119
if (b.parts.exp > 0) {
120
frac2 |= FLOAT32_HIDDEN_BIT_MASK;
125
frac1 <<= 1; /* one bit space for rounding */
127
frac1 = frac1 * frac2;
128
/* round and return */
130
while ((exp < FLOAT32_MAX_EXPONENT) && (frac1 >= ( 1 << (FLOAT32_FRACTION_SIZE + 2)))) {
131
/* 23 bits of fraction + one more for hidden bit (all shifted 1 bit left)*/
137
/* ++frac1; FIXME: not works - without it is ok */
138
frac1 >>= 1; /* shift off rounding space */
140
if ((exp < FLOAT32_MAX_EXPONENT) && (frac1 >= (1 << (FLOAT32_FRACTION_SIZE + 1)))) {
145
if (exp >= FLOAT32_MAX_EXPONENT ) {
146
/* TODO: fix overflow */
148
result.parts.exp = FLOAT32_MAX_EXPONENT;
149
result.parts.fraction = 0x0;
153
exp -= FLOAT32_FRACTION_SIZE;
155
if (exp <= FLOAT32_FRACTION_SIZE) {
156
/* denormalized number */
157
frac1 >>= 1; /* denormalize */
158
while ((frac1 > 0) && (exp < 0)) {
163
/* FIXME : underflow */
164
result.parts.exp = 0;
165
result.parts.fraction = 0;
169
result.parts.exp = exp;
170
result.parts.fraction = frac1 & ( (1 << FLOAT32_FRACTION_SIZE) - 1);
176
/** Multiply two 64 bit float numbers
179
float64 mulFloat64(float64 a, float64 b)
182
uint64_t frac1, frac2;
185
result.parts.sign = a.parts.sign ^ b.parts.sign;
187
if (isFloat64NaN(a) || isFloat64NaN(b) ) {
188
/* TODO: fix SigNaNs */
189
if (isFloat64SigNaN(a)) {
190
result.parts.fraction = a.parts.fraction;
191
result.parts.exp = a.parts.exp;
194
if (isFloat64SigNaN(b)) { /* TODO: fix SigNaN */
195
result.parts.fraction = b.parts.fraction;
196
result.parts.exp = b.parts.exp;
199
/* set NaN as result */
200
result.binary = FLOAT64_NAN;
204
if (isFloat64Infinity(a)) {
205
if (isFloat64Zero(b)) {
206
/* FIXME: zero * infinity */
207
result.binary = FLOAT64_NAN;
210
result.parts.fraction = a.parts.fraction;
211
result.parts.exp = a.parts.exp;
215
if (isFloat64Infinity(b)) {
216
if (isFloat64Zero(a)) {
217
/* FIXME: zero * infinity */
218
result.binary = FLOAT64_NAN;
221
result.parts.fraction = b.parts.fraction;
222
result.parts.exp = b.parts.exp;
226
/* exp is signed so we can easy detect underflow */
227
exp = a.parts.exp + b.parts.exp - FLOAT64_BIAS;
229
frac1 = a.parts.fraction;
231
if (a.parts.exp > 0) {
232
frac1 |= FLOAT64_HIDDEN_BIT_MASK;
237
frac2 = b.parts.fraction;
239
if (b.parts.exp > 0) {
240
frac2 |= FLOAT64_HIDDEN_BIT_MASK;
245
frac1 <<= (64 - FLOAT64_FRACTION_SIZE - 1);
246
frac2 <<= (64 - FLOAT64_FRACTION_SIZE - 2);
248
mul64integers(frac1, frac2, &frac1, &frac2);
250
frac2 |= (frac1 != 0);
251
if (frac2 & (0x1ll << 62)) {
256
result = finishFloat64(exp, frac2, result.parts.sign);
260
/** Multiply two 64 bit numbers and return result in two parts
261
* @param a first operand
262
* @param b second operand
263
* @param lo lower part from result
264
* @param hi higher part of result
266
void mul64integers(uint64_t a,uint64_t b, uint64_t *lo, uint64_t *hi)
268
uint64_t low, high, middle1, middle2;
271
alow = a & 0xFFFFFFFF;
272
blow = b & 0xFFFFFFFF;
277
low = ((uint64_t)alow) * blow;
283
high += (((uint64_t)(middle1 < middle2)) << 32) + (middle1 >> 32);
286
high += (low < middle1);