2
* Copyright (c) 2005 Josef Cejka
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions
9
* - Redistributions of source code must retain the above copyright
10
* notice, this list of conditions and the following disclaimer.
11
* - Redistributions in binary form must reproduce the above copyright
12
* notice, this list of conditions and the following disclaimer in the
13
* documentation and/or other materials provided with the distribution.
14
* - The name of the author may not be used to endorse or promote products
15
* derived from this software without specific prior written permission.
17
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
/** @addtogroup softfloat
36
#include "conversion.h"
37
#include "comparison.h"
40
/*
 * Widen a float32 to a float64. The visible statements copy the sign, move
 * the fraction into the upper bits of the wider fraction field, and set the
 * exponent either to 0x7FF (infinity/NaN input) or to the re-biased float32
 * exponent. Inputs whose exponent field is 0 (zero and denormalized numbers)
 * get separate treatment further down.
 */
float64 convertFloat32ToFloat64(float32 a)
45
result.parts.sign = a.parts.sign;
46
result.parts.fraction = a.parts.fraction;
47
/* widen the fraction: shift left by the difference of the two fraction sizes */
result.parts.fraction <<= (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE );
49
/* infinity and NaN inputs get exponent 0x7FF */
if ((isFloat32Infinity(a))||(isFloat32NaN(a))) {
50
result.parts.exp = 0x7FF;
51
/* TODO: check whether this is correct for signalling NaNs */
55
/* re-bias the exponent from the float32 bias to the float64 bias */
result.parts.exp = a.parts.exp + ( (int)FLOAT64_BIAS - FLOAT32_BIAS );
56
/* exponent field 0: the input is zero or a denormalized number */
if (a.parts.exp == 0) {
57
/* normalize denormalized numbers */
59
if (result.parts.fraction == 0ll) { /* fix zero */
60
result.parts.exp = 0ll;
64
/* work on a local copy of the already widened fraction */
frac = result.parts.fraction;
66
/* loop while bit 52 (0x10000000000000, the hidden-bit position) is clear */
while (!(frac & (0x10000000000000ll))) {
72
result.parts.fraction = frac;
79
/*
 * Narrow a float64 to a float32. The visible branches cover NaN, infinity,
 * a branch that stores exponent 0xFF with a zero fraction, results with
 * exp <= 0 (underflow or denormalized), and the ordinary case where the
 * re-biased exponent is stored directly. The fraction is narrowed with plain
 * right shifts; no rounding step appears among these statements.
 */
float32 convertFloat64ToFloat32(float64 a)
85
result.parts.sign = a.parts.sign;
87
if (isFloat64NaN(a)) {
89
result.parts.exp = 0xFF;
91
if (isFloat64SigNaN(a)) {
92
result.parts.fraction = 0x400000; /* set first bit of fraction nonzero */
96
/* presumably the branch taken when isFloat64SigNaN(a) is false -- TODO confirm */
result.parts.fraction = 0x1; /* fraction nonzero but its first bit is zero */
100
/* infinity: exponent 0xFF with a zero fraction */
if (isFloat64Infinity(a)) {
101
result.parts.fraction = 0;
102
result.parts.exp = 0xFF;
106
/* re-bias the exponent from float64 to float32; the result may be <= 0 */
exp = (int)a.parts.exp - FLOAT64_BIAS + FLOAT32_BIAS;
110
/* same values as the infinity branch; presumably the overflow case -- TODO confirm */
result.parts.fraction = 0;
111
result.parts.exp = 0xFF;
114
} else if (exp <= 0 ) {
116
/* underflow or denormalized */
118
result.parts.exp = 0;
121
/* NOTE(review): this branch requires exp <= 0, so the test below only makes
 * sense if exp has been negated beforehand -- TODO confirm */
if (exp > FLOAT32_FRACTION_SIZE ) {
122
/* FIXME: underflow */
123
result.parts.fraction = 0;
129
frac = a.parts.fraction;
130
frac |= 0x10000000000000ll; /* denormalize and set hidden bit */
132
/* narrow to the float32 fraction width; the extra 1 presumably accounts for
 * the now explicit hidden bit -- TODO confirm */
frac >>= (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE + 1);
138
result.parts.fraction = frac;
143
/* ordinary case: store the re-biased exponent as is */
result.parts.exp = exp;
144
/* keep the upper fraction bits; the low bits are shifted out */
result.parts.fraction = a.parts.fraction >> (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE);
149
/** Helper procedure for converting float32 to uint32
150
* @param a floating point number in normalized form (NaN and Inf are not checked here)
151
* @return unsigned integer
153
/*
 * Build an unsigned 32-bit integer from the fraction and exponent of a using
 * shifts. NaN, infinity and too-large exponents are not tested here; the
 * callers float32_to_uint32 and float32_to_int32 test for them first.
 */
static uint32_t _float32_to_uint32_helper(float32 a)
157
/* exponent below the bias, i.e. magnitude below 1 */
if (a.parts.exp < FLOAT32_BIAS) {
162
frac = a.parts.fraction;
164
/* make the implicit leading one explicit */
frac |= FLOAT32_HIDDEN_BIT_MASK;
165
/* shift fraction to left so hidden bit will be the most significant bit */
frac <<= 32 - FLOAT32_FRACTION_SIZE - 1;
168
/* shift right by 31 minus the unbiased exponent. NOTE(review): this needs
 * exp - FLOAT32_BIAS < 32, which the callers appear to check before calling */
frac >>= 32 - (a.parts.exp - FLOAT32_BIAS) - 1;
169
/* negative input with a nonzero magnitude */
if ((a.parts.sign == 1) && (frac != 0)) {
177
/* Convert float32 to unsigned int32
178
* FIXME: I'm not sure what to return if overflow/underflow happens
179
* - for now it is the biggest or the smallest int
181
/*
 * Convert a float32 to uint32_t. NaN is tested first, then infinity and
 * exponents of at least 32 + FLOAT32_BIAS (magnitude of 2^32 or more);
 * everything else is delegated to _float32_to_uint32_helper.
 */
uint32_t float32_to_uint32(float32 a)
183
if (isFloat32NaN(a)) {
187
/* infinity, or a magnitude too large for 32 bits */
if (isFloat32Infinity(a) || (a.parts.exp >= (32 + FLOAT32_BIAS))) {
194
return _float32_to_uint32_helper(a);
197
/* Convert float32 to signed int32
198
* FIXME: I'm not sure what to return if overflow/underflow happens
199
* - for now it is the biggest or the smallest int
201
/*
 * Convert a float32 to int32_t. NaN is tested first, then infinity and
 * exponents of at least 32 + FLOAT32_BIAS; everything else is delegated to
 * _float32_to_uint32_helper, whose uint32_t result is implicitly converted
 * to int32_t on return.
 */
int32_t float32_to_int32(float32 a)
203
if (isFloat32NaN(a)) {
207
/* NOTE(review): same threshold (32) as the unsigned variant, so inputs with
 * an unbiased exponent of 31 still reach the helper although they do not
 * fit into int32_t -- TODO confirm this is intended */
if (isFloat32Infinity(a) || (a.parts.exp >= (32 + FLOAT32_BIAS))) {
213
return _float32_to_uint32_helper(a);
217
/** Helper procedure for converting float64 to uint64
218
* @param a floating point number in normalized form (NaN and Inf are not checked here)
219
* @return unsigned integer
221
/*
 * Build an unsigned 64-bit integer from the fraction and exponent of a using
 * shifts. NaN, infinity and too-large exponents are not tested here; the
 * float64_to_* callers test for them first.
 */
static uint64_t _float64_to_uint64_helper(float64 a)
225
/* exponent below the bias, i.e. magnitude below 1 */
if (a.parts.exp < FLOAT64_BIAS) {
230
frac = a.parts.fraction;
232
/* make the implicit leading one explicit */
frac |= FLOAT64_HIDDEN_BIT_MASK;
233
/* shift fraction to left so hidden bit will be the most significant bit */
frac <<= 64 - FLOAT64_FRACTION_SIZE - 1;
236
/* shift right by 63 minus the unbiased exponent. NOTE(review): this needs
 * exp - FLOAT64_BIAS < 64, which the callers appear to check before calling */
frac >>= 64 - (a.parts.exp - FLOAT64_BIAS) - 1;
237
/* negative input with a nonzero magnitude */
if ((a.parts.sign == 1) && (frac != 0)) {
245
/* Convert float64 to unsigned int64
246
* FIXME: I'm not sure what to return if overflow/underflow happens
247
* - for now it is the biggest or the smallest int
249
/*
 * Convert a float64 to uint64_t. NaN is tested first, then infinity and
 * exponents of at least 64 + FLOAT64_BIAS (magnitude of 2^64 or more);
 * everything else is delegated to _float64_to_uint64_helper.
 */
uint64_t float64_to_uint64(float64 a)
251
if (isFloat64NaN(a)) {
255
/* infinity, or a magnitude too large for 64 bits */
if (isFloat64Infinity(a) || (a.parts.exp >= (64 + FLOAT64_BIAS))) {
262
return _float64_to_uint64_helper(a);
265
/* Convert float64 to signed int64
266
* FIXME: I'm not sure what to return if overflow/underflow happens
267
* - for now it is the biggest or the smallest int
269
/*
 * Convert a float64 to int64_t. NaN is tested first, then infinity and
 * exponents of at least 64 + FLOAT64_BIAS; everything else is delegated to
 * _float64_to_uint64_helper, whose uint64_t result is implicitly converted
 * to int64_t on return.
 */
int64_t float64_to_int64(float64 a)
271
if (isFloat64NaN(a)) {
275
/* NOTE(review): same threshold (64) as the unsigned variant, so inputs with
 * an unbiased exponent of 63 still reach the helper although they do not
 * fit into int64_t -- TODO confirm this is intended */
if (isFloat64Infinity(a) || (a.parts.exp >= (64 + FLOAT64_BIAS))) {
281
return _float64_to_uint64_helper(a);
288
/** Helper procedure for converting float32 to uint64
289
* @param a floating point number in normalized form (NaN and Inf are not checked here)
290
* @return unsigned integer
292
/*
 * Build an unsigned 64-bit integer from the fraction and exponent of a
 * float32 using shifts. NaN, infinity and too-large exponents are not tested
 * here; the callers float32_to_uint64 and float32_to_int64 test for them
 * first.
 */
static uint64_t _float32_to_uint64_helper(float32 a)
296
/* exponent below the bias, i.e. magnitude below 1 */
if (a.parts.exp < FLOAT32_BIAS) {
301
frac = a.parts.fraction;
303
/* make the implicit leading one explicit */
frac |= FLOAT32_HIDDEN_BIT_MASK;
304
/* shift fraction to left so hidden bit will be the most significant bit */
/* assumes frac is a 64-bit variable -- TODO confirm against its declaration */
frac <<= 64 - FLOAT32_FRACTION_SIZE - 1;
307
/* shift right by 63 minus the unbiased exponent. NOTE(review): this needs
 * exp - FLOAT32_BIAS < 64, which the callers appear to check before calling */
frac >>= 64 - (a.parts.exp - FLOAT32_BIAS) - 1;
308
/* negative input with a nonzero magnitude */
if ((a.parts.sign == 1) && (frac != 0)) {
316
/* Convert float32 to unsigned int64
317
* FIXME: I'm not sure what to return if overflow/underflow happens
318
* - for now it is the biggest or the smallest int
320
/*
 * Convert a float32 to uint64_t. NaN is tested first, then infinity and
 * exponents of at least 64 + FLOAT32_BIAS (magnitude of 2^64 or more);
 * everything else is delegated to _float32_to_uint64_helper.
 */
uint64_t float32_to_uint64(float32 a)
322
if (isFloat32NaN(a)) {
326
/* infinity, or a magnitude too large for 64 bits */
if (isFloat32Infinity(a) || (a.parts.exp >= (64 + FLOAT32_BIAS))) {
333
return _float32_to_uint64_helper(a);
336
/* Convert float32 to signed int64
337
* FIXME: I'm not sure what to return if overflow/underflow happens
338
* - for now it is the biggest or the smallest int
340
/*
 * Convert a float32 to int64_t. NaN is tested first, then infinity and
 * exponents of at least 64 + FLOAT32_BIAS; everything else is delegated to
 * _float32_to_uint64_helper, whose uint64_t result is implicitly converted
 * to int64_t on return.
 */
int64_t float32_to_int64(float32 a)
342
if (isFloat32NaN(a)) {
346
/* NOTE(review): same threshold (64) as the unsigned variant, so inputs with
 * an unbiased exponent of 63 still reach the helper although they do not
 * fit into int64_t -- TODO confirm this is intended */
if (isFloat32Infinity(a) || (a.parts.exp >= (64 + FLOAT32_BIAS))) {
352
return _float32_to_uint64_helper(a);
356
/* Convert float64 to unsigned int32
357
* FIXME: I'm not sure what to return if overflow/underflow happens
358
* - for now it is the biggest or the smallest int
360
/*
 * Convert a float64 to uint32_t. NaN is tested first, then infinity and
 * exponents of at least 32 + FLOAT64_BIAS (magnitude of 2^32 or more);
 * everything else goes through the 64-bit helper and is cast to uint32_t.
 */
uint32_t float64_to_uint32(float64 a)
362
if (isFloat64NaN(a)) {
366
/* infinity, or a magnitude too large for 32 bits */
if (isFloat64Infinity(a) || (a.parts.exp >= (32 + FLOAT64_BIAS))) {
373
/* the helper works on 64 bits; the cast keeps the low 32 bits */
return (uint32_t)_float64_to_uint64_helper(a);
376
/* Convert float64 to signed int32
377
* FIXME: I'm not sure what to return if overflow/underflow happens
378
* - for now it is the biggest or the smallest int
380
/*
 * Convert a float64 to int32_t. NaN is tested first, then infinity and
 * exponents of at least 32 + FLOAT64_BIAS; everything else goes through the
 * 64-bit helper and is cast to int32_t.
 */
int32_t float64_to_int32(float64 a)
382
if (isFloat64NaN(a)) {
386
/* NOTE(review): same threshold (32) as the unsigned variant, so inputs with
 * an unbiased exponent of 31 still reach the helper although they do not
 * fit into int32_t -- TODO confirm this is intended */
if (isFloat64Infinity(a) || (a.parts.exp >= (32 + FLOAT64_BIAS))) {
392
return (int32_t)_float64_to_uint64_helper(a);
395
/** Convert unsigned integer to float32
399
/*
 * Convert an unsigned 32-bit integer to float32. The exponent is derived
 * from countZeroes32(i), the value is passed through roundFloat32, and the
 * fraction is taken from i shifted right by 7. The sign is always 0.
 */
float32 uint32_to_float32(uint32_t i)
405
result.parts.sign = 0;
406
result.parts.fraction = 0;
408
/* presumably the number of leading zero bits of i -- TODO confirm against countZeroes32 */
counter = countZeroes32(i);
410
/* bias plus (31 - counter) */
exp = FLOAT32_BIAS + 32 - counter - 1;
423
/* roundFloat32 receives pointers, so it can modify both exp and i */
roundFloat32(&exp, &i);
425
/* the low 7 bits of i are not stored in the fraction */
result.parts.fraction = i >> 7;
426
result.parts.exp = exp;
431
/*
 * Convert a signed 32-bit integer to float32 by converting an unsigned value
 * with uint32_to_float32 and then setting the sign bit from i < 0.
 */
float32 int32_to_float32(int32_t i)
436
/* NOTE(review): -i is evaluated as a signed value before the cast; for the
 * most negative int32_t that negation overflows, which is undefined in C.
 * Negating after the conversion to unsigned would avoid it -- TODO confirm */
result = uint32_to_float32((uint32_t)(-i));
438
result = uint32_to_float32((uint32_t)i);
441
/* sign bit is 1 exactly for negative inputs */
result.parts.sign = i < 0;
447
/*
 * Convert an unsigned 64-bit integer to float32. The exponent is derived
 * from countZeroes64(i), i is shifted so the value fits the lower 32 bits,
 * the 32-bit value j is passed through roundFloat32, and the fraction is
 * taken from j shifted right by 7. The sign is always 0.
 */
float32 uint64_to_float32(uint64_t i)
454
result.parts.sign = 0;
455
result.parts.fraction = 0;
457
/* presumably the number of leading zero bits of i -- TODO confirm against countZeroes64 */
counter = countZeroes64(i);
459
/* bias plus (63 - counter) */
exp = FLOAT32_BIAS + 64 - counter - 1;
466
/* Shift all to the first 31 bits (31. will be hidden 1)*/
468
/* left shift by counter - 33; only valid when counter >= 33, so this and the
 * right shift below are presumably alternatives chosen by counter -- TODO confirm */
i <<= counter - 1 - 32;
470
/* right shift by 33 - counter; the shifted-out low bits are dropped */
i >>= 1 + 32 - counter;
474
/* roundFloat32 receives pointers, so it can modify both exp and j;
 * j is presumably the low 32 bits of i -- TODO confirm */
roundFloat32(&exp, &j);
476
/* the low 7 bits of j are not stored in the fraction */
result.parts.fraction = j >> 7;
477
result.parts.exp = exp;
481
/*
 * Convert a signed 64-bit integer to float32 by converting an unsigned value
 * with uint64_to_float32 and then setting the sign bit from i < 0.
 */
float32 int64_to_float32(int64_t i)
486
/* NOTE(review): -i is evaluated as a signed value before the cast; for the
 * most negative int64_t that negation overflows, which is undefined in C.
 * Negating after the conversion to unsigned would avoid it -- TODO confirm */
result = uint64_to_float32((uint64_t)(-i));
488
result = uint64_to_float32((uint64_t)i);
491
/* sign bit is 1 exactly for negative inputs */
result.parts.sign = i < 0;
496
/** Convert unsigned integer to float64
500
/*
 * Convert an unsigned 32-bit integer to float64. The exponent is derived
 * from countZeroes32(i), the working value frac is shifted left, passed
 * through roundFloat64, and the fraction is taken from frac shifted right
 * by 10. The sign is always 0.
 */
float64 uint32_to_float64(uint32_t i)
507
result.parts.sign = 0;
508
result.parts.fraction = 0;
510
/* presumably the number of leading zero bits of i -- TODO confirm against countZeroes32 */
counter = countZeroes32(i);
512
/* bias plus (31 - counter) */
exp = FLOAT64_BIAS + 32 - counter - 1;
520
/* shift left by counter + 31; assumes frac is a 64-bit copy of i -- TODO confirm */
frac <<= counter + 32 - 1;
522
/* roundFloat64 receives pointers, so it can modify both exp and frac */
roundFloat64(&exp, &frac);
524
/* the low 10 bits of frac are not stored in the fraction */
result.parts.fraction = frac >> 10;
525
result.parts.exp = exp;
530
/*
 * Convert a signed 32-bit integer to float64 by converting an unsigned value
 * with uint32_to_float64 and then setting the sign bit from i < 0.
 */
float64 int32_to_float64(int32_t i)
535
/* NOTE(review): -i is evaluated as a signed value before the cast; for the
 * most negative int32_t that negation overflows, which is undefined in C.
 * Negating after the conversion to unsigned would avoid it -- TODO confirm */
result = uint32_to_float64((uint32_t)(-i));
537
result = uint32_to_float64((uint32_t)i);
540
/* sign bit is 1 exactly for negative inputs */
result.parts.sign = i < 0;
546
/*
 * Convert an unsigned 64-bit integer to float64. The exponent is derived
 * from countZeroes64(i), the value is passed through roundFloat64, and the
 * fraction is taken from i shifted right by 10. The sign is always 0.
 */
float64 uint64_to_float64(uint64_t i)
552
result.parts.sign = 0;
553
result.parts.fraction = 0;
555
/* presumably the number of leading zero bits of i -- TODO confirm against countZeroes64 */
counter = countZeroes64(i);
557
/* bias plus (63 - counter) */
exp = FLOAT64_BIAS + 64 - counter - 1;
570
/* roundFloat64 receives pointers, so it can modify both exp and i */
roundFloat64(&exp, &i);
572
/* the low 10 bits of i are not stored in the fraction */
result.parts.fraction = i >> 10;
573
result.parts.exp = exp;
577
/*
 * Convert a signed 64-bit integer to float64 by converting an unsigned value
 * with uint64_to_float64 and then setting the sign bit from i < 0.
 */
float64 int64_to_float64(int64_t i)
582
/* NOTE(review): -i is evaluated as a signed value before the cast; for the
 * most negative int64_t that negation overflows, which is undefined in C.
 * Negating after the conversion to unsigned would avoid it -- TODO confirm */
result = uint64_to_float64((uint64_t)(-i));
584
result = uint64_to_float64((uint64_t)i);
587
/* sign bit is 1 exactly for negative inputs */
result.parts.sign = i < 0;