255
281
flag float128_lt_quiet( float128, float128 );
256
282
flag float128_is_signaling_nan( float128 );
284
/*----------------------------------------------------------------------------
285
| Packs the sign `zSign', the exponent `zExp', and the significand formed
286
| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
287
| floating-point value, returning the result. After being shifted into the
288
| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
289
| added together to form the most significant 64 bits of the result. This
290
| means that any integer portion of `zSig0' will be added into the exponent.
291
| Since a properly normalized significand will have an integer portion equal
292
| to 1, the `zExp' input should be 1 less than the desired result exponent
293
| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
| significand.
295
*----------------------------------------------------------------------------*/
298
packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
303
z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
308
/*----------------------------------------------------------------------------
309
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
310
| and extended significand formed by the concatenation of `zSig0', `zSig1',
311
| and `zSig2', and returns the proper quadruple-precision floating-point value
312
| corresponding to the abstract input. Ordinarily, the abstract value is
313
| simply rounded and packed into the quadruple-precision format, with the
314
| inexact exception raised if the abstract input cannot be represented
315
| exactly. However, if the abstract value is too large, the overflow and
316
| inexact exceptions are raised and an infinity or maximal finite value is
317
| returned. If the abstract value is too small, the input value is rounded to
318
| a subnormal number, and the underflow and inexact exceptions are raised if
319
| the abstract input cannot be represented exactly as a subnormal quadruple-
320
| precision floating-point number.
321
| The input significand must be normalized or smaller. If the input
322
| significand is not normalized, `zExp' must be 0; in that case, the result
323
| returned is a subnormal number, and it must not require rounding. In the
324
| usual case that the input significand is normalized, `zExp' must be 1 less
325
| than the ``true'' floating-point exponent. The handling of underflow and
326
| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
327
*----------------------------------------------------------------------------*/
330
roundAndPackFloat128(
331
flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 )
334
flag roundNearestEven, increment, isTiny;
336
roundingMode = float_rounding_mode;
337
roundNearestEven = ( roundingMode == float_round_nearest_even );
338
increment = ( (sbits64) zSig2 < 0 );
339
if ( ! roundNearestEven ) {
340
if ( roundingMode == float_round_to_zero ) {
345
increment = ( roundingMode == float_round_down ) && zSig2;
348
increment = ( roundingMode == float_round_up ) && zSig2;
352
if ( 0x7FFD <= (bits32) zExp ) {
353
if ( ( 0x7FFD < zExp )
354
|| ( ( zExp == 0x7FFD )
356
LIT64( 0x0001FFFFFFFFFFFF ),
357
LIT64( 0xFFFFFFFFFFFFFFFF ),
364
float_raise( float_flag_overflow | float_flag_inexact );
365
if ( ( roundingMode == float_round_to_zero )
366
|| ( zSign && ( roundingMode == float_round_up ) )
367
|| ( ! zSign && ( roundingMode == float_round_down ) )
373
LIT64( 0x0000FFFFFFFFFFFF ),
374
LIT64( 0xFFFFFFFFFFFFFFFF )
377
return packFloat128( zSign, 0x7FFF, 0, 0 );
381
( float_detect_tininess == float_tininess_before_rounding )
387
LIT64( 0x0001FFFFFFFFFFFF ),
388
LIT64( 0xFFFFFFFFFFFFFFFF )
390
shift128ExtraRightJamming(
391
zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
393
if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
394
if ( roundNearestEven ) {
395
increment = ( (sbits64) zSig2 < 0 );
399
increment = ( roundingMode == float_round_down ) && zSig2;
402
increment = ( roundingMode == float_round_up ) && zSig2;
407
if ( zSig2 ) float_exception_flags |= float_flag_inexact;
409
add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
410
zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
413
if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
415
return packFloat128( zSign, zExp, zSig0, zSig1 );
419
/*----------------------------------------------------------------------------
420
| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
421
| and significand formed by the concatenation of `zSig0' and `zSig1', and
422
| returns the proper quadruple-precision floating-point value corresponding
423
| to the abstract input. This routine is just like `roundAndPackFloat128'
424
| except that the input significand has fewer bits and does not have to be
425
| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
| point exponent.
427
*----------------------------------------------------------------------------*/
430
normalizeRoundAndPackFloat128(
431
flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
441
shiftCount = countLeadingZeros64( zSig0 ) - 15;
442
if ( 0 <= shiftCount ) {
444
shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
447
shift128ExtraRightJamming(
448
zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
451
return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );