diff options
author | Chanho Park <chanho61.park@samsung.com> | 2014-06-26 20:28:10 +0900 |
---|---|---|
committer | Chanho Park <chanho61.park@samsung.com> | 2014-07-07 16:25:44 +0900 |
commit | a15119db2ff5c2fdfdeb913b297bf8aa3399132e (patch) | |
tree | 7d6f779408bb772b11c029ab88000fc01856b599 /fpu | |
parent | 340f06c9eaee097e626c251bf7a013350649c091 (diff) | |
download | qemu-a15119db2ff5c2fdfdeb913b297bf8aa3399132e.tar.gz qemu-a15119db2ff5c2fdfdeb913b297bf8aa3399132e.tar.bz2 qemu-a15119db2ff5c2fdfdeb913b297bf8aa3399132e.zip |
Imported Upstream version 2.0.0upstream/2.0.0
Change-Id: I081766c4314e7893f54fec80b920b1638d15021f
Diffstat (limited to 'fpu')
-rw-r--r-- | fpu/softfloat.c | 1155 |
1 files changed, 861 insertions, 294 deletions
diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 7ba51b6f3..e00a6fbca 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -42,6 +42,9 @@ these four paragraphs for those parts of this code that are retained. #include "fpu/softfloat.h" +/* We only need stdlib for abort() */ +#include <stdlib.h> + /*---------------------------------------------------------------------------- | Primitive arithmetic functions, including multi-word arithmetic, and | division and square root approximations. (Can be specialized to target if @@ -59,21 +62,6 @@ these four paragraphs for those parts of this code that are retained. *----------------------------------------------------------------------------*/ #include "softfloat-specialize.h" -void set_float_rounding_mode(int val STATUS_PARAM) -{ - STATUS(float_rounding_mode) = val; -} - -void set_float_exception_flags(int val STATUS_PARAM) -{ - STATUS(float_exception_flags) = val; -} - -void set_floatx80_rounding_precision(int val STATUS_PARAM) -{ - STATUS(floatx80_rounding_precision) = val; -} - /*---------------------------------------------------------------------------- | Returns the fraction bits of the half-precision floating-point value `a'. *----------------------------------------------------------------------------*/ @@ -121,20 +109,22 @@ static int32 roundAndPackInt32( flag zSign, uint64_t absZ STATUS_PARAM) roundingMode = STATUS(float_rounding_mode); roundNearestEven = ( roundingMode == float_round_nearest_even ); - roundIncrement = 0x40; - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - roundIncrement = 0; - } - else { - roundIncrement = 0x7F; - if ( zSign ) { - if ( roundingMode == float_round_up ) roundIncrement = 0; - } - else { - if ( roundingMode == float_round_down ) roundIncrement = 0; - } - } + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + roundIncrement = 0x40; + break; + case float_round_to_zero: + roundIncrement = 0; + break; + case float_round_up: + roundIncrement = zSign ? 0 : 0x7f; + break; + case float_round_down: + roundIncrement = zSign ? 0x7f : 0; + break; + default: + abort(); } roundBits = absZ & 0x7F; absZ = ( absZ + roundIncrement )>>7; @@ -170,19 +160,22 @@ static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATU roundingMode = STATUS(float_rounding_mode); roundNearestEven = ( roundingMode == float_round_nearest_even ); - increment = ( (int64_t) absZ1 < 0 ); - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - increment = 0; - } - else { - if ( zSign ) { - increment = ( roundingMode == float_round_down ) && absZ1; - } - else { - increment = ( roundingMode == float_round_up ) && absZ1; - } - } + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + increment = ((int64_t) absZ1 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && absZ1; + break; + case float_round_down: + increment = zSign && absZ1; + break; + default: + abort(); } if ( increment ) { ++absZ0; @@ -204,6 +197,61 @@ static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATU } /*---------------------------------------------------------------------------- +| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and +| `absZ1', with binary point between bits 63 and 64 (between the input words), +| and returns the properly rounded 64-bit unsigned integer corresponding to the +| input. Ordinarily, the fixed-point input is simply rounded to an integer, +| with the inexact exception raised if the input cannot be represented exactly +| as an integer. However, if the fixed-point input is too large, the invalid +| exception is raised and the largest unsigned integer is returned. +*----------------------------------------------------------------------------*/ + +static int64 roundAndPackUint64(flag zSign, uint64_t absZ0, + uint64_t absZ1 STATUS_PARAM) +{ + int8 roundingMode; + flag roundNearestEven, increment; + + roundingMode = STATUS(float_rounding_mode); + roundNearestEven = (roundingMode == float_round_nearest_even); + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + increment = ((int64_t)absZ1 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && absZ1; + break; + case float_round_down: + increment = zSign && absZ1; + break; + default: + abort(); + } + if (increment) { + ++absZ0; + if (absZ0 == 0) { + float_raise(float_flag_invalid STATUS_VAR); + return LIT64(0xFFFFFFFFFFFFFFFF); + } + absZ0 &= ~(((uint64_t)(absZ1<<1) == 0) & roundNearestEven); + } + + if (zSign && absZ0) { + float_raise(float_flag_invalid STATUS_VAR); + return 0; + } + + if (absZ1) { + STATUS(float_exception_flags) |= float_flag_inexact; + } + return absZ0; +} + +/*---------------------------------------------------------------------------- | Returns the fraction bits of the single-precision floating-point value `a'. *----------------------------------------------------------------------------*/ @@ -240,7 +288,7 @@ INLINE flag extractFloat32Sign( float32 a ) | If `a' is denormal and we are in flush-to-zero mode then set the | input-denormal exception and return zero. Otherwise just return the value. *----------------------------------------------------------------------------*/ -static float32 float32_squash_input_denormal(float32 a STATUS_PARAM) +float32 float32_squash_input_denormal(float32 a STATUS_PARAM) { if (STATUS(flush_inputs_to_zero)) { if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) { @@ -319,20 +367,23 @@ static float32 roundAndPackFloat32(flag zSign, int_fast16_t zExp, uint32_t zSig roundingMode = STATUS(float_rounding_mode); roundNearestEven = ( roundingMode == float_round_nearest_even ); - roundIncrement = 0x40; - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - roundIncrement = 0; - } - else { - roundIncrement = 0x7F; - if ( zSign ) { - if ( roundingMode == float_round_up ) roundIncrement = 0; - } - else { - if ( roundingMode == float_round_down ) roundIncrement = 0; - } - } + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + roundIncrement = 0x40; + break; + case float_round_to_zero: + roundIncrement = 0; + break; + case float_round_up: + roundIncrement = zSign ? 0 : 0x7f; + break; + case float_round_down: + roundIncrement = zSign ? 0x7f : 0; + break; + default: + abort(); + break; } roundBits = zSig & 0x7F; if ( 0xFD <= (uint16_t) zExp ) { @@ -422,7 +473,7 @@ INLINE flag extractFloat64Sign( float64 a ) | If `a' is denormal and we are in flush-to-zero mode then set the | input-denormal exception and return zero. Otherwise just return the value. *----------------------------------------------------------------------------*/ -static float64 float64_squash_input_denormal(float64 a STATUS_PARAM) +float64 float64_squash_input_denormal(float64 a STATUS_PARAM) { if (STATUS(flush_inputs_to_zero)) { if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) { @@ -501,20 +552,22 @@ static float64 roundAndPackFloat64(flag zSign, int_fast16_t zExp, uint64_t zSig roundingMode = STATUS(float_rounding_mode); roundNearestEven = ( roundingMode == float_round_nearest_even ); - roundIncrement = 0x200; - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - roundIncrement = 0; - } - else { - roundIncrement = 0x3FF; - if ( zSign ) { - if ( roundingMode == float_round_up ) roundIncrement = 0; - } - else { - if ( roundingMode == float_round_down ) roundIncrement = 0; - } - } + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + roundIncrement = 0x200; + break; + case float_round_to_zero: + roundIncrement = 0; + break; + case float_round_up: + roundIncrement = zSign ? 0 : 0x3ff; + break; + case float_round_down: + roundIncrement = zSign ? 0x3ff : 0; + break; + default: + abort(); } roundBits = zSig & 0x3FF; if ( 0x7FD <= (uint16_t) zExp ) { @@ -684,19 +737,21 @@ static floatx80 goto precision80; } zSig0 |= ( zSig1 != 0 ); - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - roundIncrement = 0; - } - else { - roundIncrement = roundMask; - if ( zSign ) { - if ( roundingMode == float_round_up ) roundIncrement = 0; - } - else { - if ( roundingMode == float_round_down ) roundIncrement = 0; - } - } + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + break; + case float_round_to_zero: + roundIncrement = 0; + break; + case float_round_up: + roundIncrement = zSign ? 0 : roundMask; + break; + case float_round_down: + roundIncrement = zSign ? roundMask : 0; + break; + default: + abort(); } roundBits = zSig0 & roundMask; if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) { @@ -743,19 +798,22 @@ static floatx80 if ( zSig0 == 0 ) zExp = 0; return packFloatx80( zSign, zExp, zSig0 ); precision80: - increment = ( (int64_t) zSig1 < 0 ); - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - increment = 0; - } - else { - if ( zSign ) { - increment = ( roundingMode == float_round_down ) && zSig1; - } - else { - increment = ( roundingMode == float_round_up ) && zSig1; - } - } + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + increment = ((int64_t)zSig1 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && zSig1; + break; + case float_round_down: + increment = zSign && zSig1; + break; + default: + abort(); } if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) { if ( ( 0x7FFE < zExp ) @@ -785,16 +843,22 @@ static floatx80 zExp = 0; if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR); if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact; - if ( roundNearestEven ) { - increment = ( (int64_t) zSig1 < 0 ); - } - else { - if ( zSign ) { - increment = ( roundingMode == float_round_down ) && zSig1; - } - else { - increment = ( roundingMode == float_round_up ) && zSig1; - } + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + increment = ((int64_t)zSig1 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && zSig1; + break; + case float_round_down: + increment = zSign && zSig1; + break; + default: + abort(); } if ( increment ) { ++zSig0; @@ -994,19 +1058,22 @@ static float128 roundingMode = STATUS(float_rounding_mode); roundNearestEven = ( roundingMode == float_round_nearest_even ); - increment = ( (int64_t) zSig2 < 0 ); - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - increment = 0; - } - else { - if ( zSign ) { - increment = ( roundingMode == float_round_down ) && zSig2; - } - else { - increment = ( roundingMode == float_round_up ) && zSig2; - } - } + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + increment = ((int64_t)zSig2 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && zSig2; + break; + case float_round_down: + increment = zSign && zSig2; + break; + default: + abort(); } if ( 0x7FFD <= (uint32_t) zExp ) { if ( ( 0x7FFD < zExp ) @@ -1054,16 +1121,22 @@ static float128 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 ); zExp = 0; if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR); - if ( roundNearestEven ) { - increment = ( (int64_t) zSig2 < 0 ); - } - else { - if ( zSign ) { - increment = ( roundingMode == float_round_down ) && zSig2; - } - else { - increment = ( roundingMode == float_round_up ) && zSig2; - } + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + increment = ((int64_t)zSig2 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && zSig2; + break; + case float_round_down: + increment = zSign && zSig2; + break; + default: + abort(); } } } @@ -1121,7 +1194,7 @@ static float128 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float32 int32_to_float32( int32 a STATUS_PARAM ) +float32 int32_to_float32(int32_t a STATUS_PARAM) { flag zSign; @@ -1138,7 +1211,7 @@ float32 int32_to_float32( int32 a STATUS_PARAM ) | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float64 int32_to_float64( int32 a STATUS_PARAM ) +float64 int32_to_float64(int32_t a STATUS_PARAM) { flag zSign; uint32 absA; @@ -1161,7 +1234,7 @@ float64 int32_to_float64( int32 a STATUS_PARAM ) | Arithmetic. *----------------------------------------------------------------------------*/ -floatx80 int32_to_floatx80( int32 a STATUS_PARAM ) +floatx80 int32_to_floatx80(int32_t a STATUS_PARAM) { flag zSign; uint32 absA; @@ -1183,7 +1256,7 @@ floatx80 int32_to_floatx80( int32 a STATUS_PARAM ) | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float128 int32_to_float128( int32 a STATUS_PARAM ) +float128 int32_to_float128(int32_t a STATUS_PARAM) { flag zSign; uint32 absA; @@ -1205,7 +1278,7 @@ float128 int32_to_float128( int32 a STATUS_PARAM ) | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float32 int64_to_float32( int64 a STATUS_PARAM ) +float32 int64_to_float32(int64_t a STATUS_PARAM) { flag zSign; uint64 absA; @@ -1231,7 +1304,7 @@ float32 int64_to_float32( int64 a STATUS_PARAM ) } -float32 uint64_to_float32( uint64 a STATUS_PARAM ) +float32 uint64_to_float32(uint64_t a STATUS_PARAM) { int8 shiftCount; @@ -1258,7 +1331,7 @@ float32 uint64_to_float32( uint64 a STATUS_PARAM ) | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float64 int64_to_float64( int64 a STATUS_PARAM ) +float64 int64_to_float64(int64_t a STATUS_PARAM) { flag zSign; @@ -1271,7 +1344,7 @@ float64 int64_to_float64( int64 a STATUS_PARAM ) } -float64 uint64_to_float64(uint64 a STATUS_PARAM) +float64 uint64_to_float64(uint64_t a STATUS_PARAM) { int exp = 0x43C; @@ -1292,7 +1365,7 @@ float64 uint64_to_float64(uint64 a STATUS_PARAM) | Arithmetic. *----------------------------------------------------------------------------*/ -floatx80 int64_to_floatx80( int64 a STATUS_PARAM ) +floatx80 int64_to_floatx80(int64_t a STATUS_PARAM) { flag zSign; uint64 absA; @@ -1312,7 +1385,7 @@ floatx80 int64_to_floatx80( int64 a STATUS_PARAM ) | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float128 int64_to_float128( int64 a STATUS_PARAM ) +float128 int64_to_float128(int64_t a STATUS_PARAM) { flag zSign; uint64 absA; @@ -1339,7 +1412,7 @@ float128 int64_to_float128( int64 a STATUS_PARAM ) } -float128 uint64_to_float128(uint64 a STATUS_PARAM) +float128 uint64_to_float128(uint64_t a STATUS_PARAM) { if (a == 0) { return float128_zero; @@ -1509,6 +1582,72 @@ int64 float32_to_int64( float32 a STATUS_PARAM ) /*---------------------------------------------------------------------------- | Returns the result of converting the single-precision floating-point value +| `a' to the 64-bit unsigned integer format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic---which means in particular that the conversion is rounded +| according to the current rounding mode. If `a' is a NaN, the largest +| unsigned integer is returned. Otherwise, if the conversion overflows, the +| largest unsigned integer is returned. If the 'a' is negative, the result +| is rounded and zero is returned; values that do not round to zero will +| raise the inexact exception flag. +*----------------------------------------------------------------------------*/ + +uint64 float32_to_uint64(float32 a STATUS_PARAM) +{ + flag aSign; + int_fast16_t aExp, shiftCount; + uint32_t aSig; + uint64_t aSig64, aSigExtra; + a = float32_squash_input_denormal(a STATUS_VAR); + + aSig = extractFloat32Frac(a); + aExp = extractFloat32Exp(a); + aSign = extractFloat32Sign(a); + if ((aSign) && (aExp > 126)) { + float_raise(float_flag_invalid STATUS_VAR); + if (float32_is_any_nan(a)) { + return LIT64(0xFFFFFFFFFFFFFFFF); + } else { + return 0; + } + } + shiftCount = 0xBE - aExp; + if (aExp) { + aSig |= 0x00800000; + } + if (shiftCount < 0) { + float_raise(float_flag_invalid STATUS_VAR); + return LIT64(0xFFFFFFFFFFFFFFFF); + } + + aSig64 = aSig; + aSig64 <<= 40; + shift64ExtraRightJamming(aSig64, 0, shiftCount, &aSig64, &aSigExtra); + return roundAndPackUint64(aSign, aSig64, aSigExtra STATUS_VAR); +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the single-precision floating-point value +| `a' to the 64-bit unsigned integer format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic, except that the conversion is always rounded toward zero. If +| `a' is a NaN, the largest unsigned integer is returned. Otherwise, if the +| conversion overflows, the largest unsigned integer is returned. If the +| 'a' is negative, the result is rounded and zero is returned; values that do +| not round to zero will raise the inexact flag. +*----------------------------------------------------------------------------*/ + +uint64 float32_to_uint64_round_to_zero(float32 a STATUS_PARAM) +{ + signed char current_rounding_mode = STATUS(float_rounding_mode); + set_float_rounding_mode(float_round_to_zero STATUS_VAR); + int64_t v = float32_to_uint64(a STATUS_VAR); + set_float_rounding_mode(current_rounding_mode STATUS_VAR); + return v; +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the single-precision floating-point value | `a' to the 64-bit two's complement integer format. The conversion is | performed according to the IEC/IEEE Standard for Binary Floating-Point | Arithmetic, except that the conversion is always rounded toward zero. If @@ -1656,7 +1795,6 @@ float32 float32_round_to_int( float32 a STATUS_PARAM) flag aSign; int_fast16_t aExp; uint32_t lastBitMask, roundBitsMask; - int8 roundingMode; uint32_t z; a = float32_squash_input_denormal(a STATUS_VAR); @@ -1677,6 +1815,11 @@ float32 float32_round_to_int( float32 a STATUS_PARAM) return packFloat32( aSign, 0x7F, 0 ); } break; + case float_round_ties_away: + if (aExp == 0x7E) { + return packFloat32(aSign, 0x7F, 0); + } + break; case float_round_down: return make_float32(aSign ? 0xBF800000 : 0); case float_round_up: @@ -1688,15 +1831,30 @@ float32 float32_round_to_int( float32 a STATUS_PARAM) lastBitMask <<= 0x96 - aExp; roundBitsMask = lastBitMask - 1; z = float32_val(a); - roundingMode = STATUS(float_rounding_mode); - if ( roundingMode == float_round_nearest_even ) { + switch (STATUS(float_rounding_mode)) { + case float_round_nearest_even: z += lastBitMask>>1; - if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; - } - else if ( roundingMode != float_round_to_zero ) { - if ( extractFloat32Sign( make_float32(z) ) ^ ( roundingMode == float_round_up ) ) { + if ((z & roundBitsMask) == 0) { + z &= ~lastBitMask; + } + break; + case float_round_ties_away: + z += lastBitMask >> 1; + break; + case float_round_to_zero: + break; + case float_round_up: + if (!extractFloat32Sign(make_float32(z))) { z += roundBitsMask; } + break; + case float_round_down: + if (extractFloat32Sign(make_float32(z))) { + z += roundBitsMask; + } + break; + default: + abort(); } z &= ~ roundBitsMask; if ( z != float32_val(a) ) STATUS(float_exception_flags) |= float_flag_inexact; @@ -2234,6 +2392,17 @@ float32 float32_muladd(float32 a, float32 b, float32 c, int flags STATUS_PARAM) } } /* Zero plus something non-zero : just return the something */ + if (flags & float_muladd_halve_result) { + if (cExp == 0) { + normalizeFloat32Subnormal(cSig, &cExp, &cSig); + } + /* Subtract one to halve, and one again because roundAndPackFloat32 + * wants one less than the true exponent. + */ + cExp -= 2; + cSig = (cSig | 0x00800000) << 7; + return roundAndPackFloat32(cSign ^ signflip, cExp, cSig STATUS_VAR); + } return packFloat32(cSign ^ signflip, cExp, cSig); } @@ -2270,6 +2439,9 @@ float32 float32_muladd(float32 a, float32 b, float32 c, int flags STATUS_PARAM) /* Throw out the special case of c being an exact zero now */ shift64RightJamming(pSig64, 32, &pSig64); pSig = pSig64; + if (flags & float_muladd_halve_result) { + pExp--; + } return roundAndPackFloat32(zSign, pExp - 1, pSig STATUS_VAR); } @@ -2334,6 +2506,10 @@ float32 float32_muladd(float32 a, float32 b, float32 c, int flags STATUS_PARAM) zSig64 <<= shiftcount; zExp -= shiftcount; } + if (flags & float_muladd_halve_result) { + zExp--; + } + shift64RightJamming(zSig64, 32, &zSig64); return roundAndPackFloat32(zSign, zExp, zSig64 STATUS_VAR); } @@ -3005,6 +3181,128 @@ static float16 packFloat16(flag zSign, int_fast16_t zExp, uint16_t zSig) (((uint32_t)zSign) << 15) + (((uint32_t)zExp) << 10) + zSig); } +/*---------------------------------------------------------------------------- +| Takes an abstract floating-point value having sign `zSign', exponent `zExp', +| and significand `zSig', and returns the proper half-precision floating- +| point value corresponding to the abstract input. Ordinarily, the abstract +| value is simply rounded and packed into the half-precision format, with +| the inexact exception raised if the abstract input cannot be represented +| exactly. However, if the abstract value is too large, the overflow and +| inexact exceptions are raised and an infinity or maximal finite value is +| returned. If the abstract value is too small, the input value is rounded to +| a subnormal number, and the underflow and inexact exceptions are raised if +| the abstract input cannot be represented exactly as a subnormal half- +| precision floating-point number. +| The `ieee' flag indicates whether to use IEEE standard half precision, or +| ARM-style "alternative representation", which omits the NaN and Inf +| encodings in order to raise the maximum representable exponent by one. +| The input significand `zSig' has its binary point between bits 22 +| and 23, which is 13 bits to the left of the usual location. This shifted +| significand must be normalized or smaller. If `zSig' is not normalized, +| `zExp' must be 0; in that case, the result returned is a subnormal number, +| and it must not require rounding. In the usual case that `zSig' is +| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. +| Note the slightly odd position of the binary point in zSig compared with the +| other roundAndPackFloat functions. This should probably be fixed if we +| need to implement more float16 routines than just conversion. +| The handling of underflow and overflow follows the IEC/IEEE Standard for +| Binary Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +static float32 roundAndPackFloat16(flag zSign, int_fast16_t zExp, + uint32_t zSig, flag ieee STATUS_PARAM) +{ + int maxexp = ieee ? 29 : 30; + uint32_t mask; + uint32_t increment; + bool rounding_bumps_exp; + bool is_tiny = false; + + /* Calculate the mask of bits of the mantissa which are not + * representable in half-precision and will be lost. + */ + if (zExp < 1) { + /* Will be denormal in halfprec */ + mask = 0x00ffffff; + if (zExp >= -11) { + mask >>= 11 + zExp; + } + } else { + /* Normal number in halfprec */ + mask = 0x00001fff; + } + + switch (STATUS(float_rounding_mode)) { + case float_round_nearest_even: + increment = (mask + 1) >> 1; + if ((zSig & mask) == increment) { + increment = zSig & (increment << 1); + } + break; + case float_round_ties_away: + increment = (mask + 1) >> 1; + break; + case float_round_up: + increment = zSign ? 0 : mask; + break; + case float_round_down: + increment = zSign ? mask : 0; + break; + default: /* round_to_zero */ + increment = 0; + break; + } + + rounding_bumps_exp = (zSig + increment >= 0x01000000); + + if (zExp > maxexp || (zExp == maxexp && rounding_bumps_exp)) { + if (ieee) { + float_raise(float_flag_overflow | float_flag_inexact STATUS_VAR); + return packFloat16(zSign, 0x1f, 0); + } else { + float_raise(float_flag_invalid STATUS_VAR); + return packFloat16(zSign, 0x1f, 0x3ff); + } + } + + if (zExp < 0) { + /* Note that flush-to-zero does not affect half-precision results */ + is_tiny = + (STATUS(float_detect_tininess) == float_tininess_before_rounding) + || (zExp < -1) + || (!rounding_bumps_exp); + } + if (zSig & mask) { + float_raise(float_flag_inexact STATUS_VAR); + if (is_tiny) { + float_raise(float_flag_underflow STATUS_VAR); + } + } + + zSig += increment; + if (rounding_bumps_exp) { + zSig >>= 1; + zExp++; + } + + if (zExp < -10) { + return packFloat16(zSign, 0, 0); + } + if (zExp < 0) { + zSig >>= -zExp; + zExp = 0; + } + return packFloat16(zSign, zExp, zSig >> 13); +} + +static void normalizeFloat16Subnormal(uint32_t aSig, int_fast16_t *zExpPtr, + uint32_t *zSigPtr) +{ + int8_t shiftCount = countLeadingZeros32(aSig) - 21; + *zSigPtr = aSig << shiftCount; + *zExpPtr = 1 - shiftCount; +} + /* Half precision floats come in two formats: standard IEEE and "ARM" format. The latter gains extra exponent range by omitting the NaN/Inf encodings. */ @@ -3025,15 +3323,12 @@ float32 float16_to_float32(float16 a, flag ieee STATUS_PARAM) return packFloat32(aSign, 0xff, 0); } if (aExp == 0) { - int8 shiftCount; - if (aSig == 0) { return packFloat32(aSign, 0, 0); } - shiftCount = countLeadingZeros32( aSig ) - 21; - aSig = aSig << shiftCount; - aExp = -shiftCount; + normalizeFloat16Subnormal(aSig, &aExp, &aSig); + aExp--; } return packFloat32( aSign, aExp + 0x70, aSig << 13); } @@ -3043,9 +3338,7 @@ float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM) flag aSign; int_fast16_t aExp; uint32_t aSig; - uint32_t mask; - uint32_t increment; - int8 roundingMode; + a = float32_squash_input_denormal(a STATUS_VAR); aSig = extractFloat32Frac( a ); @@ -3054,11 +3347,12 @@ float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM) if ( aExp == 0xFF ) { if (aSig) { /* Input is a NaN */ - float16 r = commonNaNToFloat16( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR ); if (!ieee) { + float_raise(float_flag_invalid STATUS_VAR); return packFloat16(aSign, 0, 0); } - return r; + return commonNaNToFloat16( + float32ToCommonNaN(a STATUS_VAR) STATUS_VAR); } /* Infinity */ if (!ieee) { @@ -3070,66 +3364,92 @@ float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM) if (aExp == 0 && aSig == 0) { return packFloat16(aSign, 0, 0); } - /* Decimal point between bits 22 and 23. */ + /* Decimal point between bits 22 and 23. Note that we add the 1 bit + * even if the input is denormal; however this is harmless because + * the largest possible single-precision denormal is still smaller + * than the smallest representable half-precision denormal, and so we + * will end up ignoring aSig and returning via the "always return zero" + * codepath. + */ aSig |= 0x00800000; - aExp -= 0x7f; - if (aExp < -14) { - mask = 0x00ffffff; - if (aExp >= -24) { - mask >>= 25 + aExp; + aExp -= 0x71; + + return roundAndPackFloat16(aSign, aExp, aSig, ieee STATUS_VAR); +} + +float64 float16_to_float64(float16 a, flag ieee STATUS_PARAM) +{ + flag aSign; + int_fast16_t aExp; + uint32_t aSig; + + aSign = extractFloat16Sign(a); + aExp = extractFloat16Exp(a); + aSig = extractFloat16Frac(a); + + if (aExp == 0x1f && ieee) { + if (aSig) { + return commonNaNToFloat64( + float16ToCommonNaN(a STATUS_VAR) STATUS_VAR); } - } else { - mask = 0x00001fff; + return packFloat64(aSign, 0x7ff, 0); } - if (aSig & mask) { - float_raise( float_flag_underflow STATUS_VAR ); - roundingMode = STATUS(float_rounding_mode); - switch (roundingMode) { - case float_round_nearest_even: - increment = (mask + 1) >> 1; - if ((aSig & mask) == increment) { - increment = aSig & (increment << 1); - } - break; - case float_round_up: - increment = aSign ? 0 : mask; - break; - case float_round_down: - increment = aSign ? mask : 0; - break; - default: /* round_to_zero */ - increment = 0; - break; - } - aSig += increment; - if (aSig >= 0x01000000) { - aSig >>= 1; - aExp++; + if (aExp == 0) { + if (aSig == 0) { + return packFloat64(aSign, 0, 0); } - } else if (aExp < -14 - && STATUS(float_detect_tininess) == float_tininess_before_rounding) { - float_raise( float_flag_underflow STATUS_VAR); + + normalizeFloat16Subnormal(aSig, &aExp, &aSig); + aExp--; } + return packFloat64(aSign, aExp + 0x3f0, ((uint64_t)aSig) << 42); +} - if (ieee) { - if (aExp > 15) { - float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR); - return packFloat16(aSign, 0x1f, 0); +float16 float64_to_float16(float64 a, flag ieee STATUS_PARAM) +{ + flag aSign; + int_fast16_t aExp; + uint64_t aSig; + uint32_t zSig; + + a = float64_squash_input_denormal(a STATUS_VAR); + + aSig = extractFloat64Frac(a); + aExp = extractFloat64Exp(a); + aSign = extractFloat64Sign(a); + if (aExp == 0x7FF) { + if (aSig) { + /* Input is a NaN */ + if (!ieee) { + float_raise(float_flag_invalid STATUS_VAR); + return packFloat16(aSign, 0, 0); + } + return commonNaNToFloat16( + float64ToCommonNaN(a STATUS_VAR) STATUS_VAR); } - } else { - if (aExp > 16) { - float_raise(float_flag_invalid | float_flag_inexact STATUS_VAR); + /* Infinity */ + if (!ieee) { + float_raise(float_flag_invalid STATUS_VAR); return packFloat16(aSign, 0x1f, 0x3ff); } + return packFloat16(aSign, 0x1f, 0); } - if (aExp < -24) { + shift64RightJamming(aSig, 29, &aSig); + zSig = aSig; + if (aExp == 0 && zSig == 0) { return packFloat16(aSign, 0, 0); } - if (aExp < -14) { - aSig >>= -14 - aExp; - aExp = -14; - } - return packFloat16(aSign, aExp + 14, aSig >> 13); + /* Decimal point between bits 22 and 23. Note that we add the 1 bit + * even if the input is denormal; however this is harmless because + * the largest possible single-precision denormal is still smaller + * than the smallest representable half-precision denormal, and so we + * will end up ignoring aSig and returning via the "always return zero" + * codepath. + */ + zSig |= 0x00800000; + aExp -= 0x3F1; + + return roundAndPackFloat16(aSign, aExp, zSig, ieee STATUS_VAR); } /*---------------------------------------------------------------------------- @@ -3206,7 +3526,6 @@ float64 float64_round_to_int( float64 a STATUS_PARAM ) flag aSign; int_fast16_t aExp; uint64_t lastBitMask, roundBitsMask; - int8 roundingMode; uint64_t z; a = float64_squash_input_denormal(a STATUS_VAR); @@ -3227,6 +3546,11 @@ float64 float64_round_to_int( float64 a STATUS_PARAM ) return packFloat64( aSign, 0x3FF, 0 ); } break; + case float_round_ties_away: + if (aExp == 0x3FE) { + return packFloat64(aSign, 0x3ff, 0); + } + break; case float_round_down: return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0); case float_round_up: @@ -3239,15 +3563,30 @@ float64 float64_round_to_int( float64 a STATUS_PARAM ) lastBitMask <<= 0x433 - aExp; roundBitsMask = lastBitMask - 1; z = float64_val(a); - roundingMode = STATUS(float_rounding_mode); - if ( roundingMode == float_round_nearest_even ) { - z += lastBitMask>>1; - if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; - } - else if ( roundingMode != float_round_to_zero ) { - if ( extractFloat64Sign( make_float64(z) ) ^ ( roundingMode == float_round_up ) ) { + switch (STATUS(float_rounding_mode)) { + case float_round_nearest_even: + z += lastBitMask >> 1; + if ((z & roundBitsMask) == 0) { + z &= ~lastBitMask; + } + break; + case float_round_ties_away: + z += lastBitMask >> 1; + break; + case float_round_to_zero: + break; + case float_round_up: + if (!extractFloat64Sign(make_float64(z))) { + z += roundBitsMask; + } + break; + case float_round_down: + if (extractFloat64Sign(make_float64(z))) { z += roundBitsMask; } + break; + default: + abort(); } z &= ~ roundBitsMask; if ( z != float64_val(a) ) @@ -3787,6 +4126,17 @@ float64 float64_muladd(float64 a, float64 b, float64 c, int flags STATUS_PARAM) } } /* Zero plus something non-zero : just return the something */ + if (flags & float_muladd_halve_result) { + if (cExp == 0) { + normalizeFloat64Subnormal(cSig, &cExp, &cSig); + } + /* Subtract one to halve, and one again because roundAndPackFloat64 + * wants one less than the true exponent. + */ + cExp -= 2; + cSig = (cSig | 0x0010000000000000ULL) << 10; + return roundAndPackFloat64(cSign ^ signflip, cExp, cSig STATUS_VAR); + } return packFloat64(cSign ^ signflip, cExp, cSig); } @@ -3822,6 +4172,9 @@ float64 float64_muladd(float64 a, float64 b, float64 c, int flags STATUS_PARAM) if (!cSig) { /* Throw out the special case of c being an exact zero now */ shift128RightJamming(pSig0, pSig1, 64, &pSig0, &pSig1); + if (flags & float_muladd_halve_result) { + pExp--; + } return roundAndPackFloat64(zSign, pExp - 1, pSig1 STATUS_VAR); } @@ -3858,6 +4211,9 @@ float64 float64_muladd(float64 a, float64 b, float64 c, int flags STATUS_PARAM) zExp--; } shift128RightJamming(zSig0, zSig1, 64, &zSig0, &zSig1); + if (flags & float_muladd_halve_result) { + zExp--; + } return roundAndPackFloat64(zSign, zExp, zSig1 STATUS_VAR); } else { /* Subtraction */ @@ -3908,6 +4264,9 @@ float64 float64_muladd(float64 a, float64 b, float64 c, int flags STATUS_PARAM) zExp -= (shiftcount + 64); } } + if (flags & float_muladd_halve_result) { + zExp--; + } return roundAndPackFloat64(zSign, zExp, zSig0 STATUS_VAR); } } @@ -4475,7 +4834,6 @@ floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM ) flag aSign; int32 aExp; uint64_t lastBitMask, roundBitsMask; - int8 roundingMode; floatx80 z; aExp = extractFloatx80Exp( a ); @@ -4500,6 +4858,11 @@ floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM ) packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); } break; + case float_round_ties_away: + if (aExp == 0x3FFE) { + return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000)); + } + break; case float_round_down: return aSign ? @@ -4516,15 +4879,30 @@ floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM ) lastBitMask <<= 0x403E - aExp; roundBitsMask = lastBitMask - 1; z = a; - roundingMode = STATUS(float_rounding_mode); - if ( roundingMode == float_round_nearest_even ) { + switch (STATUS(float_rounding_mode)) { + case float_round_nearest_even: z.low += lastBitMask>>1; - if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; - } - else if ( roundingMode != float_round_to_zero ) { - if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) { + if ((z.low & roundBitsMask) == 0) { + z.low &= ~lastBitMask; + } + break; + case float_round_ties_away: + z.low += lastBitMask >> 1; + break; + case float_round_to_zero: + break; + case float_round_up: + if (!extractFloatx80Sign(z)) { + z.low += roundBitsMask; + } + break; + case float_round_down: + if (extractFloatx80Sign(z)) { z.low += roundBitsMask; } + break; + default: + abort(); } z.low &= ~ roundBitsMask; if ( z.low == 0 ) { @@ -5550,7 +5928,6 @@ float128 float128_round_to_int( float128 a STATUS_PARAM ) flag aSign; int32 aExp; uint64_t lastBitMask, roundBitsMask; - int8 roundingMode; float128 z; aExp = extractFloat128Exp( a ); @@ -5567,8 +5944,8 @@ float128 float128_round_to_int( float128 a STATUS_PARAM ) lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1; roundBitsMask = lastBitMask - 1; z = a; - roundingMode = STATUS(float_rounding_mode); - if ( roundingMode == float_round_nearest_even ) { + switch (STATUS(float_rounding_mode)) { + case float_round_nearest_even: if ( lastBitMask ) { add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low ); if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; @@ -5579,12 +5956,30 @@ float128 float128_round_to_int( float128 a STATUS_PARAM ) if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1; } } - } - else if ( roundingMode != float_round_to_zero ) { - if ( extractFloat128Sign( z ) - ^ ( roundingMode == float_round_up ) ) { - add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low ); + break; + case float_round_ties_away: + if (lastBitMask) { + add128(z.high, z.low, 0, lastBitMask >> 1, &z.high, &z.low); + } else { + if ((int64_t) z.low < 0) { + ++z.high; + } + } + break; + case float_round_to_zero: + break; + case float_round_up: + if (!extractFloat128Sign(z)) { + add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low); + } + break; + case float_round_down: + if (extractFloat128Sign(z)) { + add128(z.high, z.low, 0, roundBitsMask, &z.high, &z.low); } + break; + default: + abort(); } z.low &= ~ roundBitsMask; } @@ -5602,6 +5997,11 @@ float128 float128_round_to_int( float128 a STATUS_PARAM ) return packFloat128( aSign, 0x3FFF, 0, 0 ); } break; + case float_round_ties_away: + if (aExp == 0x3FFE) { + return packFloat128(aSign, 0x3FFF, 0, 0); + } + break; case float_round_down: return aSign ? packFloat128( 1, 0x3FFF, 0, 0 ) @@ -5618,19 +6018,32 @@ float128 float128_round_to_int( float128 a STATUS_PARAM ) roundBitsMask = lastBitMask - 1; z.low = 0; z.high = a.high; - roundingMode = STATUS(float_rounding_mode); - if ( roundingMode == float_round_nearest_even ) { + switch (STATUS(float_rounding_mode)) { + case float_round_nearest_even: z.high += lastBitMask>>1; if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) { z.high &= ~ lastBitMask; } - } - else if ( roundingMode != float_round_to_zero ) { - if ( extractFloat128Sign( z ) - ^ ( roundingMode == float_round_up ) ) { + break; + case float_round_ties_away: + z.high += lastBitMask>>1; + break; + case float_round_to_zero: + break; + case float_round_up: + if (!extractFloat128Sign(z)) { z.high |= ( a.low != 0 ); z.high += roundBitsMask; } + break; + case float_round_down: + if (extractFloat128Sign(z)) { + z.high |= (a.low != 0); + z.high += roundBitsMask; + } + break; + default: + abort(); } z.high &= ~ roundBitsMask; } @@ -6418,12 +6831,12 @@ int float128_unordered_quiet( float128 a, float128 b STATUS_PARAM ) } /* misc functions */ -float32 uint32_to_float32( uint32 a STATUS_PARAM ) +float32 uint32_to_float32(uint32_t a STATUS_PARAM) { return int64_to_float32(a STATUS_VAR); } -float64 uint32_to_float64( uint32 a STATUS_PARAM ) +float64 uint32_to_float64(uint32_t a STATUS_PARAM) { return int64_to_float64(a STATUS_VAR); } @@ -6432,17 +6845,18 @@ uint32 float32_to_uint32( float32 a STATUS_PARAM ) { int64_t v; uint32 res; + int old_exc_flags = get_float_exception_flags(status); v = float32_to_int64(a STATUS_VAR); if (v < 0) { res = 0; - float_raise( float_flag_invalid STATUS_VAR); } else if (v > 0xffffffff) { res = 0xffffffff; - float_raise( float_flag_invalid STATUS_VAR); } else { - res = v; + return v; } + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); return res; } @@ -6450,17 +6864,58 @@ uint32 float32_to_uint32_round_to_zero( float32 a STATUS_PARAM ) { int64_t v; uint32 res; + int old_exc_flags = get_float_exception_flags(status); v = float32_to_int64_round_to_zero(a STATUS_VAR); if (v < 0) { res = 0; - float_raise( float_flag_invalid STATUS_VAR); } else if (v > 0xffffffff) { res = 0xffffffff; - float_raise( float_flag_invalid STATUS_VAR); } else { - res = v; + return v; } + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); + return res; +} + +int_fast16_t float32_to_int16(float32 a STATUS_PARAM) +{ + int32_t v; + int_fast16_t res; + int old_exc_flags = get_float_exception_flags(status); + + v = float32_to_int32(a STATUS_VAR); + if (v < -0x8000) { + res = -0x8000; + } else if (v > 0x7fff) { + res = 0x7fff; + } else { + return v; + } + + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); + return res; +} + +uint_fast16_t float32_to_uint16(float32 a STATUS_PARAM) +{ + int32_t v; + uint_fast16_t res; + int old_exc_flags = get_float_exception_flags(status); + + v = float32_to_int32(a STATUS_VAR); + if (v < 0) { + res = 0; + } else if (v > 0xffff) { + res = 0xffff; + } else { + return v; + } + + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); return res; } @@ -6468,53 +6923,92 @@ uint_fast16_t float32_to_uint16_round_to_zero(float32 a STATUS_PARAM) { int64_t v; uint_fast16_t res; + int old_exc_flags = get_float_exception_flags(status); v = float32_to_int64_round_to_zero(a STATUS_VAR); if (v < 0) { res = 0; - float_raise( float_flag_invalid STATUS_VAR); } else if (v > 0xffff) { res = 0xffff; - float_raise( float_flag_invalid STATUS_VAR); } else { - res = v; + return v; } + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); return res; } uint32 float64_to_uint32( float64 a STATUS_PARAM ) { - int64_t v; + uint64_t v; uint32 res; + int old_exc_flags = get_float_exception_flags(status); - v = float64_to_int64(a STATUS_VAR); - if (v < 0) { - res = 0; - float_raise( float_flag_invalid STATUS_VAR); - } else if (v > 0xffffffff) { + v = float64_to_uint64(a STATUS_VAR); + if (v > 0xffffffff) { res = 0xffffffff; - float_raise( float_flag_invalid STATUS_VAR); } else { - res = v; + return v; } + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); return res; } uint32 float64_to_uint32_round_to_zero( float64 a STATUS_PARAM ) { - int64_t v; + uint64_t v; uint32 res; + int old_exc_flags = get_float_exception_flags(status); - v = float64_to_int64_round_to_zero(a STATUS_VAR); + v = float64_to_uint64_round_to_zero(a STATUS_VAR); + if (v > 0xffffffff) { + res = 0xffffffff; + } else { + return v; + } + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); + return res; +} + +int_fast16_t float64_to_int16(float64 a STATUS_PARAM) +{ + int64_t v; + int_fast16_t res; + int old_exc_flags = get_float_exception_flags(status); + + v = float64_to_int32(a STATUS_VAR); + if (v < -0x8000) { + res = -0x8000; + } else if (v > 0x7fff) { + res = 0x7fff; + } else { + return v; + } + + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); + return res; +} + +uint_fast16_t float64_to_uint16(float64 a STATUS_PARAM) +{ + int64_t v; + uint_fast16_t res; + int old_exc_flags = get_float_exception_flags(status); + + v = float64_to_int32(a STATUS_VAR); if (v < 0) { res = 0; - float_raise( float_flag_invalid STATUS_VAR); - } else if (v > 0xffffffff) { - res = 0xffffffff; - float_raise( float_flag_invalid STATUS_VAR); + } else if (v > 0xffff) { + res = 0xffff; } else { - res = v; + return v; } + + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); return res; } @@ -6522,41 +7016,75 @@ uint_fast16_t float64_to_uint16_round_to_zero(float64 a STATUS_PARAM) { int64_t v; uint_fast16_t res; + int old_exc_flags = get_float_exception_flags(status); v = float64_to_int64_round_to_zero(a STATUS_VAR); if (v < 0) { res = 0; - float_raise( float_flag_invalid STATUS_VAR); } else if (v > 0xffff) { res = 0xffff; - float_raise( float_flag_invalid STATUS_VAR); } else { - res = v; + return v; } + set_float_exception_flags(old_exc_flags, status); + float_raise(float_flag_invalid STATUS_VAR); return res; } -/* FIXME: This looks broken. */ -uint64_t float64_to_uint64 (float64 a STATUS_PARAM) -{ - int64_t v; +/*---------------------------------------------------------------------------- +| Returns the result of converting the double-precision floating-point value +| `a' to the 64-bit unsigned integer format. The conversion is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic---which means in particular that the conversion is rounded +| according to the current rounding mode. If `a' is a NaN, the largest +| positive integer is returned. If the conversion overflows, the +| largest unsigned integer is returned. If 'a' is negative, the value is +| rounded and zero is returned; negative values that do not round to zero +| will raise the inexact exception. +*----------------------------------------------------------------------------*/ - v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR)); - v += float64_val(a); - v = float64_to_int64(make_float64(v) STATUS_VAR); +uint64_t float64_to_uint64(float64 a STATUS_PARAM) +{ + flag aSign; + int_fast16_t aExp, shiftCount; + uint64_t aSig, aSigExtra; + a = float64_squash_input_denormal(a STATUS_VAR); - return v - INT64_MIN; + aSig = extractFloat64Frac(a); + aExp = extractFloat64Exp(a); + aSign = extractFloat64Sign(a); + if (aSign && (aExp > 1022)) { + float_raise(float_flag_invalid STATUS_VAR); + if (float64_is_any_nan(a)) { + return LIT64(0xFFFFFFFFFFFFFFFF); + } else { + return 0; + } + } + if (aExp) { + aSig |= LIT64(0x0010000000000000); + } + shiftCount = 0x433 - aExp; + if (shiftCount <= 0) { + if (0x43E < aExp) { + float_raise(float_flag_invalid STATUS_VAR); + return LIT64(0xFFFFFFFFFFFFFFFF); + } + aSigExtra = 0; + aSig <<= -shiftCount; + } else { + shift64ExtraRightJamming(aSig, 0, shiftCount, &aSig, &aSigExtra); + } + return roundAndPackUint64(aSign, aSig, aSigExtra STATUS_VAR); } uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM) { - int64_t v; - - v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR)); - v += float64_val(a); - v = float64_to_int64_round_to_zero(make_float64(v) STATUS_VAR); - - return v - INT64_MIN; + signed char current_rounding_mode = STATUS(float_rounding_mode); + set_float_rounding_mode(float_round_to_zero STATUS_VAR); + int64_t v = float64_to_uint64(a STATUS_VAR); + set_float_rounding_mode(current_rounding_mode STATUS_VAR); + return v; } #define COMPARE(s, nan_exp) \ @@ -6705,10 +7233,17 @@ int float128_compare_quiet( float128 a, float128 b STATUS_PARAM ) /* min() and max() functions. These can't be implemented as * 'compare and pick one input' because that would mishandle * NaNs and +0 vs -0. + * + * minnum() and maxnum() functions. These are similar to the min() + * and max() functions but if one of the arguments is a QNaN and + * the other is numerical then the numerical argument is returned. + * minnum() and maxnum correspond to the IEEE 754-2008 minNum() + * and maxNum() operations. min() and max() are the typical min/max + * semantics provided by many CPUs which predate that specification. */ -#define MINMAX(s, nan_exp) \ +#define MINMAX(s) \ INLINE float ## s float ## s ## _minmax(float ## s a, float ## s b, \ - int ismin STATUS_PARAM ) \ + int ismin, int isieee STATUS_PARAM) \ { \ flag aSign, bSign; \ uint ## s ## _t av, bv; \ @@ -6716,6 +7251,15 @@ INLINE float ## s float ## s ## _minmax(float ## s a, float ## s b, \ b = float ## s ## _squash_input_denormal(b STATUS_VAR); \ if (float ## s ## _is_any_nan(a) || \ float ## s ## _is_any_nan(b)) { \ + if (isieee) { \ + if (float ## s ## _is_quiet_nan(a) && \ + !float ## s ##_is_any_nan(b)) { \ + return b; \ + } else if (float ## s ## _is_quiet_nan(b) && \ + !float ## s ## _is_any_nan(a)) { \ + return a; \ + } \ + } \ return propagateFloat ## s ## NaN(a, b STATUS_VAR); \ } \ aSign = extractFloat ## s ## Sign(a); \ @@ -6739,16 +7283,26 @@ INLINE float ## s float ## s ## _minmax(float ## s a, float ## s b, \ \ float ## s float ## s ## _min(float ## s a, float ## s b STATUS_PARAM) \ { \ - return float ## s ## _minmax(a, b, 1 STATUS_VAR); \ + return float ## s ## _minmax(a, b, 1, 0 STATUS_VAR); \ } \ \ float ## s float ## s ## _max(float ## s a, float ## s b STATUS_PARAM) \ { \ - return float ## s ## _minmax(a, b, 0 STATUS_VAR); \ + return float ## s ## _minmax(a, b, 0, 0 STATUS_VAR); \ +} \ + \ +float ## s float ## s ## _minnum(float ## s a, float ## s b STATUS_PARAM) \ +{ \ + return float ## s ## _minmax(a, b, 1, 1 STATUS_VAR); \ +} \ + \ +float ## s float ## s ## _maxnum(float ## s a, float ## s b STATUS_PARAM) \ +{ \ + return float ## s ## _minmax(a, b, 0, 1 STATUS_VAR); \ } -MINMAX(32, 0xff) -MINMAX(64, 0x7ff) +MINMAX(32) +MINMAX(64) /* Multiply A by 2 raised to the power N. */ @@ -6769,10 +7323,13 @@ float32 float32_scalbn( float32 a, int n STATUS_PARAM ) } return a; } - if ( aExp != 0 ) + if (aExp != 0) { aSig |= 0x00800000; - else if ( aSig == 0 ) + } else if (aSig == 0) { return a; + } else { + aExp++; + } if (n > 0x200) { n = 0x200; @@ -6802,10 +7359,13 @@ float64 float64_scalbn( float64 a, int n STATUS_PARAM ) } return a; } - if ( aExp != 0 ) + if (aExp != 0) { aSig |= LIT64( 0x0010000000000000 ); - else if ( aSig == 0 ) + } else if (aSig == 0) { return a; + } else { + aExp++; + } if (n > 0x1000) { n = 0x1000; @@ -6835,8 +7395,12 @@ floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM ) return a; } - if (aExp == 0 && aSig == 0) - return a; + if (aExp == 0) { + if (aSig == 0) { + return a; + } + aExp++; + } if (n > 0x10000) { n = 0x10000; @@ -6865,10 +7429,13 @@ float128 float128_scalbn( float128 a, int n STATUS_PARAM ) } return a; } - if ( aExp != 0 ) + if (aExp != 0) { aSig0 |= LIT64( 0x0001000000000000 ); - else if ( aSig0 == 0 && aSig1 == 0 ) + } else if (aSig0 == 0 && aSig1 == 0) { return a; + } else { + aExp++; + } if (n > 0x10000) { n = 0x10000; |