summaryrefslogtreecommitdiff
path: root/fpu
diff options
context:
space:
mode:
authorAnas Nashif <anas.nashif@intel.com>2012-11-06 07:50:24 -0800
committerAnas Nashif <anas.nashif@intel.com>2012-11-06 07:50:24 -0800
commit060629c6ef0b7e5c267d84c91600113264d33120 (patch)
tree18fcb144ac71b9c4d08ee5d1dc58e2b16c109a5a /fpu
downloadqemu-060629c6ef0b7e5c267d84c91600113264d33120.tar.gz
qemu-060629c6ef0b7e5c267d84c91600113264d33120.tar.bz2
qemu-060629c6ef0b7e5c267d84c91600113264d33120.zip
Imported Upstream version 1.2.0upstream/1.2.0
Diffstat (limited to 'fpu')
-rw-r--r--fpu/softfloat-macros.h749
-rw-r--r--fpu/softfloat-specialize.h1066
-rw-r--r--fpu/softfloat.c6862
-rw-r--r--fpu/softfloat.h633
4 files changed, 9310 insertions, 0 deletions
diff --git a/fpu/softfloat-macros.h b/fpu/softfloat-macros.h
new file mode 100644
index 000000000..b5164af7f
--- /dev/null
+++ b/fpu/softfloat-macros.h
@@ -0,0 +1,749 @@
+/*
+ * QEMU float support macros
+ *
+ * Derived from SoftFloat.
+ */
+
+/*============================================================================
+
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b.
+
+Written by John R. Hauser. This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704. Funding was partially provided by the
+National Science Foundation under grant MIP-9311980. The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek. More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+/*----------------------------------------------------------------------------
+| This macro tests for minimum version of the GNU C compiler.
+*----------------------------------------------------------------------------*/
+#if defined(__GNUC__) && defined(__GNUC_MINOR__)
+# define SOFTFLOAT_GNUC_PREREQ(maj, min) \
+ ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
+#else
+# define SOFTFLOAT_GNUC_PREREQ(maj, min) 0
+#endif
+
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by the number of bits given in `count'. If any nonzero
+| bits are shifted off, they are ``jammed'' into the least significant bit of
+| the result by setting the least significant bit to 1. The value of `count'
+| can be arbitrarily large; in particular, if `count' is greater than 32, the
+| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+| The result is stored in the location pointed to by `zPtr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void shift32RightJamming(uint32_t a, int_fast16_t count, uint32_t *zPtr)
+{
+ uint32_t z;
+
+ if ( count == 0 ) {
+ z = a;
+ }
+ else if ( count < 32 ) {
+ z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 );
+ }
+ else {
+ z = ( a != 0 );
+ }
+ *zPtr = z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by the number of bits given in `count'. If any nonzero
+| bits are shifted off, they are ``jammed'' into the least significant bit of
+| the result by setting the least significant bit to 1. The value of `count'
+| can be arbitrarily large; in particular, if `count' is greater than 64, the
+| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+| The result is stored in the location pointed to by `zPtr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void shift64RightJamming(uint64_t a, int_fast16_t count, uint64_t *zPtr)
+{
+ uint64_t z;
+
+ if ( count == 0 ) {
+ z = a;
+ }
+ else if ( count < 64 ) {
+ z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 );
+ }
+ else {
+ z = ( a != 0 );
+ }
+ *zPtr = z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
+| _plus_ the number of bits given in `count'. The shifted result is at most
+| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
+| bits shifted off form a second 64-bit result as follows: The _last_ bit
+| shifted off is the most-significant bit of the extra result, and the other
+| 63 bits of the extra result are all zero if and only if _all_but_the_last_
+| bits shifted off were all zero. This extra result is stored in the location
+| pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
+| (This routine makes more sense if `a0' and `a1' are considered to form
+| a fixed-point value with binary point between `a0' and `a1'. This fixed-
+| point value is shifted right by the number of bits given in `count', and
+| the integer part of the result is returned at the location pointed to by
+| `z0Ptr'. The fractional part of the result may be slightly corrupted as
+| described above, and is returned at the location pointed to by `z1Ptr'.)
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ shift64ExtraRightJamming(
+ uint64_t a0, uint64_t a1, int_fast16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr)
+{
+ uint64_t z0, z1;
+ int8 negCount = ( - count ) & 63;
+
+ if ( count == 0 ) {
+ z1 = a1;
+ z0 = a0;
+ }
+ else if ( count < 64 ) {
+ z1 = ( a0<<negCount ) | ( a1 != 0 );
+ z0 = a0>>count;
+ }
+ else {
+ if ( count == 64 ) {
+ z1 = a0 | ( a1 != 0 );
+ }
+ else {
+ z1 = ( ( a0 | a1 ) != 0 );
+ }
+ z0 = 0;
+ }
+ *z1Ptr = z1;
+ *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
+| number of bits given in `count'. Any bits shifted off are lost. The value
+| of `count' can be arbitrarily large; in particular, if `count' is greater
+| than 128, the result will be 0. The result is broken into two 64-bit pieces
+| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ shift128Right(
+ uint64_t a0, uint64_t a1, int_fast16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr)
+{
+ uint64_t z0, z1;
+ int8 negCount = ( - count ) & 63;
+
+ if ( count == 0 ) {
+ z1 = a1;
+ z0 = a0;
+ }
+ else if ( count < 64 ) {
+ z1 = ( a0<<negCount ) | ( a1>>count );
+ z0 = a0>>count;
+ }
+ else {
+ z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0;
+ z0 = 0;
+ }
+ *z1Ptr = z1;
+ *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
+| number of bits given in `count'. If any nonzero bits are shifted off, they
+| are ``jammed'' into the least significant bit of the result by setting the
+| least significant bit to 1. The value of `count' can be arbitrarily large;
+| in particular, if `count' is greater than 128, the result will be either
+| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
+| nonzero. The result is broken into two 64-bit pieces which are stored at
+| the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ shift128RightJamming(
+ uint64_t a0, uint64_t a1, int_fast16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr)
+{
+ uint64_t z0, z1;
+ int8 negCount = ( - count ) & 63;
+
+ if ( count == 0 ) {
+ z1 = a1;
+ z0 = a0;
+ }
+ else if ( count < 64 ) {
+ z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 );
+ z0 = a0>>count;
+ }
+ else {
+ if ( count == 64 ) {
+ z1 = a0 | ( a1 != 0 );
+ }
+ else if ( count < 128 ) {
+ z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 );
+ }
+ else {
+ z1 = ( ( a0 | a1 ) != 0 );
+ }
+ z0 = 0;
+ }
+ *z1Ptr = z1;
+ *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
+| by 64 _plus_ the number of bits given in `count'. The shifted result is
+| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
+| stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
+| off form a third 64-bit result as follows: The _last_ bit shifted off is
+| the most-significant bit of the extra result, and the other 63 bits of the
+| extra result are all zero if and only if _all_but_the_last_ bits shifted off
+| were all zero. This extra result is stored in the location pointed to by
+| `z2Ptr'. The value of `count' can be arbitrarily large.
+| (This routine makes more sense if `a0', `a1', and `a2' are considered
+| to form a fixed-point value with binary point between `a1' and `a2'. This
+| fixed-point value is shifted right by the number of bits given in `count',
+| and the integer part of the result is returned at the locations pointed to
+| by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
+| corrupted as described above, and is returned at the location pointed to by
+| `z2Ptr'.)
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ shift128ExtraRightJamming(
+ uint64_t a0,
+ uint64_t a1,
+ uint64_t a2,
+ int_fast16_t count,
+ uint64_t *z0Ptr,
+ uint64_t *z1Ptr,
+ uint64_t *z2Ptr
+ )
+{
+ uint64_t z0, z1, z2;
+ int8 negCount = ( - count ) & 63;
+
+ if ( count == 0 ) {
+ z2 = a2;
+ z1 = a1;
+ z0 = a0;
+ }
+ else {
+ if ( count < 64 ) {
+ z2 = a1<<negCount;
+ z1 = ( a0<<negCount ) | ( a1>>count );
+ z0 = a0>>count;
+ }
+ else {
+ if ( count == 64 ) {
+ z2 = a1;
+ z1 = a0;
+ }
+ else {
+ a2 |= a1;
+ if ( count < 128 ) {
+ z2 = a0<<negCount;
+ z1 = a0>>( count & 63 );
+ }
+ else {
+ z2 = ( count == 128 ) ? a0 : ( a0 != 0 );
+ z1 = 0;
+ }
+ }
+ z0 = 0;
+ }
+ z2 |= ( a2 != 0 );
+ }
+ *z2Ptr = z2;
+ *z1Ptr = z1;
+ *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
+| number of bits given in `count'. Any bits shifted off are lost. The value
+| of `count' must be less than 64. The result is broken into two 64-bit
+| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ shortShift128Left(
+ uint64_t a0, uint64_t a1, int_fast16_t count, uint64_t *z0Ptr, uint64_t *z1Ptr)
+{
+
+ *z1Ptr = a1<<count;
+ *z0Ptr =
+ ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
+| by the number of bits given in `count'. Any bits shifted off are lost.
+| The value of `count' must be less than 64. The result is broken into three
+| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
+| `z1Ptr', and `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ shortShift192Left(
+ uint64_t a0,
+ uint64_t a1,
+ uint64_t a2,
+ int_fast16_t count,
+ uint64_t *z0Ptr,
+ uint64_t *z1Ptr,
+ uint64_t *z2Ptr
+ )
+{
+ uint64_t z0, z1, z2;
+ int8 negCount;
+
+ z2 = a2<<count;
+ z1 = a1<<count;
+ z0 = a0<<count;
+ if ( 0 < count ) {
+ negCount = ( ( - count ) & 63 );
+ z1 |= a2>>negCount;
+ z0 |= a1>>negCount;
+ }
+ *z2Ptr = z2;
+ *z1Ptr = z1;
+ *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
+| value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
+| any carry out is lost. The result is broken into two 64-bit pieces which
+| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ add128(
+ uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
+{
+ uint64_t z1;
+
+ z1 = a1 + b1;
+ *z1Ptr = z1;
+ *z0Ptr = a0 + b0 + ( z1 < a1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
+| 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
+| modulo 2^192, so any carry out is lost. The result is broken into three
+| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
+| `z1Ptr', and `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ add192(
+ uint64_t a0,
+ uint64_t a1,
+ uint64_t a2,
+ uint64_t b0,
+ uint64_t b1,
+ uint64_t b2,
+ uint64_t *z0Ptr,
+ uint64_t *z1Ptr,
+ uint64_t *z2Ptr
+ )
+{
+ uint64_t z0, z1, z2;
+ int8 carry0, carry1;
+
+ z2 = a2 + b2;
+ carry1 = ( z2 < a2 );
+ z1 = a1 + b1;
+ carry0 = ( z1 < a1 );
+ z0 = a0 + b0;
+ z1 += carry1;
+ z0 += ( z1 < carry1 );
+ z0 += carry0;
+ *z2Ptr = z2;
+ *z1Ptr = z1;
+ *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
+| 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
+| 2^128, so any borrow out (carry out) is lost. The result is broken into two
+| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
+| `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ sub128(
+ uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr )
+{
+
+ *z1Ptr = a1 - b1;
+ *z0Ptr = a0 - b0 - ( a1 < b1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
+| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
+| Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
+| result is broken into three 64-bit pieces which are stored at the locations
+| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ sub192(
+ uint64_t a0,
+ uint64_t a1,
+ uint64_t a2,
+ uint64_t b0,
+ uint64_t b1,
+ uint64_t b2,
+ uint64_t *z0Ptr,
+ uint64_t *z1Ptr,
+ uint64_t *z2Ptr
+ )
+{
+ uint64_t z0, z1, z2;
+ int8 borrow0, borrow1;
+
+ z2 = a2 - b2;
+ borrow1 = ( a2 < b2 );
+ z1 = a1 - b1;
+ borrow0 = ( a1 < b1 );
+ z0 = a0 - b0;
+ z0 -= ( z1 < borrow1 );
+ z1 -= borrow1;
+ z0 -= borrow0;
+ *z2Ptr = z2;
+ *z1Ptr = z1;
+ *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
+| into two 64-bit pieces which are stored at the locations pointed to by
+| `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void mul64To128( uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr )
+{
+ uint32_t aHigh, aLow, bHigh, bLow;
+ uint64_t z0, zMiddleA, zMiddleB, z1;
+
+ aLow = a;
+ aHigh = a>>32;
+ bLow = b;
+ bHigh = b>>32;
+ z1 = ( (uint64_t) aLow ) * bLow;
+ zMiddleA = ( (uint64_t) aLow ) * bHigh;
+ zMiddleB = ( (uint64_t) aHigh ) * bLow;
+ z0 = ( (uint64_t) aHigh ) * bHigh;
+ zMiddleA += zMiddleB;
+ z0 += ( ( (uint64_t) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 );
+ zMiddleA <<= 32;
+ z1 += zMiddleA;
+ z0 += ( z1 < zMiddleA );
+ *z1Ptr = z1;
+ *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
+| `b' to obtain a 192-bit product. The product is broken into three 64-bit
+| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
+| `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ mul128By64To192(
+ uint64_t a0,
+ uint64_t a1,
+ uint64_t b,
+ uint64_t *z0Ptr,
+ uint64_t *z1Ptr,
+ uint64_t *z2Ptr
+ )
+{
+ uint64_t z0, z1, z2, more1;
+
+ mul64To128( a1, b, &z1, &z2 );
+ mul64To128( a0, b, &z0, &more1 );
+ add128( z0, more1, 0, z1, &z0, &z1 );
+ *z2Ptr = z2;
+ *z1Ptr = z1;
+ *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
+| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
+| product. The product is broken into four 64-bit pieces which are stored at
+| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
+*----------------------------------------------------------------------------*/
+
+INLINE void
+ mul128To256(
+ uint64_t a0,
+ uint64_t a1,
+ uint64_t b0,
+ uint64_t b1,
+ uint64_t *z0Ptr,
+ uint64_t *z1Ptr,
+ uint64_t *z2Ptr,
+ uint64_t *z3Ptr
+ )
+{
+ uint64_t z0, z1, z2, z3;
+ uint64_t more1, more2;
+
+ mul64To128( a1, b1, &z2, &z3 );
+ mul64To128( a1, b0, &z1, &more2 );
+ add128( z1, more2, 0, z2, &z1, &z2 );
+ mul64To128( a0, b0, &z0, &more1 );
+ add128( z0, more1, 0, z1, &z0, &z1 );
+ mul64To128( a0, b1, &more1, &more2 );
+ add128( more1, more2, 0, z2, &more1, &z2 );
+ add128( z0, z1, 0, more1, &z0, &z1 );
+ *z3Ptr = z3;
+ *z2Ptr = z2;
+ *z1Ptr = z1;
+ *z0Ptr = z0;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns an approximation to the 64-bit integer quotient obtained by dividing
+| `b' into the 128-bit value formed by concatenating `a0' and `a1'. The
+| divisor `b' must be at least 2^63. If q is the exact quotient truncated
+| toward zero, the approximation returned lies between q and q + 2 inclusive.
+| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
+| unsigned integer is returned.
+*----------------------------------------------------------------------------*/
+
+static uint64_t estimateDiv128To64( uint64_t a0, uint64_t a1, uint64_t b )
+{
+ uint64_t b0, b1;
+ uint64_t rem0, rem1, term0, term1;
+ uint64_t z;
+
+ if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF );
+ b0 = b>>32;
+ z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32;
+ mul64To128( b, z, &term0, &term1 );
+ sub128( a0, a1, term0, term1, &rem0, &rem1 );
+ while ( ( (int64_t) rem0 ) < 0 ) {
+ z -= LIT64( 0x100000000 );
+ b1 = b<<32;
+ add128( rem0, rem1, b0, b1, &rem0, &rem1 );
+ }
+ rem0 = ( rem0<<32 ) | ( rem1>>32 );
+ z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0;
+ return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns an approximation to the square root of the 32-bit significand given
+| by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
+| `aExp' (the least significant bit) is 1, the integer returned approximates
+| 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'
+| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either
+| case, the approximation returned lies strictly within +/-2 of the exact
+| value.
+*----------------------------------------------------------------------------*/
+
+static uint32_t estimateSqrt32(int_fast16_t aExp, uint32_t a)
+{
+ static const uint16_t sqrtOddAdjustments[] = {
+ 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
+ 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
+ };
+ static const uint16_t sqrtEvenAdjustments[] = {
+ 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
+ 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
+ };
+ int8 index;
+ uint32_t z;
+
+ index = ( a>>27 ) & 15;
+ if ( aExp & 1 ) {
+ z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ];
+ z = ( ( a / z )<<14 ) + ( z<<15 );
+ a >>= 1;
+ }
+ else {
+ z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ];
+ z = a / z + z;
+ z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 );
+ if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>1 );
+ }
+ return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / z ) ) + ( z>>1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the number of leading 0 bits before the most-significant 1 bit of
+| `a'. If `a' is zero, 32 is returned.
+*----------------------------------------------------------------------------*/
+
+static int8 countLeadingZeros32( uint32_t a )
+{
+#if SOFTFLOAT_GNUC_PREREQ(3, 4)
+ if (a) {
+ return __builtin_clz(a);
+ } else {
+ return 32;
+ }
+#else
+ static const int8 countLeadingZerosHigh[] = {
+ 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ };
+ int8 shiftCount;
+
+ shiftCount = 0;
+ if ( a < 0x10000 ) {
+ shiftCount += 16;
+ a <<= 16;
+ }
+ if ( a < 0x1000000 ) {
+ shiftCount += 8;
+ a <<= 8;
+ }
+ shiftCount += countLeadingZerosHigh[ a>>24 ];
+ return shiftCount;
+#endif
+}
+
+/*----------------------------------------------------------------------------
+| Returns the number of leading 0 bits before the most-significant 1 bit of
+| `a'. If `a' is zero, 64 is returned.
+*----------------------------------------------------------------------------*/
+
+static int8 countLeadingZeros64( uint64_t a )
+{
+#if SOFTFLOAT_GNUC_PREREQ(3, 4)
+ if (a) {
+ return __builtin_clzll(a);
+ } else {
+ return 64;
+ }
+#else
+ int8 shiftCount;
+
+ shiftCount = 0;
+ if ( a < ( (uint64_t) 1 )<<32 ) {
+ shiftCount += 32;
+ }
+ else {
+ a >>= 32;
+ }
+ shiftCount += countLeadingZeros32( a );
+ return shiftCount;
+#endif
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
+| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
+| Otherwise, returns 0.
+*----------------------------------------------------------------------------*/
+
+INLINE flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
+{
+
+ return ( a0 == b0 ) && ( a1 == b1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
+| than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
+| Otherwise, returns 0.
+*----------------------------------------------------------------------------*/
+
+INLINE flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
+{
+
+ return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
+| than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,
+| returns 0.
+*----------------------------------------------------------------------------*/
+
+INLINE flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
+{
+
+ return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
+| not equal to the 128-bit value formed by concatenating `b0' and `b1'.
+| Otherwise, returns 0.
+*----------------------------------------------------------------------------*/
+
+INLINE flag ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
+{
+
+ return ( a0 != b0 ) || ( a1 != b1 );
+
+}
diff --git a/fpu/softfloat-specialize.h b/fpu/softfloat-specialize.h
new file mode 100644
index 000000000..490245004
--- /dev/null
+++ b/fpu/softfloat-specialize.h
@@ -0,0 +1,1066 @@
+/*
+ * QEMU float support
+ *
+ * Derived from SoftFloat.
+ */
+
+/*============================================================================
+
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b.
+
+Written by John R. Hauser. This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704. Funding was partially provided by the
+National Science Foundation under grant MIP-9311980. The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek. More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+#if defined(TARGET_MIPS) || defined(TARGET_SH4) || defined(TARGET_UNICORE32)
+#define SNAN_BIT_IS_ONE 1
+#else
+#define SNAN_BIT_IS_ONE 0
+#endif
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated half-precision NaN.
+*----------------------------------------------------------------------------*/
+#if defined(TARGET_ARM)
+const float16 float16_default_nan = const_float16(0x7E00);
+#elif SNAN_BIT_IS_ONE
+const float16 float16_default_nan = const_float16(0x7DFF);
+#else
+const float16 float16_default_nan = const_float16(0xFE00);
+#endif
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated single-precision NaN.
+*----------------------------------------------------------------------------*/
+#if defined(TARGET_SPARC)
+const float32 float32_default_nan = const_float32(0x7FFFFFFF);
+#elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA)
+const float32 float32_default_nan = const_float32(0x7FC00000);
+#elif SNAN_BIT_IS_ONE
+const float32 float32_default_nan = const_float32(0x7FBFFFFF);
+#else
+const float32 float32_default_nan = const_float32(0xFFC00000);
+#endif
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated double-precision NaN.
+*----------------------------------------------------------------------------*/
+#if defined(TARGET_SPARC)
+const float64 float64_default_nan = const_float64(LIT64( 0x7FFFFFFFFFFFFFFF ));
+#elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA)
+const float64 float64_default_nan = const_float64(LIT64( 0x7FF8000000000000 ));
+#elif SNAN_BIT_IS_ONE
+const float64 float64_default_nan = const_float64(LIT64( 0x7FF7FFFFFFFFFFFF ));
+#else
+const float64 float64_default_nan = const_float64(LIT64( 0xFFF8000000000000 ));
+#endif
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.
+*----------------------------------------------------------------------------*/
+#if SNAN_BIT_IS_ONE
+#define floatx80_default_nan_high 0x7FFF
+#define floatx80_default_nan_low LIT64( 0xBFFFFFFFFFFFFFFF )
+#else
+#define floatx80_default_nan_high 0xFFFF
+#define floatx80_default_nan_low LIT64( 0xC000000000000000 )
+#endif
+
+const floatx80 floatx80_default_nan
+ = make_floatx80_init(floatx80_default_nan_high, floatx80_default_nan_low);
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated quadruple-precision NaN. The `high' and
+| `low' values hold the most- and least-significant bits, respectively.
+*----------------------------------------------------------------------------*/
+#if SNAN_BIT_IS_ONE
+#define float128_default_nan_high LIT64( 0x7FFF7FFFFFFFFFFF )
+#define float128_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF )
+#else
+#define float128_default_nan_high LIT64( 0xFFFF800000000000 )
+#define float128_default_nan_low LIT64( 0x0000000000000000 )
+#endif
+
+const float128 float128_default_nan
+ = make_float128_init(float128_default_nan_high, float128_default_nan_low);
+
+/*----------------------------------------------------------------------------
+| Raises the exceptions specified by `flags'. Floating-point traps can be
+| defined here if desired. It is currently not possible for such a trap
+| to substitute a result value. If traps are not implemented, this routine
+| should be simply `float_exception_flags |= flags;'.
+*----------------------------------------------------------------------------*/
+
+void float_raise( int8 flags STATUS_PARAM )
+{
+ STATUS(float_exception_flags) |= flags;
+}
+
+/*----------------------------------------------------------------------------
+| Internal canonical NaN format.
+*----------------------------------------------------------------------------*/
+typedef struct {
+ flag sign;
+ uint64_t high, low;
+} commonNaNT;
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the half-precision floating-point value `a' is a quiet
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+int float16_is_quiet_nan(float16 a_)
+{
+ uint16_t a = float16_val(a_);
+#if SNAN_BIT_IS_ONE
+ return (((a >> 9) & 0x3F) == 0x3E) && (a & 0x1FF);
+#else
+ return ((a & ~0x8000) >= 0x7c80);
+#endif
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the half-precision floating-point value `a' is a signaling
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+int float16_is_signaling_nan(float16 a_)
+{
+ uint16_t a = float16_val(a_);
+#if SNAN_BIT_IS_ONE
+ return ((a & ~0x8000) >= 0x7c80);
+#else
+ return (((a >> 9) & 0x3F) == 0x3E) && (a & 0x1FF);
+#endif
+}
+
+/*----------------------------------------------------------------------------
+| Returns a quiet NaN if the half-precision floating point value `a' is a
+| signaling NaN; otherwise returns `a'.
+*----------------------------------------------------------------------------*/
+float16 float16_maybe_silence_nan(float16 a_)
+{
+ if (float16_is_signaling_nan(a_)) {
+#if SNAN_BIT_IS_ONE
+# if defined(TARGET_MIPS) || defined(TARGET_SH4) || defined(TARGET_UNICORE32)
+ return float16_default_nan;
+# else
+# error Rules for silencing a signaling NaN are target-specific
+# endif
+#else
+ uint16_t a = float16_val(a_);
+ a |= (1 << 9);
+ return make_float16(a);
+#endif
+ }
+ return a_;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the half-precision floating-point NaN
+| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT float16ToCommonNaN( float16 a STATUS_PARAM )
+{
+ commonNaNT z;
+
+ if ( float16_is_signaling_nan( a ) ) float_raise( float_flag_invalid STATUS_VAR );
+ z.sign = float16_val(a) >> 15;
+ z.low = 0;
+ z.high = ((uint64_t) float16_val(a))<<54;
+ return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the half-
+| precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static float16 commonNaNToFloat16(commonNaNT a STATUS_PARAM)
+{
+ uint16_t mantissa = a.high>>54;
+
+ if (STATUS(default_nan_mode)) {
+ return float16_default_nan;
+ }
+
+ if (mantissa) {
+ return make_float16(((((uint16_t) a.sign) << 15)
+ | (0x1F << 10) | mantissa));
+ } else {
+ return float16_default_nan;
+ }
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is a quiet
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+int float32_is_quiet_nan( float32 a_ )
+{
+ uint32_t a = float32_val(a_);
+#if SNAN_BIT_IS_ONE
+ return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF );
+#else
+ return ( 0xFF800000 <= (uint32_t) ( a<<1 ) );
+#endif
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is a signaling
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+int float32_is_signaling_nan( float32 a_ )
+{
+ uint32_t a = float32_val(a_);
+#if SNAN_BIT_IS_ONE
+ return ( 0xFF800000 <= (uint32_t) ( a<<1 ) );
+#else
+ return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF );
+#endif
+}
+
+/*----------------------------------------------------------------------------
+| Returns a quiet NaN if the single-precision floating point value `a' is a
+| signaling NaN; otherwise returns `a'.
+*----------------------------------------------------------------------------*/
+
+float32 float32_maybe_silence_nan( float32 a_ )
+{
+ if (float32_is_signaling_nan(a_)) {
+#if SNAN_BIT_IS_ONE
+# if defined(TARGET_MIPS) || defined(TARGET_SH4) || defined(TARGET_UNICORE32)
+ return float32_default_nan;
+# else
+# error Rules for silencing a signaling NaN are target-specific
+# endif
+#else
+ uint32_t a = float32_val(a_);
+ a |= (1 << 22);
+ return make_float32(a);
+#endif
+ }
+ return a_;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point NaN
+| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT float32ToCommonNaN( float32 a STATUS_PARAM )
+{
+ commonNaNT z;
+
+ if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid STATUS_VAR );
+ z.sign = float32_val(a)>>31;
+ z.low = 0;
+ z.high = ( (uint64_t) float32_val(a) )<<41;
+ return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the single-
+| precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static float32 commonNaNToFloat32( commonNaNT a STATUS_PARAM)
+{
+ uint32_t mantissa = a.high>>41;
+
+ if ( STATUS(default_nan_mode) ) {
+ return float32_default_nan;
+ }
+
+ if ( mantissa )
+ return make_float32(
+ ( ( (uint32_t) a.sign )<<31 ) | 0x7F800000 | ( a.high>>41 ) );
+ else
+ return float32_default_nan;
+}
+
+/*----------------------------------------------------------------------------
+| Select which NaN to propagate for a two-input operation.
+| IEEE754 doesn't specify all the details of this, so the
+| algorithm is target-specific.
+| The routine is passed various bits of information about the
+| two NaNs and should return 0 to select NaN a and 1 for NaN b.
+| Note that signalling NaNs are always squashed to quiet NaNs
+| by the caller, by calling floatXX_maybe_silence_nan() before
+| returning them.
+|
+| aIsLargerSignificand is only valid if both a and b are NaNs
+| of some kind, and is true if a has the larger significand,
+| or if both a and b have the same significand but a is
+| positive but b is negative. It is only needed for the x87
+| tie-break rule.
+*----------------------------------------------------------------------------*/
+
+#if defined(TARGET_ARM)
+static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN,
+ flag aIsLargerSignificand)
+{
+ /* ARM mandated NaN propagation rules: take the first of:
+ * 1. A if it is signaling
+ * 2. B if it is signaling
+ * 3. A (quiet)
+ * 4. B (quiet)
+ * A signaling NaN is always quietened before returning it.
+ */
+ if (aIsSNaN) {
+ return 0;
+ } else if (bIsSNaN) {
+ return 1;
+ } else if (aIsQNaN) {
+ return 0;
+ } else {
+ return 1;
+ }
+}
+#elif defined(TARGET_MIPS)
+static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN,
+ flag aIsLargerSignificand)
+{
+ /* According to MIPS specifications, if one of the two operands is
+ * a sNaN, a new qNaN has to be generated. This is done in
+ * floatXX_maybe_silence_nan(). For qNaN inputs the specifications
+ * says: "When possible, this QNaN result is one of the operand QNaN
+ * values." In practice it seems that most implementations choose
+ * the first operand if both operands are qNaN. In short this gives
+ * the following rules:
+ * 1. A if it is signaling
+ * 2. B if it is signaling
+ * 3. A (quiet)
+ * 4. B (quiet)
+ * A signaling NaN is always silenced before returning it.
+ */
+ if (aIsSNaN) {
+ return 0;
+ } else if (bIsSNaN) {
+ return 1;
+ } else if (aIsQNaN) {
+ return 0;
+ } else {
+ return 1;
+ }
+}
+#elif defined(TARGET_PPC)
+static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN,
+ flag aIsLargerSignificand)
+{
+ /* PowerPC propagation rules:
+ * 1. A if it sNaN or qNaN
+ * 2. B if it sNaN or qNaN
+ * A signaling NaN is always silenced before returning it.
+ */
+ if (aIsSNaN || aIsQNaN) {
+ return 0;
+ } else {
+ return 1;
+ }
+}
+#else
+static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN,
+ flag aIsLargerSignificand)
+{
+ /* This implements x87 NaN propagation rules:
+ * SNaN + QNaN => return the QNaN
+ * two SNaNs => return the one with the larger significand, silenced
+ * two QNaNs => return the one with the larger significand
+ * SNaN and a non-NaN => return the SNaN, silenced
+ * QNaN and a non-NaN => return the QNaN
+ *
+ * If we get down to comparing significands and they are the same,
+ * return the NaN with the positive sign bit (if any).
+ */
+ if (aIsSNaN) {
+ if (bIsSNaN) {
+ return aIsLargerSignificand ? 0 : 1;
+ }
+ return bIsQNaN ? 1 : 0;
+ }
+ else if (aIsQNaN) {
+ if (bIsSNaN || !bIsQNaN)
+ return 0;
+ else {
+ return aIsLargerSignificand ? 0 : 1;
+ }
+ } else {
+ return 1;
+ }
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| Select which NaN to propagate for a three-input operation.
+| For the moment we assume that no CPU needs the 'larger significand'
+| information.
+| Return values : 0 : a; 1 : b; 2 : c; 3 : default-NaN
+*----------------------------------------------------------------------------*/
+#if defined(TARGET_ARM)
+static int pickNaNMulAdd(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN,
+ flag cIsQNaN, flag cIsSNaN, flag infzero STATUS_PARAM)
+{
+ /* For ARM, the (inf,zero,qnan) case sets InvalidOp and returns
+ * the default NaN
+ */
+ if (infzero && cIsQNaN) {
+ float_raise(float_flag_invalid STATUS_VAR);
+ return 3;
+ }
+
+ /* This looks different from the ARM ARM pseudocode, because the ARM ARM
+ * puts the operands to a fused mac operation (a*b)+c in the order c,a,b.
+ */
+ if (cIsSNaN) {
+ return 2;
+ } else if (aIsSNaN) {
+ return 0;
+ } else if (bIsSNaN) {
+ return 1;
+ } else if (cIsQNaN) {
+ return 2;
+ } else if (aIsQNaN) {
+ return 0;
+ } else {
+ return 1;
+ }
+}
+#elif defined(TARGET_PPC)
+static int pickNaNMulAdd(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN,
+ flag cIsQNaN, flag cIsSNaN, flag infzero STATUS_PARAM)
+{
+ /* For PPC, the (inf,zero,qnan) case sets InvalidOp, but we prefer
+ * to return an input NaN if we have one (ie c) rather than generating
+ * a default NaN
+ */
+ if (infzero) {
+ float_raise(float_flag_invalid STATUS_VAR);
+ return 2;
+ }
+
+ /* If fRA is a NaN return it; otherwise if fRB is a NaN return it;
+ * otherwise return fRC. Note that muladd on PPC is (fRA * fRC) + frB
+ */
+ if (aIsSNaN || aIsQNaN) {
+ return 0;
+ } else if (cIsSNaN || cIsQNaN) {
+ return 2;
+ } else {
+ return 1;
+ }
+}
+#else
+/* A default implementation: prefer a to b to c.
+ * This is unlikely to actually match any real implementation.
+ */
+static int pickNaNMulAdd(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN,
+ flag cIsQNaN, flag cIsSNaN, flag infzero STATUS_PARAM)
+{
+ if (aIsSNaN || aIsQNaN) {
+ return 0;
+ } else if (bIsSNaN || bIsQNaN) {
+ return 1;
+ } else {
+ return 2;
+ }
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| Takes two single-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a
+| signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static float32 propagateFloat32NaN( float32 a, float32 b STATUS_PARAM)
+{
+ flag aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN;
+ flag aIsLargerSignificand;
+ uint32_t av, bv;
+
+ aIsQuietNaN = float32_is_quiet_nan( a );
+ aIsSignalingNaN = float32_is_signaling_nan( a );
+ bIsQuietNaN = float32_is_quiet_nan( b );
+ bIsSignalingNaN = float32_is_signaling_nan( b );
+ av = float32_val(a);
+ bv = float32_val(b);
+
+ if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR);
+
+ if ( STATUS(default_nan_mode) )
+ return float32_default_nan;
+
+ if ((uint32_t)(av<<1) < (uint32_t)(bv<<1)) {
+ aIsLargerSignificand = 0;
+ } else if ((uint32_t)(bv<<1) < (uint32_t)(av<<1)) {
+ aIsLargerSignificand = 1;
+ } else {
+ aIsLargerSignificand = (av < bv) ? 1 : 0;
+ }
+
+ if (pickNaN(aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN,
+ aIsLargerSignificand)) {
+ return float32_maybe_silence_nan(b);
+ } else {
+ return float32_maybe_silence_nan(a);
+ }
+}
+
+/*----------------------------------------------------------------------------
+| Takes three single-precision floating-point values `a', `b' and `c', one of
+| which is a NaN, and returns the appropriate NaN result. If any of `a',
+| `b' or `c' is a signaling NaN, the invalid exception is raised.
+| The input infzero indicates whether a*b was 0*inf or inf*0 (in which case
+| obviously c is a NaN, and whether to propagate c or some other NaN is
+| implementation defined).
+*----------------------------------------------------------------------------*/
+
+static float32 propagateFloat32MulAddNaN(float32 a, float32 b,
+ float32 c, flag infzero STATUS_PARAM)
+{
+ flag aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN,
+ cIsQuietNaN, cIsSignalingNaN;
+ int which;
+
+ aIsQuietNaN = float32_is_quiet_nan(a);
+ aIsSignalingNaN = float32_is_signaling_nan(a);
+ bIsQuietNaN = float32_is_quiet_nan(b);
+ bIsSignalingNaN = float32_is_signaling_nan(b);
+ cIsQuietNaN = float32_is_quiet_nan(c);
+ cIsSignalingNaN = float32_is_signaling_nan(c);
+
+ if (aIsSignalingNaN | bIsSignalingNaN | cIsSignalingNaN) {
+ float_raise(float_flag_invalid STATUS_VAR);
+ }
+
+ which = pickNaNMulAdd(aIsQuietNaN, aIsSignalingNaN,
+ bIsQuietNaN, bIsSignalingNaN,
+ cIsQuietNaN, cIsSignalingNaN, infzero STATUS_VAR);
+
+ if (STATUS(default_nan_mode)) {
+ /* Note that this check is after pickNaNMulAdd so that function
+ * has an opportunity to set the Invalid flag.
+ */
+ return float32_default_nan;
+ }
+
+ switch (which) {
+ case 0:
+ return float32_maybe_silence_nan(a);
+ case 1:
+ return float32_maybe_silence_nan(b);
+ case 2:
+ return float32_maybe_silence_nan(c);
+ case 3:
+ default:
+ return float32_default_nan;
+ }
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is a quiet
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+int float64_is_quiet_nan( float64 a_ )
+{
+ uint64_t a = float64_val(a_);
+#if SNAN_BIT_IS_ONE
+ return
+ ( ( ( a>>51 ) & 0xFFF ) == 0xFFE )
+ && ( a & LIT64( 0x0007FFFFFFFFFFFF ) );
+#else
+ return ( LIT64( 0xFFF0000000000000 ) <= (uint64_t) ( a<<1 ) );
+#endif
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is a signaling
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+int float64_is_signaling_nan( float64 a_ )
+{
+ uint64_t a = float64_val(a_);
+#if SNAN_BIT_IS_ONE
+ return ( LIT64( 0xFFF0000000000000 ) <= (uint64_t) ( a<<1 ) );
+#else
+ return
+ ( ( ( a>>51 ) & 0xFFF ) == 0xFFE )
+ && ( a & LIT64( 0x0007FFFFFFFFFFFF ) );
+#endif
+}
+
+/*----------------------------------------------------------------------------
+| Returns a quiet NaN if the double-precision floating point value `a' is a
+| signaling NaN; otherwise returns `a'.
+*----------------------------------------------------------------------------*/
+
+float64 float64_maybe_silence_nan( float64 a_ )
+{
+ if (float64_is_signaling_nan(a_)) {
+#if SNAN_BIT_IS_ONE
+# if defined(TARGET_MIPS) || defined(TARGET_SH4) || defined(TARGET_UNICORE32)
+ return float64_default_nan;
+# else
+# error Rules for silencing a signaling NaN are target-specific
+# endif
+#else
+ uint64_t a = float64_val(a_);
+ a |= LIT64( 0x0008000000000000 );
+ return make_float64(a);
+#endif
+ }
+ return a_;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point NaN
+| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT float64ToCommonNaN( float64 a STATUS_PARAM)
+{
+ commonNaNT z;
+
+ if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid STATUS_VAR);
+ z.sign = float64_val(a)>>63;
+ z.low = 0;
+ z.high = float64_val(a)<<12;
+ return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the double-
+| precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static float64 commonNaNToFloat64( commonNaNT a STATUS_PARAM)
+{
+ uint64_t mantissa = a.high>>12;
+
+ if ( STATUS(default_nan_mode) ) {
+ return float64_default_nan;
+ }
+
+ if ( mantissa )
+ return make_float64(
+ ( ( (uint64_t) a.sign )<<63 )
+ | LIT64( 0x7FF0000000000000 )
+ | ( a.high>>12 ));
+ else
+ return float64_default_nan;
+}
+
+/*----------------------------------------------------------------------------
+| Takes two double-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a
+| signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static float64 propagateFloat64NaN( float64 a, float64 b STATUS_PARAM)
+{
+ flag aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN;
+ flag aIsLargerSignificand;
+ uint64_t av, bv;
+
+ aIsQuietNaN = float64_is_quiet_nan( a );
+ aIsSignalingNaN = float64_is_signaling_nan( a );
+ bIsQuietNaN = float64_is_quiet_nan( b );
+ bIsSignalingNaN = float64_is_signaling_nan( b );
+ av = float64_val(a);
+ bv = float64_val(b);
+
+ if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR);
+
+ if ( STATUS(default_nan_mode) )
+ return float64_default_nan;
+
+ if ((uint64_t)(av<<1) < (uint64_t)(bv<<1)) {
+ aIsLargerSignificand = 0;
+ } else if ((uint64_t)(bv<<1) < (uint64_t)(av<<1)) {
+ aIsLargerSignificand = 1;
+ } else {
+ aIsLargerSignificand = (av < bv) ? 1 : 0;
+ }
+
+ if (pickNaN(aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN,
+ aIsLargerSignificand)) {
+ return float64_maybe_silence_nan(b);
+ } else {
+ return float64_maybe_silence_nan(a);
+ }
+}
+
+/*----------------------------------------------------------------------------
+| Takes three double-precision floating-point values `a', `b' and `c', one of
+| which is a NaN, and returns the appropriate NaN result. If any of `a',
+| `b' or `c' is a signaling NaN, the invalid exception is raised.
+| The input infzero indicates whether a*b was 0*inf or inf*0 (in which case
+| obviously c is a NaN, and whether to propagate c or some other NaN is
+| implementation defined).
+*----------------------------------------------------------------------------*/
+
+static float64 propagateFloat64MulAddNaN(float64 a, float64 b,
+ float64 c, flag infzero STATUS_PARAM)
+{
+ flag aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN,
+ cIsQuietNaN, cIsSignalingNaN;
+ int which;
+
+ aIsQuietNaN = float64_is_quiet_nan(a);
+ aIsSignalingNaN = float64_is_signaling_nan(a);
+ bIsQuietNaN = float64_is_quiet_nan(b);
+ bIsSignalingNaN = float64_is_signaling_nan(b);
+ cIsQuietNaN = float64_is_quiet_nan(c);
+ cIsSignalingNaN = float64_is_signaling_nan(c);
+
+ if (aIsSignalingNaN | bIsSignalingNaN | cIsSignalingNaN) {
+ float_raise(float_flag_invalid STATUS_VAR);
+ }
+
+ which = pickNaNMulAdd(aIsQuietNaN, aIsSignalingNaN,
+ bIsQuietNaN, bIsSignalingNaN,
+ cIsQuietNaN, cIsSignalingNaN, infzero STATUS_VAR);
+
+ if (STATUS(default_nan_mode)) {
+ /* Note that this check is after pickNaNMulAdd so that function
+ * has an opportunity to set the Invalid flag.
+ */
+ return float64_default_nan;
+ }
+
+ switch (which) {
+ case 0:
+ return float64_maybe_silence_nan(a);
+ case 1:
+ return float64_maybe_silence_nan(b);
+ case 2:
+ return float64_maybe_silence_nan(c);
+ case 3:
+ default:
+ return float64_default_nan;
+ }
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is a
+| quiet NaN; otherwise returns 0. This slightly differs from the same
+| function for other types as floatx80 has an explicit bit.
+*----------------------------------------------------------------------------*/
+
+int floatx80_is_quiet_nan( floatx80 a )
+{
+#if SNAN_BIT_IS_ONE
+ uint64_t aLow;
+
+ aLow = a.low & ~ LIT64( 0x4000000000000000 );
+ return
+ ( ( a.high & 0x7FFF ) == 0x7FFF )
+ && (uint64_t) ( aLow<<1 )
+ && ( a.low == aLow );
+#else
+ return ( ( a.high & 0x7FFF ) == 0x7FFF )
+ && (LIT64( 0x8000000000000000 ) <= ((uint64_t) ( a.low<<1 )));
+#endif
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is a
+| signaling NaN; otherwise returns 0. This slightly differs from the same
+| function for other types as floatx80 has an explicit bit.
+*----------------------------------------------------------------------------*/
+
+int floatx80_is_signaling_nan( floatx80 a )
+{
+#if SNAN_BIT_IS_ONE
+ return ( ( a.high & 0x7FFF ) == 0x7FFF )
+ && (LIT64( 0x8000000000000000 ) <= ((uint64_t) ( a.low<<1 )));
+#else
+ uint64_t aLow;
+
+ aLow = a.low & ~ LIT64( 0x4000000000000000 );
+ return
+ ( ( a.high & 0x7FFF ) == 0x7FFF )
+ && (uint64_t) ( aLow<<1 )
+ && ( a.low == aLow );
+#endif
+}
+
+/*----------------------------------------------------------------------------
+| Returns a quiet NaN if the extended double-precision floating point value
+| `a' is a signaling NaN; otherwise returns `a'.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_maybe_silence_nan( floatx80 a )
+{
+ if (floatx80_is_signaling_nan(a)) {
+#if SNAN_BIT_IS_ONE
+# if defined(TARGET_MIPS) || defined(TARGET_SH4) || defined(TARGET_UNICORE32)
+ a.low = floatx80_default_nan_low;
+ a.high = floatx80_default_nan_high;
+# else
+# error Rules for silencing a signaling NaN are target-specific
+# endif
+#else
+ a.low |= LIT64( 0xC000000000000000 );
+ return a;
+#endif
+ }
+ return a;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the
+| invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT floatx80ToCommonNaN( floatx80 a STATUS_PARAM)
+{
+ commonNaNT z;
+
+ if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid STATUS_VAR);
+ if ( a.low >> 63 ) {
+ z.sign = a.high >> 15;
+ z.low = 0;
+ z.high = a.low << 1;
+ } else {
+ z.sign = floatx80_default_nan_high >> 15;
+ z.low = 0;
+ z.high = floatx80_default_nan_low << 1;
+ }
+ return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the extended
+| double-precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static floatx80 commonNaNToFloatx80( commonNaNT a STATUS_PARAM)
+{
+ floatx80 z;
+
+ if ( STATUS(default_nan_mode) ) {
+ z.low = floatx80_default_nan_low;
+ z.high = floatx80_default_nan_high;
+ return z;
+ }
+
+ if (a.high >> 1) {
+ z.low = LIT64( 0x8000000000000000 ) | a.high >> 1;
+ z.high = ( ( (uint16_t) a.sign )<<15 ) | 0x7FFF;
+ } else {
+ z.low = floatx80_default_nan_low;
+ z.high = floatx80_default_nan_high;
+ }
+
+ return z;
+}
+
+/*----------------------------------------------------------------------------
+| Takes two extended double-precision floating-point values `a' and `b', one
+| of which is a NaN, and returns the appropriate NaN result. If either `a' or
+| `b' is a signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b STATUS_PARAM)
+{
+ flag aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN;
+ flag aIsLargerSignificand;
+
+ aIsQuietNaN = floatx80_is_quiet_nan( a );
+ aIsSignalingNaN = floatx80_is_signaling_nan( a );
+ bIsQuietNaN = floatx80_is_quiet_nan( b );
+ bIsSignalingNaN = floatx80_is_signaling_nan( b );
+
+ if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR);
+
+ if ( STATUS(default_nan_mode) ) {
+ a.low = floatx80_default_nan_low;
+ a.high = floatx80_default_nan_high;
+ return a;
+ }
+
+ if (a.low < b.low) {
+ aIsLargerSignificand = 0;
+ } else if (b.low < a.low) {
+ aIsLargerSignificand = 1;
+ } else {
+ aIsLargerSignificand = (a.high < b.high) ? 1 : 0;
+ }
+
+ if (pickNaN(aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN,
+ aIsLargerSignificand)) {
+ return floatx80_maybe_silence_nan(b);
+ } else {
+ return floatx80_maybe_silence_nan(a);
+ }
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is a quiet
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+int float128_is_quiet_nan( float128 a )
+{
+#if SNAN_BIT_IS_ONE
+ return
+ ( ( ( a.high>>47 ) & 0xFFFF ) == 0xFFFE )
+ && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) );
+#else
+ return
+ ( LIT64( 0xFFFE000000000000 ) <= (uint64_t) ( a.high<<1 ) )
+ && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) );
+#endif
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is a
+| signaling NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+int float128_is_signaling_nan( float128 a )
+{
+#if SNAN_BIT_IS_ONE
+ return
+ ( LIT64( 0xFFFE000000000000 ) <= (uint64_t) ( a.high<<1 ) )
+ && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) );
+#else
+ return
+ ( ( ( a.high>>47 ) & 0xFFFF ) == 0xFFFE )
+ && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) );
+#endif
+}
+
+/*----------------------------------------------------------------------------
+| Returns a quiet NaN if the quadruple-precision floating point value `a' is
+| a signaling NaN; otherwise returns `a'.
+*----------------------------------------------------------------------------*/
+
+float128 float128_maybe_silence_nan( float128 a )
+{
+ if (float128_is_signaling_nan(a)) {
+#if SNAN_BIT_IS_ONE
+# if defined(TARGET_MIPS) || defined(TARGET_SH4) || defined(TARGET_UNICORE32)
+ a.low = float128_default_nan_low;
+ a.high = float128_default_nan_high;
+# else
+# error Rules for silencing a signaling NaN are target-specific
+# endif
+#else
+ a.high |= LIT64( 0x0000800000000000 );
+ return a;
+#endif
+ }
+ return a;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point NaN
+| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
+| exception is raised.
+*----------------------------------------------------------------------------*/
+
+static commonNaNT float128ToCommonNaN( float128 a STATUS_PARAM)
+{
+ commonNaNT z;
+
+ if ( float128_is_signaling_nan( a ) ) float_raise( float_flag_invalid STATUS_VAR);
+ z.sign = a.high>>63;
+ shortShift128Left( a.high, a.low, 16, &z.high, &z.low );
+ return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the canonical NaN `a' to the quadruple-
+| precision floating-point format.
+*----------------------------------------------------------------------------*/
+
+static float128 commonNaNToFloat128( commonNaNT a STATUS_PARAM)
+{
+ float128 z;
+
+ if ( STATUS(default_nan_mode) ) {
+ z.low = float128_default_nan_low;
+ z.high = float128_default_nan_high;
+ return z;
+ }
+
+ shift128Right( a.high, a.low, 16, &z.high, &z.low );
+ z.high |= ( ( (uint64_t) a.sign )<<63 ) | LIT64( 0x7FFF000000000000 );
+ return z;
+}
+
+/*----------------------------------------------------------------------------
+| Takes two quadruple-precision floating-point values `a' and `b', one of
+| which is a NaN, and returns the appropriate NaN result. If either `a' or
+| `b' is a signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static float128 propagateFloat128NaN( float128 a, float128 b STATUS_PARAM)
+{
+ flag aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN;
+ flag aIsLargerSignificand;
+
+ aIsQuietNaN = float128_is_quiet_nan( a );
+ aIsSignalingNaN = float128_is_signaling_nan( a );
+ bIsQuietNaN = float128_is_quiet_nan( b );
+ bIsSignalingNaN = float128_is_signaling_nan( b );
+
+ if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid STATUS_VAR);
+
+ if ( STATUS(default_nan_mode) ) {
+ a.low = float128_default_nan_low;
+ a.high = float128_default_nan_high;
+ return a;
+ }
+
+ if (lt128(a.high<<1, a.low, b.high<<1, b.low)) {
+ aIsLargerSignificand = 0;
+ } else if (lt128(b.high<<1, b.low, a.high<<1, a.low)) {
+ aIsLargerSignificand = 1;
+ } else {
+ aIsLargerSignificand = (a.high < b.high) ? 1 : 0;
+ }
+
+ if (pickNaN(aIsQuietNaN, aIsSignalingNaN, bIsQuietNaN, bIsSignalingNaN,
+ aIsLargerSignificand)) {
+ return float128_maybe_silence_nan(b);
+ } else {
+ return float128_maybe_silence_nan(a);
+ }
+}
+
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
new file mode 100644
index 000000000..b29256a8e
--- /dev/null
+++ b/fpu/softfloat.c
@@ -0,0 +1,6862 @@
+/*
+ * QEMU float support
+ *
+ * Derived from SoftFloat.
+ */
+
+/*============================================================================
+
+This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser. This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704. Funding was partially provided by the
+National Science Foundation under grant MIP-9311980. The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek. More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+/* softfloat (and in particular the code in softfloat-specialize.h) is
+ * target-dependent and needs the TARGET_* macros.
+ */
+#include "config.h"
+
+#include "softfloat.h"
+
+/*----------------------------------------------------------------------------
+| Primitive arithmetic functions, including multi-word arithmetic, and
+| division and square root approximations. (Can be specialized to target if
+| desired.)
+*----------------------------------------------------------------------------*/
+#include "softfloat-macros.h"
+
+/*----------------------------------------------------------------------------
+| Functions and definitions to determine: (1) whether tininess for underflow
+| is detected before or after rounding by default, (2) what (if anything)
+| happens when exceptions are raised, (3) how signaling NaNs are distinguished
+| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
+| are propagated from function inputs to output. These details are target-
+| specific.
+*----------------------------------------------------------------------------*/
+#include "softfloat-specialize.h"
+
+void set_float_rounding_mode(int val STATUS_PARAM)
+{
+ STATUS(float_rounding_mode) = val;
+}
+
+void set_float_exception_flags(int val STATUS_PARAM)
+{
+ STATUS(float_exception_flags) = val;
+}
+
+void set_floatx80_rounding_precision(int val STATUS_PARAM)
+{
+ STATUS(floatx80_rounding_precision) = val;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the half-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE uint32_t extractFloat16Frac(float16 a)
+{
+ return float16_val(a) & 0x3ff;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the half-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE int_fast16_t extractFloat16Exp(float16 a)
+{
+ return (float16_val(a) >> 10) & 0x1f;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the single-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE flag extractFloat16Sign(float16 a)
+{
+ return float16_val(a)>>15;
+}
+
+/*----------------------------------------------------------------------------
+| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
+| and 7, and returns the properly rounded 32-bit integer corresponding to the
+| input. If `zSign' is 1, the input is negated before being converted to an
+| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
+| is simply rounded to an integer, with the inexact exception raised if the
+| input cannot be represented exactly as an integer. However, if the fixed-
+| point input is too large, the invalid exception is raised and the largest
+| positive or negative integer is returned.
+*----------------------------------------------------------------------------*/
+
+static int32 roundAndPackInt32( flag zSign, uint64_t absZ STATUS_PARAM)
+{
+ int8 roundingMode;
+ flag roundNearestEven;
+ int8 roundIncrement, roundBits;
+ int32_t z;
+
+ roundingMode = STATUS(float_rounding_mode);
+ roundNearestEven = ( roundingMode == float_round_nearest_even );
+ roundIncrement = 0x40;
+ if ( ! roundNearestEven ) {
+ if ( roundingMode == float_round_to_zero ) {
+ roundIncrement = 0;
+ }
+ else {
+ roundIncrement = 0x7F;
+ if ( zSign ) {
+ if ( roundingMode == float_round_up ) roundIncrement = 0;
+ }
+ else {
+ if ( roundingMode == float_round_down ) roundIncrement = 0;
+ }
+ }
+ }
+ roundBits = absZ & 0x7F;
+ absZ = ( absZ + roundIncrement )>>7;
+ absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
+ z = absZ;
+ if ( zSign ) z = - z;
+ if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
+ }
+ if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
+ return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
+| `absZ1', with binary point between bits 63 and 64 (between the input words),
+| and returns the properly rounded 64-bit integer corresponding to the input.
+| If `zSign' is 1, the input is negated before being converted to an integer.
+| Ordinarily, the fixed-point input is simply rounded to an integer, with
+| the inexact exception raised if the input cannot be represented exactly as
+| an integer. However, if the fixed-point input is too large, the invalid
+| exception is raised and the largest positive or negative integer is
+| returned.
+*----------------------------------------------------------------------------*/
+
+static int64 roundAndPackInt64( flag zSign, uint64_t absZ0, uint64_t absZ1 STATUS_PARAM)
+{
+ int8 roundingMode;
+ flag roundNearestEven, increment;
+ int64_t z;
+
+ roundingMode = STATUS(float_rounding_mode);
+ roundNearestEven = ( roundingMode == float_round_nearest_even );
+ increment = ( (int64_t) absZ1 < 0 );
+ if ( ! roundNearestEven ) {
+ if ( roundingMode == float_round_to_zero ) {
+ increment = 0;
+ }
+ else {
+ if ( zSign ) {
+ increment = ( roundingMode == float_round_down ) && absZ1;
+ }
+ else {
+ increment = ( roundingMode == float_round_up ) && absZ1;
+ }
+ }
+ }
+ if ( increment ) {
+ ++absZ0;
+ if ( absZ0 == 0 ) goto overflow;
+ absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
+ }
+ z = absZ0;
+ if ( zSign ) z = - z;
+ if ( z && ( ( z < 0 ) ^ zSign ) ) {
+ overflow:
+ float_raise( float_flag_invalid STATUS_VAR);
+ return
+ zSign ? (int64_t) LIT64( 0x8000000000000000 )
+ : LIT64( 0x7FFFFFFFFFFFFFFF );
+ }
+ if ( absZ1 ) STATUS(float_exception_flags) |= float_flag_inexact;
+ return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the single-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE uint32_t extractFloat32Frac( float32 a )
+{
+
+ return float32_val(a) & 0x007FFFFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the single-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE int_fast16_t extractFloat32Exp(float32 a)
+{
+
+ return ( float32_val(a)>>23 ) & 0xFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the single-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE flag extractFloat32Sign( float32 a )
+{
+
+ return float32_val(a)>>31;
+
+}
+
+/*----------------------------------------------------------------------------
+| If `a' is denormal and we are in flush-to-zero mode then set the
+| input-denormal exception and return zero. Otherwise just return the value.
+*----------------------------------------------------------------------------*/
+static float32 float32_squash_input_denormal(float32 a STATUS_PARAM)
+{
+ if (STATUS(flush_inputs_to_zero)) {
+ if (extractFloat32Exp(a) == 0 && extractFloat32Frac(a) != 0) {
+ float_raise(float_flag_input_denormal STATUS_VAR);
+ return make_float32(float32_val(a) & 0x80000000);
+ }
+ }
+ return a;
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal single-precision floating-point value represented
+| by the denormalized significand `aSig'. The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+static void
+ normalizeFloat32Subnormal(uint32_t aSig, int_fast16_t *zExpPtr, uint32_t *zSigPtr)
+{
+ int8 shiftCount;
+
+ shiftCount = countLeadingZeros32( aSig ) - 8;
+ *zSigPtr = aSig<<shiftCount;
+ *zExpPtr = 1 - shiftCount;
+
+}
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
+| single-precision floating-point value, returning the result. After being
+| shifted into the proper positions, the three fields are simply added
+| together to form the result. This means that any integer portion of `zSig'
+| will be added into the exponent. Since a properly normalized significand
+| will have an integer portion equal to 1, the `zExp' input should be 1 less
+| than the desired result exponent whenever `zSig' is a complete, normalized
+| significand.
+*----------------------------------------------------------------------------*/
+
+INLINE float32 packFloat32(flag zSign, int_fast16_t zExp, uint32_t zSig)
+{
+
+ return make_float32(
+ ( ( (uint32_t) zSign )<<31 ) + ( ( (uint32_t) zExp )<<23 ) + zSig);
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper single-precision floating-
+| point value corresponding to the abstract input. Ordinarily, the abstract
+| value is simply rounded and packed into the single-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly. However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned. If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal single-
+| precision floating-point number.
+| The input significand `zSig' has its binary point between bits 30
+| and 29, which is 7 bits to the left of the usual location. This shifted
+| significand must be normalized or smaller. If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding. In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float32 roundAndPackFloat32(flag zSign, int_fast16_t zExp, uint32_t zSig STATUS_PARAM)
+{
+ int8 roundingMode;
+ flag roundNearestEven;
+ int8 roundIncrement, roundBits;
+ flag isTiny;
+
+ roundingMode = STATUS(float_rounding_mode);
+ roundNearestEven = ( roundingMode == float_round_nearest_even );
+ roundIncrement = 0x40;
+ if ( ! roundNearestEven ) {
+ if ( roundingMode == float_round_to_zero ) {
+ roundIncrement = 0;
+ }
+ else {
+ roundIncrement = 0x7F;
+ if ( zSign ) {
+ if ( roundingMode == float_round_up ) roundIncrement = 0;
+ }
+ else {
+ if ( roundingMode == float_round_down ) roundIncrement = 0;
+ }
+ }
+ }
+ roundBits = zSig & 0x7F;
+ if ( 0xFD <= (uint16_t) zExp ) {
+ if ( ( 0xFD < zExp )
+ || ( ( zExp == 0xFD )
+ && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
+ ) {
+ float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
+ return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
+ }
+ if ( zExp < 0 ) {
+ if (STATUS(flush_to_zero)) {
+ float_raise(float_flag_output_denormal STATUS_VAR);
+ return packFloat32(zSign, 0, 0);
+ }
+ isTiny =
+ ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
+ || ( zExp < -1 )
+ || ( zSig + roundIncrement < 0x80000000 );
+ shift32RightJamming( zSig, - zExp, &zSig );
+ zExp = 0;
+ roundBits = zSig & 0x7F;
+ if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
+ }
+ }
+ if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
+ zSig = ( zSig + roundIncrement )>>7;
+ zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
+ if ( zSig == 0 ) zExp = 0;
+ return packFloat32( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper single-precision floating-
+| point value corresponding to the abstract input. This routine is just like
+| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
+| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+| floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+static float32
+ normalizeRoundAndPackFloat32(flag zSign, int_fast16_t zExp, uint32_t zSig STATUS_PARAM)
+{
+ int8 shiftCount;
+
+ shiftCount = countLeadingZeros32( zSig ) - 1;
+ return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the double-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE uint64_t extractFloat64Frac( float64 a )
+{
+
+ return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the double-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE int_fast16_t extractFloat64Exp(float64 a)
+{
+
+ return ( float64_val(a)>>52 ) & 0x7FF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the double-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE flag extractFloat64Sign( float64 a )
+{
+
+ return float64_val(a)>>63;
+
+}
+
+/*----------------------------------------------------------------------------
+| If `a' is denormal and we are in flush-to-zero mode then set the
+| input-denormal exception and return zero. Otherwise just return the value.
+*----------------------------------------------------------------------------*/
+static float64 float64_squash_input_denormal(float64 a STATUS_PARAM)
+{
+ if (STATUS(flush_inputs_to_zero)) {
+ if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
+ float_raise(float_flag_input_denormal STATUS_VAR);
+ return make_float64(float64_val(a) & (1ULL << 63));
+ }
+ }
+ return a;
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal double-precision floating-point value represented
+| by the denormalized significand `aSig'. The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+static void
+ normalizeFloat64Subnormal(uint64_t aSig, int_fast16_t *zExpPtr, uint64_t *zSigPtr)
+{
+ int8 shiftCount;
+
+ shiftCount = countLeadingZeros64( aSig ) - 11;
+ *zSigPtr = aSig<<shiftCount;
+ *zExpPtr = 1 - shiftCount;
+
+}
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
+| double-precision floating-point value, returning the result. After being
+| shifted into the proper positions, the three fields are simply added
+| together to form the result. This means that any integer portion of `zSig'
+| will be added into the exponent. Since a properly normalized significand
+| will have an integer portion equal to 1, the `zExp' input should be 1 less
+| than the desired result exponent whenever `zSig' is a complete, normalized
+| significand.
+*----------------------------------------------------------------------------*/
+
+INLINE float64 packFloat64(flag zSign, int_fast16_t zExp, uint64_t zSig)
+{
+
+ return make_float64(
+ ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper double-precision floating-
+| point value corresponding to the abstract input. Ordinarily, the abstract
+| value is simply rounded and packed into the double-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly. However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned. If the abstract value is too small, the input value is rounded
+| to a subnormal number, and the underflow and inexact exceptions are raised
+| if the abstract input cannot be represented exactly as a subnormal double-
+| precision floating-point number.
+| The input significand `zSig' has its binary point between bits 62
+| and 61, which is 10 bits to the left of the usual location. This shifted
+| significand must be normalized or smaller. If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding. In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float64 roundAndPackFloat64(flag zSign, int_fast16_t zExp, uint64_t zSig STATUS_PARAM)
+{
+ int8 roundingMode;
+ flag roundNearestEven;
+ int_fast16_t roundIncrement, roundBits;
+ flag isTiny;
+
+ roundingMode = STATUS(float_rounding_mode);
+ roundNearestEven = ( roundingMode == float_round_nearest_even );
+ roundIncrement = 0x200;
+ if ( ! roundNearestEven ) {
+ if ( roundingMode == float_round_to_zero ) {
+ roundIncrement = 0;
+ }
+ else {
+ roundIncrement = 0x3FF;
+ if ( zSign ) {
+ if ( roundingMode == float_round_up ) roundIncrement = 0;
+ }
+ else {
+ if ( roundingMode == float_round_down ) roundIncrement = 0;
+ }
+ }
+ }
+ roundBits = zSig & 0x3FF;
+ if ( 0x7FD <= (uint16_t) zExp ) {
+ if ( ( 0x7FD < zExp )
+ || ( ( zExp == 0x7FD )
+ && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
+ ) {
+ float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
+ return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 ));
+ }
+ if ( zExp < 0 ) {
+ if (STATUS(flush_to_zero)) {
+ float_raise(float_flag_output_denormal STATUS_VAR);
+ return packFloat64(zSign, 0, 0);
+ }
+ isTiny =
+ ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
+ || ( zExp < -1 )
+ || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
+ shift64RightJamming( zSig, - zExp, &zSig );
+ zExp = 0;
+ roundBits = zSig & 0x3FF;
+ if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
+ }
+ }
+ if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
+ zSig = ( zSig + roundIncrement )>>10;
+ zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
+ if ( zSig == 0 ) zExp = 0;
+ return packFloat64( zSign, zExp, zSig );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper double-precision floating-
+| point value corresponding to the abstract input. This routine is just like
+| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
+| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+| floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+static float64
+ normalizeRoundAndPackFloat64(flag zSign, int_fast16_t zExp, uint64_t zSig STATUS_PARAM)
+{
+ int8 shiftCount;
+
+ shiftCount = countLeadingZeros64( zSig ) - 1;
+ return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount STATUS_VAR);
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the extended double-precision floating-point
+| value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE uint64_t extractFloatx80Frac( floatx80 a )
+{
+
+ return a.low;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the extended double-precision floating-point
+| value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE int32 extractFloatx80Exp( floatx80 a )
+{
+
+ return a.high & 0x7FFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the extended double-precision floating-point value
+| `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE flag extractFloatx80Sign( floatx80 a )
+{
+
+ return a.high>>15;
+
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal extended double-precision floating-point value
+| represented by the denormalized significand `aSig'. The normalized exponent
+| and significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+static void
+ normalizeFloatx80Subnormal( uint64_t aSig, int32 *zExpPtr, uint64_t *zSigPtr )
+{
+ int8 shiftCount;
+
+ shiftCount = countLeadingZeros64( aSig );
+ *zSigPtr = aSig<<shiftCount;
+ *zExpPtr = 1 - shiftCount;
+
+}
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
+| extended double-precision floating-point value, returning the result.
+*----------------------------------------------------------------------------*/
+
+INLINE floatx80 packFloatx80( flag zSign, int32 zExp, uint64_t zSig )
+{
+ floatx80 z;
+
+ z.low = zSig;
+ z.high = ( ( (uint16_t) zSign )<<15 ) + zExp;
+ return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0' and `zSig1',
+| and returns the proper extended double-precision floating-point value
+| corresponding to the abstract input. Ordinarily, the abstract value is
+| rounded and packed into the extended double-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly. However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned. If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal extended
+| double-precision floating-point number.
+| If `roundingPrecision' is 32 or 64, the result is rounded to the same
+| number of bits as single or double precision, respectively. Otherwise, the
+| result is rounded to the full precision of the extended double-precision
+| format.
+| The input significand must be normalized or smaller. If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding. The
+| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static floatx80
+ roundAndPackFloatx80(
+ int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1
+ STATUS_PARAM)
+{
+ int8 roundingMode;
+ flag roundNearestEven, increment, isTiny;
+ int64 roundIncrement, roundMask, roundBits;
+
+ roundingMode = STATUS(float_rounding_mode);
+ roundNearestEven = ( roundingMode == float_round_nearest_even );
+ if ( roundingPrecision == 80 ) goto precision80;
+ if ( roundingPrecision == 64 ) {
+ roundIncrement = LIT64( 0x0000000000000400 );
+ roundMask = LIT64( 0x00000000000007FF );
+ }
+ else if ( roundingPrecision == 32 ) {
+ roundIncrement = LIT64( 0x0000008000000000 );
+ roundMask = LIT64( 0x000000FFFFFFFFFF );
+ }
+ else {
+ goto precision80;
+ }
+ zSig0 |= ( zSig1 != 0 );
+ if ( ! roundNearestEven ) {
+ if ( roundingMode == float_round_to_zero ) {
+ roundIncrement = 0;
+ }
+ else {
+ roundIncrement = roundMask;
+ if ( zSign ) {
+ if ( roundingMode == float_round_up ) roundIncrement = 0;
+ }
+ else {
+ if ( roundingMode == float_round_down ) roundIncrement = 0;
+ }
+ }
+ }
+ roundBits = zSig0 & roundMask;
+ if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
+ if ( ( 0x7FFE < zExp )
+ || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
+ ) {
+ goto overflow;
+ }
+ if ( zExp <= 0 ) {
+ if (STATUS(flush_to_zero)) {
+ float_raise(float_flag_output_denormal STATUS_VAR);
+ return packFloatx80(zSign, 0, 0);
+ }
+ isTiny =
+ ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
+ || ( zExp < 0 )
+ || ( zSig0 <= zSig0 + roundIncrement );
+ shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
+ zExp = 0;
+ roundBits = zSig0 & roundMask;
+ if ( isTiny && roundBits ) float_raise( float_flag_underflow STATUS_VAR);
+ if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
+ zSig0 += roundIncrement;
+ if ( (int64_t) zSig0 < 0 ) zExp = 1;
+ roundIncrement = roundMask + 1;
+ if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
+ roundMask |= roundIncrement;
+ }
+ zSig0 &= ~ roundMask;
+ return packFloatx80( zSign, zExp, zSig0 );
+ }
+ }
+ if ( roundBits ) STATUS(float_exception_flags) |= float_flag_inexact;
+ zSig0 += roundIncrement;
+ if ( zSig0 < roundIncrement ) {
+ ++zExp;
+ zSig0 = LIT64( 0x8000000000000000 );
+ }
+ roundIncrement = roundMask + 1;
+ if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
+ roundMask |= roundIncrement;
+ }
+ zSig0 &= ~ roundMask;
+ if ( zSig0 == 0 ) zExp = 0;
+ return packFloatx80( zSign, zExp, zSig0 );
+ precision80:
+ increment = ( (int64_t) zSig1 < 0 );
+ if ( ! roundNearestEven ) {
+ if ( roundingMode == float_round_to_zero ) {
+ increment = 0;
+ }
+ else {
+ if ( zSign ) {
+ increment = ( roundingMode == float_round_down ) && zSig1;
+ }
+ else {
+ increment = ( roundingMode == float_round_up ) && zSig1;
+ }
+ }
+ }
+ if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
+ if ( ( 0x7FFE < zExp )
+ || ( ( zExp == 0x7FFE )
+ && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
+ && increment
+ )
+ ) {
+ roundMask = 0;
+ overflow:
+ float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
+ if ( ( roundingMode == float_round_to_zero )
+ || ( zSign && ( roundingMode == float_round_up ) )
+ || ( ! zSign && ( roundingMode == float_round_down ) )
+ ) {
+ return packFloatx80( zSign, 0x7FFE, ~ roundMask );
+ }
+ return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+ }
+ if ( zExp <= 0 ) {
+ isTiny =
+ ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
+ || ( zExp < 0 )
+ || ! increment
+ || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
+ shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
+ zExp = 0;
+ if ( isTiny && zSig1 ) float_raise( float_flag_underflow STATUS_VAR);
+ if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
+ if ( roundNearestEven ) {
+ increment = ( (int64_t) zSig1 < 0 );
+ }
+ else {
+ if ( zSign ) {
+ increment = ( roundingMode == float_round_down ) && zSig1;
+ }
+ else {
+ increment = ( roundingMode == float_round_up ) && zSig1;
+ }
+ }
+ if ( increment ) {
+ ++zSig0;
+ zSig0 &=
+ ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
+ if ( (int64_t) zSig0 < 0 ) zExp = 1;
+ }
+ return packFloatx80( zSign, zExp, zSig0 );
+ }
+ }
+ if ( zSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
+ if ( increment ) {
+ ++zSig0;
+ if ( zSig0 == 0 ) {
+ ++zExp;
+ zSig0 = LIT64( 0x8000000000000000 );
+ }
+ else {
+ zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
+ }
+ }
+ else {
+ if ( zSig0 == 0 ) zExp = 0;
+ }
+ return packFloatx80( zSign, zExp, zSig0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent
+| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
+| and returns the proper extended double-precision floating-point value
+| corresponding to the abstract input. This routine is just like
+| `roundAndPackFloatx80' except that the input significand does not have to be
+| normalized.
+*----------------------------------------------------------------------------*/
+
+static floatx80
+ normalizeRoundAndPackFloatx80(
+ int8 roundingPrecision, flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1
+ STATUS_PARAM)
+{
+ int8 shiftCount;
+
+ if ( zSig0 == 0 ) {
+ zSig0 = zSig1;
+ zSig1 = 0;
+ zExp -= 64;
+ }
+ shiftCount = countLeadingZeros64( zSig0 );
+ shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
+ zExp -= shiftCount;
+ return
+ roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 STATUS_VAR);
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the least-significant 64 fraction bits of the quadruple-precision
+| floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE uint64_t extractFloat128Frac1( float128 a )
+{
+
+ return a.low;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the most-significant 48 fraction bits of the quadruple-precision
+| floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE uint64_t extractFloat128Frac0( float128 a )
+{
+
+ return a.high & LIT64( 0x0000FFFFFFFFFFFF );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the quadruple-precision floating-point value
+| `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE int32 extractFloat128Exp( float128 a )
+{
+
+ return ( a.high>>48 ) & 0x7FFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the quadruple-precision floating-point value `a'.
+*----------------------------------------------------------------------------*/
+
+INLINE flag extractFloat128Sign( float128 a )
+{
+
+ return a.high>>63;
+
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal quadruple-precision floating-point value
+| represented by the denormalized significand formed by the concatenation of
+| `aSig0' and `aSig1'. The normalized exponent is stored at the location
+| pointed to by `zExpPtr'. The most significant 49 bits of the normalized
+| significand are stored at the location pointed to by `zSig0Ptr', and the
+| least significant 64 bits of the normalized significand are stored at the
+| location pointed to by `zSig1Ptr'.
+*----------------------------------------------------------------------------*/
+
+static void
+ normalizeFloat128Subnormal(
+ uint64_t aSig0,
+ uint64_t aSig1,
+ int32 *zExpPtr,
+ uint64_t *zSig0Ptr,
+ uint64_t *zSig1Ptr
+ )
+{
+ int8 shiftCount;
+
+ if ( aSig0 == 0 ) {
+ shiftCount = countLeadingZeros64( aSig1 ) - 15;
+ if ( shiftCount < 0 ) {
+ *zSig0Ptr = aSig1>>( - shiftCount );
+ *zSig1Ptr = aSig1<<( shiftCount & 63 );
+ }
+ else {
+ *zSig0Ptr = aSig1<<shiftCount;
+ *zSig1Ptr = 0;
+ }
+ *zExpPtr = - shiftCount - 63;
+ }
+ else {
+ shiftCount = countLeadingZeros64( aSig0 ) - 15;
+ shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
+ *zExpPtr = 1 - shiftCount;
+ }
+
+}
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', the exponent `zExp', and the significand formed
+| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
+| floating-point value, returning the result. After being shifted into the
+| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
+| added together to form the most significant 32 bits of the result. This
+| means that any integer portion of `zSig0' will be added into the exponent.
+| Since a properly normalized significand will have an integer portion equal
+| to 1, the `zExp' input should be 1 less than the desired result exponent
+| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
+| significand.
+*----------------------------------------------------------------------------*/
+
+INLINE float128
+ packFloat128( flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 )
+{
+ float128 z;
+
+ z.low = zSig1;
+ z.high = ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<48 ) + zSig0;
+ return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0', `zSig1',
+| and `zSig2', and returns the proper quadruple-precision floating-point value
+| corresponding to the abstract input. Ordinarily, the abstract value is
+| simply rounded and packed into the quadruple-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly. However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned. If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal quadruple-
+| precision floating-point number.
+| The input significand must be normalized or smaller. If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding. In the
+| usual case that the input significand is normalized, `zExp' must be 1 less
+| than the ``true'' floating-point exponent. The handling of underflow and
+| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float128
+ roundAndPackFloat128(
+ flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1, uint64_t zSig2 STATUS_PARAM)
+{
+ int8 roundingMode;
+ flag roundNearestEven, increment, isTiny;
+
+ roundingMode = STATUS(float_rounding_mode);
+ roundNearestEven = ( roundingMode == float_round_nearest_even );
+ increment = ( (int64_t) zSig2 < 0 );
+ if ( ! roundNearestEven ) {
+ if ( roundingMode == float_round_to_zero ) {
+ increment = 0;
+ }
+ else {
+ if ( zSign ) {
+ increment = ( roundingMode == float_round_down ) && zSig2;
+ }
+ else {
+ increment = ( roundingMode == float_round_up ) && zSig2;
+ }
+ }
+ }
+ if ( 0x7FFD <= (uint32_t) zExp ) {
+ if ( ( 0x7FFD < zExp )
+ || ( ( zExp == 0x7FFD )
+ && eq128(
+ LIT64( 0x0001FFFFFFFFFFFF ),
+ LIT64( 0xFFFFFFFFFFFFFFFF ),
+ zSig0,
+ zSig1
+ )
+ && increment
+ )
+ ) {
+ float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
+ if ( ( roundingMode == float_round_to_zero )
+ || ( zSign && ( roundingMode == float_round_up ) )
+ || ( ! zSign && ( roundingMode == float_round_down ) )
+ ) {
+ return
+ packFloat128(
+ zSign,
+ 0x7FFE,
+ LIT64( 0x0000FFFFFFFFFFFF ),
+ LIT64( 0xFFFFFFFFFFFFFFFF )
+ );
+ }
+ return packFloat128( zSign, 0x7FFF, 0, 0 );
+ }
+ if ( zExp < 0 ) {
+ if (STATUS(flush_to_zero)) {
+ float_raise(float_flag_output_denormal STATUS_VAR);
+ return packFloat128(zSign, 0, 0, 0);
+ }
+ isTiny =
+ ( STATUS(float_detect_tininess) == float_tininess_before_rounding )
+ || ( zExp < -1 )
+ || ! increment
+ || lt128(
+ zSig0,
+ zSig1,
+ LIT64( 0x0001FFFFFFFFFFFF ),
+ LIT64( 0xFFFFFFFFFFFFFFFF )
+ );
+ shift128ExtraRightJamming(
+ zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
+ zExp = 0;
+ if ( isTiny && zSig2 ) float_raise( float_flag_underflow STATUS_VAR);
+ if ( roundNearestEven ) {
+ increment = ( (int64_t) zSig2 < 0 );
+ }
+ else {
+ if ( zSign ) {
+ increment = ( roundingMode == float_round_down ) && zSig2;
+ }
+ else {
+ increment = ( roundingMode == float_round_up ) && zSig2;
+ }
+ }
+ }
+ }
+ if ( zSig2 ) STATUS(float_exception_flags) |= float_flag_inexact;
+ if ( increment ) {
+ add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
+ zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
+ }
+ else {
+ if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
+ }
+ return packFloat128( zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand formed by the concatenation of `zSig0' and `zSig1', and
+| returns the proper quadruple-precision floating-point value corresponding
+| to the abstract input. This routine is just like `roundAndPackFloat128'
+| except that the input significand has fewer bits and does not have to be
+| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
+| point exponent.
+*----------------------------------------------------------------------------*/
+
+static float128
+ normalizeRoundAndPackFloat128(
+ flag zSign, int32 zExp, uint64_t zSig0, uint64_t zSig1 STATUS_PARAM)
+{
+ int8 shiftCount;
+ uint64_t zSig2;
+
+ if ( zSig0 == 0 ) {
+ zSig0 = zSig1;
+ zSig1 = 0;
+ zExp -= 64;
+ }
+ shiftCount = countLeadingZeros64( zSig0 ) - 15;
+ if ( 0 <= shiftCount ) {
+ zSig2 = 0;
+ shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
+ }
+ else {
+ shift128ExtraRightJamming(
+ zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
+ }
+ zExp -= shiftCount;
+ return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR);
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a'
+| to the single-precision floating-point format. The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 int32_to_float32( int32 a STATUS_PARAM )
+{
+ flag zSign;
+
+ if ( a == 0 ) return float32_zero;
+ if ( a == (int32_t) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
+ zSign = ( a < 0 );
+ return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a'
+| to the double-precision floating-point format. The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 int32_to_float64( int32 a STATUS_PARAM )
+{
+ flag zSign;
+ uint32 absA;
+ int8 shiftCount;
+ uint64_t zSig;
+
+ if ( a == 0 ) return float64_zero;
+ zSign = ( a < 0 );
+ absA = zSign ? - a : a;
+ shiftCount = countLeadingZeros32( absA ) + 21;
+ zSig = absA;
+ return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a'
+| to the extended double-precision floating-point format. The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 int32_to_floatx80( int32 a STATUS_PARAM )
+{
+ flag zSign;
+ uint32 absA;
+ int8 shiftCount;
+ uint64_t zSig;
+
+ if ( a == 0 ) return packFloatx80( 0, 0, 0 );
+ zSign = ( a < 0 );
+ absA = zSign ? - a : a;
+ shiftCount = countLeadingZeros32( absA ) + 32;
+ zSig = absA;
+ return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a' to
+| the quadruple-precision floating-point format. The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 int32_to_float128( int32 a STATUS_PARAM )
+{
+ flag zSign;
+ uint32 absA;
+ int8 shiftCount;
+ uint64_t zSig0;
+
+ if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
+ zSign = ( a < 0 );
+ absA = zSign ? - a : a;
+ shiftCount = countLeadingZeros32( absA ) + 17;
+ zSig0 = absA;
+ return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a'
+| to the single-precision floating-point format. The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 int64_to_float32( int64 a STATUS_PARAM )
+{
+ flag zSign;
+ uint64 absA;
+ int8 shiftCount;
+
+ if ( a == 0 ) return float32_zero;
+ zSign = ( a < 0 );
+ absA = zSign ? - a : a;
+ shiftCount = countLeadingZeros64( absA ) - 40;
+ if ( 0 <= shiftCount ) {
+ return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
+ }
+ else {
+ shiftCount += 7;
+ if ( shiftCount < 0 ) {
+ shift64RightJamming( absA, - shiftCount, &absA );
+ }
+ else {
+ absA <<= shiftCount;
+ }
+ return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA STATUS_VAR );
+ }
+
+}
+
+float32 uint64_to_float32( uint64 a STATUS_PARAM )
+{
+ int8 shiftCount;
+
+ if ( a == 0 ) return float32_zero;
+ shiftCount = countLeadingZeros64( a ) - 40;
+ if ( 0 <= shiftCount ) {
+ return packFloat32( 1 > 0, 0x95 - shiftCount, a<<shiftCount );
+ }
+ else {
+ shiftCount += 7;
+ if ( shiftCount < 0 ) {
+ shift64RightJamming( a, - shiftCount, &a );
+ }
+ else {
+ a <<= shiftCount;
+ }
+ return roundAndPackFloat32( 1 > 0, 0x9C - shiftCount, a STATUS_VAR );
+ }
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a'
+| to the double-precision floating-point format. The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 int64_to_float64( int64 a STATUS_PARAM )
+{
+ flag zSign;
+
+ if ( a == 0 ) return float64_zero;
+ if ( a == (int64_t) LIT64( 0x8000000000000000 ) ) {
+ return packFloat64( 1, 0x43E, 0 );
+ }
+ zSign = ( a < 0 );
+ return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a STATUS_VAR );
+
+}
+
+float64 uint64_to_float64( uint64 a STATUS_PARAM )
+{
+ if ( a == 0 ) return float64_zero;
+ return normalizeRoundAndPackFloat64( 0, 0x43C, a STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a'
+| to the extended double-precision floating-point format. The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 int64_to_floatx80( int64 a STATUS_PARAM )
+{
+ flag zSign;
+ uint64 absA;
+ int8 shiftCount;
+
+ if ( a == 0 ) return packFloatx80( 0, 0, 0 );
+ zSign = ( a < 0 );
+ absA = zSign ? - a : a;
+ shiftCount = countLeadingZeros64( absA );
+ return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a' to
+| the quadruple-precision floating-point format. The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 int64_to_float128( int64 a STATUS_PARAM )
+{
+ flag zSign;
+ uint64 absA;
+ int8 shiftCount;
+ int32 zExp;
+ uint64_t zSig0, zSig1;
+
+ if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
+ zSign = ( a < 0 );
+ absA = zSign ? - a : a;
+ shiftCount = countLeadingZeros64( absA ) + 49;
+ zExp = 0x406E - shiftCount;
+ if ( 64 <= shiftCount ) {
+ zSig1 = 0;
+ zSig0 = absA;
+ shiftCount -= 64;
+ }
+ else {
+ zSig1 = absA;
+ zSig0 = 0;
+ }
+ shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
+ return packFloat128( zSign, zExp, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 32-bit two's complement integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode. If `a' is a NaN, the largest
+| positive integer is returned. Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 float32_to_int32( float32 a STATUS_PARAM )
+{
+ flag aSign;
+ int_fast16_t aExp, shiftCount;
+ uint32_t aSig;
+ uint64_t aSig64;
+
+ a = float32_squash_input_denormal(a STATUS_VAR);
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+ if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
+ if ( aExp ) aSig |= 0x00800000;
+ shiftCount = 0xAF - aExp;
+ aSig64 = aSig;
+ aSig64 <<= 32;
+ if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
+ return roundAndPackInt32( aSign, aSig64 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 32-bit two's complement integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int32 float32_to_int32_round_to_zero( float32 a STATUS_PARAM )
+{
+ flag aSign;
+ int_fast16_t aExp, shiftCount;
+ uint32_t aSig;
+ int32_t z;
+ a = float32_squash_input_denormal(a STATUS_VAR);
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+ shiftCount = aExp - 0x9E;
+ if ( 0 <= shiftCount ) {
+ if ( float32_val(a) != 0xCF000000 ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
+ }
+ return (int32_t) 0x80000000;
+ }
+ else if ( aExp <= 0x7E ) {
+ if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
+ return 0;
+ }
+ aSig = ( aSig | 0x00800000 )<<8;
+ z = aSig>>( - shiftCount );
+ if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ if ( aSign ) z = - z;
+ return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 16-bit two's complement integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int_fast16_t float32_to_int16_round_to_zero(float32 a STATUS_PARAM)
+{
+ flag aSign;
+ int_fast16_t aExp, shiftCount;
+ uint32_t aSig;
+ int32 z;
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+ shiftCount = aExp - 0x8E;
+ if ( 0 <= shiftCount ) {
+ if ( float32_val(a) != 0xC7000000 ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
+ return 0x7FFF;
+ }
+ }
+ return (int32_t) 0xffff8000;
+ }
+ else if ( aExp <= 0x7E ) {
+ if ( aExp | aSig ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ return 0;
+ }
+ shiftCount -= 0x10;
+ aSig = ( aSig | 0x00800000 )<<8;
+ z = aSig>>( - shiftCount );
+ if ( (uint32_t) ( aSig<<( shiftCount & 31 ) ) ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ if ( aSign ) {
+ z = - z;
+ }
+ return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 64-bit two's complement integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode. If `a' is a NaN, the largest
+| positive integer is returned. Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 float32_to_int64( float32 a STATUS_PARAM )
+{
+ flag aSign;
+ int_fast16_t aExp, shiftCount;
+ uint32_t aSig;
+ uint64_t aSig64, aSigExtra;
+ a = float32_squash_input_denormal(a STATUS_VAR);
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+ shiftCount = 0xBE - aExp;
+ if ( shiftCount < 0 ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
+ return LIT64( 0x7FFFFFFFFFFFFFFF );
+ }
+ return (int64_t) LIT64( 0x8000000000000000 );
+ }
+ if ( aExp ) aSig |= 0x00800000;
+ aSig64 = aSig;
+ aSig64 <<= 40;
+ shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
+ return roundAndPackInt64( aSign, aSig64, aSigExtra STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 64-bit two's complement integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero. If
+| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
+| conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int64 float32_to_int64_round_to_zero( float32 a STATUS_PARAM )
+{
+ flag aSign;
+ int_fast16_t aExp, shiftCount;
+ uint32_t aSig;
+ uint64_t aSig64;
+ int64 z;
+ a = float32_squash_input_denormal(a STATUS_VAR);
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+ shiftCount = aExp - 0xBE;
+ if ( 0 <= shiftCount ) {
+ if ( float32_val(a) != 0xDF000000 ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
+ return LIT64( 0x7FFFFFFFFFFFFFFF );
+ }
+ }
+ return (int64_t) LIT64( 0x8000000000000000 );
+ }
+ else if ( aExp <= 0x7E ) {
+ if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
+ return 0;
+ }
+ aSig64 = aSig | 0x00800000;
+ aSig64 <<= 40;
+ z = aSig64>>( - shiftCount );
+ if ( (uint64_t) ( aSig64<<( shiftCount & 63 ) ) ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ if ( aSign ) z = - z;
+ return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the double-precision floating-point format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float32_to_float64( float32 a STATUS_PARAM )
+{
+ flag aSign;
+ int_fast16_t aExp;
+ uint32_t aSig;
+ a = float32_squash_input_denormal(a STATUS_VAR);
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+ if ( aExp == 0xFF ) {
+ if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
+ return packFloat64( aSign, 0x7FF, 0 );
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
+ normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+ --aExp;
+ }
+ return packFloat64( aSign, aExp + 0x380, ( (uint64_t) aSig )<<29 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the extended double-precision floating-point format. The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 float32_to_floatx80( float32 a STATUS_PARAM )
+{
+ flag aSign;
+ int_fast16_t aExp;
+ uint32_t aSig;
+
+ a = float32_squash_input_denormal(a STATUS_VAR);
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+ if ( aExp == 0xFF ) {
+ if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
+ return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
+ normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+ }
+ aSig |= 0x00800000;
+ return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the double-precision floating-point format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float32_to_float128( float32 a STATUS_PARAM )
+{
+ flag aSign;
+ int_fast16_t aExp;
+ uint32_t aSig;
+
+ a = float32_squash_input_denormal(a STATUS_VAR);
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+ if ( aExp == 0xFF ) {
+ if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
+ return packFloat128( aSign, 0x7FFF, 0, 0 );
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
+ normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+ --aExp;
+ }
+ return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Rounds the single-precision floating-point value `a' to an integer, and
+| returns the result as a single-precision floating-point value. The
+| operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_round_to_int( float32 a STATUS_PARAM)
+{
+ flag aSign;
+ int_fast16_t aExp;
+ uint32_t lastBitMask, roundBitsMask;
+ int8 roundingMode;
+ uint32_t z;
+ a = float32_squash_input_denormal(a STATUS_VAR);
+
+ aExp = extractFloat32Exp( a );
+ if ( 0x96 <= aExp ) {
+ if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
+ return propagateFloat32NaN( a, a STATUS_VAR );
+ }
+ return a;
+ }
+ if ( aExp <= 0x7E ) {
+ if ( (uint32_t) ( float32_val(a)<<1 ) == 0 ) return a;
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ aSign = extractFloat32Sign( a );
+ switch ( STATUS(float_rounding_mode) ) {
+ case float_round_nearest_even:
+ if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
+ return packFloat32( aSign, 0x7F, 0 );
+ }
+ break;
+ case float_round_down:
+ return make_float32(aSign ? 0xBF800000 : 0);
+ case float_round_up:
+ return make_float32(aSign ? 0x80000000 : 0x3F800000);
+ }
+ return packFloat32( aSign, 0, 0 );
+ }
+ lastBitMask = 1;
+ lastBitMask <<= 0x96 - aExp;
+ roundBitsMask = lastBitMask - 1;
+ z = float32_val(a);
+ roundingMode = STATUS(float_rounding_mode);
+ if ( roundingMode == float_round_nearest_even ) {
+ z += lastBitMask>>1;
+ if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
+ }
+ else if ( roundingMode != float_round_to_zero ) {
+ if ( extractFloat32Sign( make_float32(z) ) ^ ( roundingMode == float_round_up ) ) {
+ z += roundBitsMask;
+ }
+ }
+ z &= ~ roundBitsMask;
+ if ( z != float32_val(a) ) STATUS(float_exception_flags) |= float_flag_inexact;
+ return make_float32(z);
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the single-precision
+| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
+| before being returned. `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float32 addFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
+{
+ int_fast16_t aExp, bExp, zExp;
+ uint32_t aSig, bSig, zSig;
+ int_fast16_t expDiff;
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ bSig = extractFloat32Frac( b );
+ bExp = extractFloat32Exp( b );
+ expDiff = aExp - bExp;
+ aSig <<= 6;
+ bSig <<= 6;
+ if ( 0 < expDiff ) {
+ if ( aExp == 0xFF ) {
+ if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+ return a;
+ }
+ if ( bExp == 0 ) {
+ --expDiff;
+ }
+ else {
+ bSig |= 0x20000000;
+ }
+ shift32RightJamming( bSig, expDiff, &bSig );
+ zExp = aExp;
+ }
+ else if ( expDiff < 0 ) {
+ if ( bExp == 0xFF ) {
+ if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+ return packFloat32( zSign, 0xFF, 0 );
+ }
+ if ( aExp == 0 ) {
+ ++expDiff;
+ }
+ else {
+ aSig |= 0x20000000;
+ }
+ shift32RightJamming( aSig, - expDiff, &aSig );
+ zExp = bExp;
+ }
+ else {
+ if ( aExp == 0xFF ) {
+ if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+ return a;
+ }
+ if ( aExp == 0 ) {
+ if (STATUS(flush_to_zero)) {
+ if (aSig | bSig) {
+ float_raise(float_flag_output_denormal STATUS_VAR);
+ }
+ return packFloat32(zSign, 0, 0);
+ }
+ return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
+ }
+ zSig = 0x40000000 + aSig + bSig;
+ zExp = aExp;
+ goto roundAndPack;
+ }
+ aSig |= 0x20000000;
+ zSig = ( aSig + bSig )<<1;
+ --zExp;
+ if ( (int32_t) zSig < 0 ) {
+ zSig = aSig + bSig;
+ ++zExp;
+ }
+ roundAndPack:
+ return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the single-
+| precision floating-point values `a' and `b'. If `zSign' is 1, the
+| difference is negated before being returned. `zSign' is ignored if the
+| result is a NaN. The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float32 subFloat32Sigs( float32 a, float32 b, flag zSign STATUS_PARAM)
+{
+ int_fast16_t aExp, bExp, zExp;
+ uint32_t aSig, bSig, zSig;
+ int_fast16_t expDiff;
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ bSig = extractFloat32Frac( b );
+ bExp = extractFloat32Exp( b );
+ expDiff = aExp - bExp;
+ aSig <<= 7;
+ bSig <<= 7;
+ if ( 0 < expDiff ) goto aExpBigger;
+ if ( expDiff < 0 ) goto bExpBigger;
+ if ( aExp == 0xFF ) {
+ if ( aSig | bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float32_default_nan;
+ }
+ if ( aExp == 0 ) {
+ aExp = 1;
+ bExp = 1;
+ }
+ if ( bSig < aSig ) goto aBigger;
+ if ( aSig < bSig ) goto bBigger;
+ return packFloat32( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
+ bExpBigger:
+ if ( bExp == 0xFF ) {
+ if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+ return packFloat32( zSign ^ 1, 0xFF, 0 );
+ }
+ if ( aExp == 0 ) {
+ ++expDiff;
+ }
+ else {
+ aSig |= 0x40000000;
+ }
+ shift32RightJamming( aSig, - expDiff, &aSig );
+ bSig |= 0x40000000;
+ bBigger:
+ zSig = bSig - aSig;
+ zExp = bExp;
+ zSign ^= 1;
+ goto normalizeRoundAndPack;
+ aExpBigger:
+ if ( aExp == 0xFF ) {
+ if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+ return a;
+ }
+ if ( bExp == 0 ) {
+ --expDiff;
+ }
+ else {
+ bSig |= 0x40000000;
+ }
+ shift32RightJamming( bSig, expDiff, &bSig );
+ aSig |= 0x40000000;
+ aBigger:
+ zSig = aSig - bSig;
+ zExp = aExp;
+ normalizeRoundAndPack:
+ --zExp;
+ return normalizeRoundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the single-precision floating-point values `a'
+| and `b'. The operation is performed according to the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_add( float32 a, float32 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+ a = float32_squash_input_denormal(a STATUS_VAR);
+ b = float32_squash_input_denormal(b STATUS_VAR);
+
+ aSign = extractFloat32Sign( a );
+ bSign = extractFloat32Sign( b );
+ if ( aSign == bSign ) {
+ return addFloat32Sigs( a, b, aSign STATUS_VAR);
+ }
+ else {
+ return subFloat32Sigs( a, b, aSign STATUS_VAR );
+ }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the single-precision floating-point values
+| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_sub( float32 a, float32 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+ a = float32_squash_input_denormal(a STATUS_VAR);
+ b = float32_squash_input_denormal(b STATUS_VAR);
+
+ aSign = extractFloat32Sign( a );
+ bSign = extractFloat32Sign( b );
+ if ( aSign == bSign ) {
+ return subFloat32Sigs( a, b, aSign STATUS_VAR );
+ }
+ else {
+ return addFloat32Sigs( a, b, aSign STATUS_VAR );
+ }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the single-precision floating-point values
+| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_mul( float32 a, float32 b STATUS_PARAM )
+{
+ flag aSign, bSign, zSign;
+ int_fast16_t aExp, bExp, zExp;
+ uint32_t aSig, bSig;
+ uint64_t zSig64;
+ uint32_t zSig;
+
+ a = float32_squash_input_denormal(a STATUS_VAR);
+ b = float32_squash_input_denormal(b STATUS_VAR);
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+ bSig = extractFloat32Frac( b );
+ bExp = extractFloat32Exp( b );
+ bSign = extractFloat32Sign( b );
+ zSign = aSign ^ bSign;
+ if ( aExp == 0xFF ) {
+ if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
+ return propagateFloat32NaN( a, b STATUS_VAR );
+ }
+ if ( ( bExp | bSig ) == 0 ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float32_default_nan;
+ }
+ return packFloat32( zSign, 0xFF, 0 );
+ }
+ if ( bExp == 0xFF ) {
+ if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+ if ( ( aExp | aSig ) == 0 ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float32_default_nan;
+ }
+ return packFloat32( zSign, 0xFF, 0 );
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
+ normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+ }
+ if ( bExp == 0 ) {
+ if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
+ normalizeFloat32Subnormal( bSig, &bExp, &bSig );
+ }
+ zExp = aExp + bExp - 0x7F;
+ aSig = ( aSig | 0x00800000 )<<7;
+ bSig = ( bSig | 0x00800000 )<<8;
+ shift64RightJamming( ( (uint64_t) aSig ) * bSig, 32, &zSig64 );
+ zSig = zSig64;
+ if ( 0 <= (int32_t) ( zSig<<1 ) ) {
+ zSig <<= 1;
+ --zExp;
+ }
+ return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the single-precision floating-point value `a'
+| by the corresponding value `b'. The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_div( float32 a, float32 b STATUS_PARAM )
+{
+ flag aSign, bSign, zSign;
+ int_fast16_t aExp, bExp, zExp;
+ uint32_t aSig, bSig, zSig;
+ a = float32_squash_input_denormal(a STATUS_VAR);
+ b = float32_squash_input_denormal(b STATUS_VAR);
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+ bSig = extractFloat32Frac( b );
+ bExp = extractFloat32Exp( b );
+ bSign = extractFloat32Sign( b );
+ zSign = aSign ^ bSign;
+ if ( aExp == 0xFF ) {
+ if ( aSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+ if ( bExp == 0xFF ) {
+ if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float32_default_nan;
+ }
+ return packFloat32( zSign, 0xFF, 0 );
+ }
+ if ( bExp == 0xFF ) {
+ if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+ return packFloat32( zSign, 0, 0 );
+ }
+ if ( bExp == 0 ) {
+ if ( bSig == 0 ) {
+ if ( ( aExp | aSig ) == 0 ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float32_default_nan;
+ }
+ float_raise( float_flag_divbyzero STATUS_VAR);
+ return packFloat32( zSign, 0xFF, 0 );
+ }
+ normalizeFloat32Subnormal( bSig, &bExp, &bSig );
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
+ normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+ }
+ zExp = aExp - bExp + 0x7D;
+ aSig = ( aSig | 0x00800000 )<<7;
+ bSig = ( bSig | 0x00800000 )<<8;
+ if ( bSig <= ( aSig + aSig ) ) {
+ aSig >>= 1;
+ ++zExp;
+ }
+ zSig = ( ( (uint64_t) aSig )<<32 ) / bSig;
+ if ( ( zSig & 0x3F ) == 0 ) {
+ zSig |= ( (uint64_t) bSig * zSig != ( (uint64_t) aSig )<<32 );
+ }
+ return roundAndPackFloat32( zSign, zExp, zSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the remainder of the single-precision floating-point value `a'
+| with respect to the corresponding value `b'. The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_rem( float32 a, float32 b STATUS_PARAM )
+{
+ flag aSign, zSign;
+ int_fast16_t aExp, bExp, expDiff;
+ uint32_t aSig, bSig;
+ uint32_t q;
+ uint64_t aSig64, bSig64, q64;
+ uint32_t alternateASig;
+ int32_t sigMean;
+ a = float32_squash_input_denormal(a STATUS_VAR);
+ b = float32_squash_input_denormal(b STATUS_VAR);
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+ bSig = extractFloat32Frac( b );
+ bExp = extractFloat32Exp( b );
+ if ( aExp == 0xFF ) {
+ if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
+ return propagateFloat32NaN( a, b STATUS_VAR );
+ }
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float32_default_nan;
+ }
+ if ( bExp == 0xFF ) {
+ if ( bSig ) return propagateFloat32NaN( a, b STATUS_VAR );
+ return a;
+ }
+ if ( bExp == 0 ) {
+ if ( bSig == 0 ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float32_default_nan;
+ }
+ normalizeFloat32Subnormal( bSig, &bExp, &bSig );
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return a;
+ normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+ }
+ expDiff = aExp - bExp;
+ aSig |= 0x00800000;
+ bSig |= 0x00800000;
+ if ( expDiff < 32 ) {
+ aSig <<= 8;
+ bSig <<= 8;
+ if ( expDiff < 0 ) {
+ if ( expDiff < -1 ) return a;
+ aSig >>= 1;
+ }
+ q = ( bSig <= aSig );
+ if ( q ) aSig -= bSig;
+ if ( 0 < expDiff ) {
+ q = ( ( (uint64_t) aSig )<<32 ) / bSig;
+ q >>= 32 - expDiff;
+ bSig >>= 2;
+ aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
+ }
+ else {
+ aSig >>= 2;
+ bSig >>= 2;
+ }
+ }
+ else {
+ if ( bSig <= aSig ) aSig -= bSig;
+ aSig64 = ( (uint64_t) aSig )<<40;
+ bSig64 = ( (uint64_t) bSig )<<40;
+ expDiff -= 64;
+ while ( 0 < expDiff ) {
+ q64 = estimateDiv128To64( aSig64, 0, bSig64 );
+ q64 = ( 2 < q64 ) ? q64 - 2 : 0;
+ aSig64 = - ( ( bSig * q64 )<<38 );
+ expDiff -= 62;
+ }
+ expDiff += 64;
+ q64 = estimateDiv128To64( aSig64, 0, bSig64 );
+ q64 = ( 2 < q64 ) ? q64 - 2 : 0;
+ q = q64>>( 64 - expDiff );
+ bSig <<= 6;
+ aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
+ }
+ do {
+ alternateASig = aSig;
+ ++q;
+ aSig -= bSig;
+ } while ( 0 <= (int32_t) aSig );
+ sigMean = aSig + alternateASig;
+ if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
+ aSig = alternateASig;
+ }
+ zSign = ( (int32_t) aSig < 0 );
+ if ( zSign ) aSig = - aSig;
+ return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the single-precision floating-point values
+| `a' and `b' then adding 'c', with no intermediate rounding step after the
+| multiplication. The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic 754-2008.
+| The flags argument allows the caller to select negation of the
+| addend, the intermediate product, or the final result. (The difference
+| between this and having the caller do a separate negation is that negating
+| externally will flip the sign bit on NaNs.)
+*----------------------------------------------------------------------------*/
+
+float32 float32_muladd(float32 a, float32 b, float32 c, int flags STATUS_PARAM)
+{
+ flag aSign, bSign, cSign, zSign;
+ int_fast16_t aExp, bExp, cExp, pExp, zExp, expDiff;
+ uint32_t aSig, bSig, cSig;
+ flag pInf, pZero, pSign;
+ uint64_t pSig64, cSig64, zSig64;
+ uint32_t pSig;
+ int shiftcount;
+ flag signflip, infzero;
+
+ a = float32_squash_input_denormal(a STATUS_VAR);
+ b = float32_squash_input_denormal(b STATUS_VAR);
+ c = float32_squash_input_denormal(c STATUS_VAR);
+ aSig = extractFloat32Frac(a);
+ aExp = extractFloat32Exp(a);
+ aSign = extractFloat32Sign(a);
+ bSig = extractFloat32Frac(b);
+ bExp = extractFloat32Exp(b);
+ bSign = extractFloat32Sign(b);
+ cSig = extractFloat32Frac(c);
+ cExp = extractFloat32Exp(c);
+ cSign = extractFloat32Sign(c);
+
+ infzero = ((aExp == 0 && aSig == 0 && bExp == 0xff && bSig == 0) ||
+ (aExp == 0xff && aSig == 0 && bExp == 0 && bSig == 0));
+
+ /* It is implementation-defined whether the cases of (0,inf,qnan)
+ * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
+ * they return if they do), so we have to hand this information
+ * off to the target-specific pick-a-NaN routine.
+ */
+ if (((aExp == 0xff) && aSig) ||
+ ((bExp == 0xff) && bSig) ||
+ ((cExp == 0xff) && cSig)) {
+ return propagateFloat32MulAddNaN(a, b, c, infzero STATUS_VAR);
+ }
+
+ if (infzero) {
+ float_raise(float_flag_invalid STATUS_VAR);
+ return float32_default_nan;
+ }
+
+ if (flags & float_muladd_negate_c) {
+ cSign ^= 1;
+ }
+
+ signflip = (flags & float_muladd_negate_result) ? 1 : 0;
+
+ /* Work out the sign and type of the product */
+ pSign = aSign ^ bSign;
+ if (flags & float_muladd_negate_product) {
+ pSign ^= 1;
+ }
+ pInf = (aExp == 0xff) || (bExp == 0xff);
+ pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);
+
+ if (cExp == 0xff) {
+ if (pInf && (pSign ^ cSign)) {
+ /* addition of opposite-signed infinities => InvalidOperation */
+ float_raise(float_flag_invalid STATUS_VAR);
+ return float32_default_nan;
+ }
+ /* Otherwise generate an infinity of the same sign */
+ return packFloat32(cSign ^ signflip, 0xff, 0);
+ }
+
+ if (pInf) {
+ return packFloat32(pSign ^ signflip, 0xff, 0);
+ }
+
+ if (pZero) {
+ if (cExp == 0) {
+ if (cSig == 0) {
+ /* Adding two exact zeroes */
+ if (pSign == cSign) {
+ zSign = pSign;
+ } else if (STATUS(float_rounding_mode) == float_round_down) {
+ zSign = 1;
+ } else {
+ zSign = 0;
+ }
+ return packFloat32(zSign ^ signflip, 0, 0);
+ }
+ /* Exact zero plus a denorm */
+ if (STATUS(flush_to_zero)) {
+ float_raise(float_flag_output_denormal STATUS_VAR);
+ return packFloat32(cSign ^ signflip, 0, 0);
+ }
+ }
+ /* Zero plus something non-zero : just return the something */
+ return make_float32(float32_val(c) ^ (signflip << 31));
+ }
+
+ if (aExp == 0) {
+ normalizeFloat32Subnormal(aSig, &aExp, &aSig);
+ }
+ if (bExp == 0) {
+ normalizeFloat32Subnormal(bSig, &bExp, &bSig);
+ }
+
+ /* Calculate the actual result a * b + c */
+
+ /* Multiply first; this is easy. */
+ /* NB: we subtract 0x7e where float32_mul() subtracts 0x7f
+ * because we want the true exponent, not the "one-less-than"
+ * flavour that roundAndPackFloat32() takes.
+ */
+ pExp = aExp + bExp - 0x7e;
+ aSig = (aSig | 0x00800000) << 7;
+ bSig = (bSig | 0x00800000) << 8;
+ pSig64 = (uint64_t)aSig * bSig;
+ if ((int64_t)(pSig64 << 1) >= 0) {
+ pSig64 <<= 1;
+ pExp--;
+ }
+
+ zSign = pSign ^ signflip;
+
+ /* Now pSig64 is the significand of the multiply, with the explicit bit in
+ * position 62.
+ */
+ if (cExp == 0) {
+ if (!cSig) {
+ /* Throw out the special case of c being an exact zero now */
+ shift64RightJamming(pSig64, 32, &pSig64);
+ pSig = pSig64;
+ return roundAndPackFloat32(zSign, pExp - 1,
+ pSig STATUS_VAR);
+ }
+ normalizeFloat32Subnormal(cSig, &cExp, &cSig);
+ }
+
+ cSig64 = (uint64_t)cSig << (62 - 23);
+ cSig64 |= LIT64(0x4000000000000000);
+ expDiff = pExp - cExp;
+
+ if (pSign == cSign) {
+ /* Addition */
+ if (expDiff > 0) {
+ /* scale c to match p */
+ shift64RightJamming(cSig64, expDiff, &cSig64);
+ zExp = pExp;
+ } else if (expDiff < 0) {
+ /* scale p to match c */
+ shift64RightJamming(pSig64, -expDiff, &pSig64);
+ zExp = cExp;
+ } else {
+ /* no scaling needed */
+ zExp = cExp;
+ }
+ /* Add significands and make sure explicit bit ends up in posn 62 */
+ zSig64 = pSig64 + cSig64;
+ if ((int64_t)zSig64 < 0) {
+ shift64RightJamming(zSig64, 1, &zSig64);
+ } else {
+ zExp--;
+ }
+ } else {
+ /* Subtraction */
+ if (expDiff > 0) {
+ shift64RightJamming(cSig64, expDiff, &cSig64);
+ zSig64 = pSig64 - cSig64;
+ zExp = pExp;
+ } else if (expDiff < 0) {
+ shift64RightJamming(pSig64, -expDiff, &pSig64);
+ zSig64 = cSig64 - pSig64;
+ zExp = cExp;
+ zSign ^= 1;
+ } else {
+ zExp = pExp;
+ if (cSig64 < pSig64) {
+ zSig64 = pSig64 - cSig64;
+ } else if (pSig64 < cSig64) {
+ zSig64 = cSig64 - pSig64;
+ zSign ^= 1;
+ } else {
+ /* Exact zero */
+ zSign = signflip;
+ if (STATUS(float_rounding_mode) == float_round_down) {
+ zSign ^= 1;
+ }
+ return packFloat32(zSign, 0, 0);
+ }
+ }
+ --zExp;
+ /* Normalize to put the explicit bit back into bit 62. */
+ shiftcount = countLeadingZeros64(zSig64) - 1;
+ zSig64 <<= shiftcount;
+ zExp -= shiftcount;
+ }
+ shift64RightJamming(zSig64, 32, &zSig64);
+ return roundAndPackFloat32(zSign, zExp, zSig64 STATUS_VAR);
+}
+
+
+/*----------------------------------------------------------------------------
+| Returns the square root of the single-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_sqrt( float32 a STATUS_PARAM )
+{
+ flag aSign;
+ int_fast16_t aExp, zExp;
+ uint32_t aSig, zSig;
+ uint64_t rem, term;
+ a = float32_squash_input_denormal(a STATUS_VAR);
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+ if ( aExp == 0xFF ) {
+ if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
+ if ( ! aSign ) return a;
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float32_default_nan;
+ }
+ if ( aSign ) {
+ if ( ( aExp | aSig ) == 0 ) return a;
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float32_default_nan;
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return float32_zero;
+ normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+ }
+ zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
+ aSig = ( aSig | 0x00800000 )<<8;
+ zSig = estimateSqrt32( aExp, aSig ) + 2;
+ if ( ( zSig & 0x7F ) <= 5 ) {
+ if ( zSig < 2 ) {
+ zSig = 0x7FFFFFFF;
+ goto roundAndPack;
+ }
+ aSig >>= aExp & 1;
+ term = ( (uint64_t) zSig ) * zSig;
+ rem = ( ( (uint64_t) aSig )<<32 ) - term;
+ while ( (int64_t) rem < 0 ) {
+ --zSig;
+ rem += ( ( (uint64_t) zSig )<<1 ) | 1;
+ }
+ zSig |= ( rem != 0 );
+ }
+ shift32RightJamming( zSig, 1, &zSig );
+ roundAndPack:
+ return roundAndPackFloat32( 0, zExp, zSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the binary exponential of the single-precision floating-point value
+| `a'. The operation is performed according to the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+|
+| Uses the following identities:
+|
+| 1. -------------------------------------------------------------------------
+| x x*ln(2)
+| 2 = e
+|
+| 2. -------------------------------------------------------------------------
+| 2 3 4 5 n
+| x x x x x x x
+| e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
+| 1! 2! 3! 4! 5! n!
+*----------------------------------------------------------------------------*/
+
+static const float64 float32_exp2_coefficients[15] =
+{
+ const_float64( 0x3ff0000000000000ll ), /* 1 */
+ const_float64( 0x3fe0000000000000ll ), /* 2 */
+ const_float64( 0x3fc5555555555555ll ), /* 3 */
+ const_float64( 0x3fa5555555555555ll ), /* 4 */
+ const_float64( 0x3f81111111111111ll ), /* 5 */
+ const_float64( 0x3f56c16c16c16c17ll ), /* 6 */
+ const_float64( 0x3f2a01a01a01a01all ), /* 7 */
+ const_float64( 0x3efa01a01a01a01all ), /* 8 */
+ const_float64( 0x3ec71de3a556c734ll ), /* 9 */
+ const_float64( 0x3e927e4fb7789f5cll ), /* 10 */
+ const_float64( 0x3e5ae64567f544e4ll ), /* 11 */
+ const_float64( 0x3e21eed8eff8d898ll ), /* 12 */
+ const_float64( 0x3de6124613a86d09ll ), /* 13 */
+ const_float64( 0x3da93974a8c07c9dll ), /* 14 */
+ const_float64( 0x3d6ae7f3e733b81fll ), /* 15 */
+};
+
+float32 float32_exp2( float32 a STATUS_PARAM )
+{
+ flag aSign;
+ int_fast16_t aExp;
+ uint32_t aSig;
+ float64 r, x, xn;
+ int i;
+ a = float32_squash_input_denormal(a STATUS_VAR);
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+
+ if ( aExp == 0xFF) {
+ if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
+ return (aSign) ? float32_zero : a;
+ }
+ if (aExp == 0) {
+ if (aSig == 0) return float32_one;
+ }
+
+ float_raise( float_flag_inexact STATUS_VAR);
+
+ /* ******************************* */
+ /* using float64 for approximation */
+ /* ******************************* */
+ x = float32_to_float64(a STATUS_VAR);
+ x = float64_mul(x, float64_ln2 STATUS_VAR);
+
+ xn = x;
+ r = float64_one;
+ for (i = 0 ; i < 15 ; i++) {
+ float64 f;
+
+ f = float64_mul(xn, float32_exp2_coefficients[i] STATUS_VAR);
+ r = float64_add(r, f STATUS_VAR);
+
+ xn = float64_mul(xn, x STATUS_VAR);
+ }
+
+ return float64_to_float32(r, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the binary log of the single-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+float32 float32_log2( float32 a STATUS_PARAM )
+{
+ flag aSign, zSign;
+ int_fast16_t aExp;
+ uint32_t aSig, zSig, i;
+
+ a = float32_squash_input_denormal(a STATUS_VAR);
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return packFloat32( 1, 0xFF, 0 );
+ normalizeFloat32Subnormal( aSig, &aExp, &aSig );
+ }
+ if ( aSign ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float32_default_nan;
+ }
+ if ( aExp == 0xFF ) {
+ if ( aSig ) return propagateFloat32NaN( a, float32_zero STATUS_VAR );
+ return a;
+ }
+
+ aExp -= 0x7F;
+ aSig |= 0x00800000;
+ zSign = aExp < 0;
+ zSig = aExp << 23;
+
+ for (i = 1 << 22; i > 0; i >>= 1) {
+ aSig = ( (uint64_t)aSig * aSig ) >> 23;
+ if ( aSig & 0x01000000 ) {
+ aSig >>= 1;
+ zSig |= i;
+ }
+ }
+
+ if ( zSign )
+ zSig = -zSig;
+
+ return normalizeRoundAndPackFloat32( zSign, 0x85, zSig STATUS_VAR );
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise. The invalid exception is
+| raised if either operand is a NaN. Otherwise, the comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float32_eq( float32 a, float32 b STATUS_PARAM )
+{
+ uint32_t av, bv;
+ a = float32_squash_input_denormal(a STATUS_VAR);
+ b = float32_squash_input_denormal(b STATUS_VAR);
+
+ if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+ || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+ ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return 0;
+ }
+ av = float32_val(a);
+ bv = float32_val(b);
+ return ( av == bv ) || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is less than
+| or equal to the corresponding value `b', and 0 otherwise. The invalid
+| exception is raised if either operand is a NaN. The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float32_le( float32 a, float32 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+ uint32_t av, bv;
+ a = float32_squash_input_denormal(a STATUS_VAR);
+ b = float32_squash_input_denormal(b STATUS_VAR);
+
+ if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+ || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+ ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return 0;
+ }
+ aSign = extractFloat32Sign( a );
+ bSign = extractFloat32Sign( b );
+ av = float32_val(a);
+ bv = float32_val(b);
+ if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
+ return ( av == bv ) || ( aSign ^ ( av < bv ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise. The invalid exception is
+| raised if either operand is a NaN. The comparison is performed according
+| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float32_lt( float32 a, float32 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+ uint32_t av, bv;
+ a = float32_squash_input_denormal(a STATUS_VAR);
+ b = float32_squash_input_denormal(b STATUS_VAR);
+
+ if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+ || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+ ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return 0;
+ }
+ aSign = extractFloat32Sign( a );
+ bSign = extractFloat32Sign( b );
+ av = float32_val(a);
+ bv = float32_val(b);
+ if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
+ return ( av != bv ) && ( aSign ^ ( av < bv ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point values `a' and `b' cannot
+| be compared, and 0 otherwise. The invalid exception is raised if either
+| operand is a NaN. The comparison is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float32_unordered( float32 a, float32 b STATUS_PARAM )
+{
+ a = float32_squash_input_denormal(a STATUS_VAR);
+ b = float32_squash_input_denormal(b STATUS_VAR);
+
+ if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+ || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+ ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return 1;
+ }
+ return 0;
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
+| exception. The comparison is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float32_eq_quiet( float32 a, float32 b STATUS_PARAM )
+{
+ a = float32_squash_input_denormal(a STATUS_VAR);
+ b = float32_squash_input_denormal(b STATUS_VAR);
+
+ if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+ || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+ ) {
+ if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return 0;
+ }
+ return ( float32_val(a) == float32_val(b) ) ||
+ ( (uint32_t) ( ( float32_val(a) | float32_val(b) )<<1 ) == 0 );
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is less than or
+| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
+| cause an exception. Otherwise, the comparison is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float32_le_quiet( float32 a, float32 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+ uint32_t av, bv;
+ a = float32_squash_input_denormal(a STATUS_VAR);
+ b = float32_squash_input_denormal(b STATUS_VAR);
+
+ if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+ || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+ ) {
+ if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return 0;
+ }
+ aSign = extractFloat32Sign( a );
+ bSign = extractFloat32Sign( b );
+ av = float32_val(a);
+ bv = float32_val(b);
+ if ( aSign != bSign ) return aSign || ( (uint32_t) ( ( av | bv )<<1 ) == 0 );
+ return ( av == bv ) || ( aSign ^ ( av < bv ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
+| exception. Otherwise, the comparison is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float32_lt_quiet( float32 a, float32 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+ uint32_t av, bv;
+ a = float32_squash_input_denormal(a STATUS_VAR);
+ b = float32_squash_input_denormal(b STATUS_VAR);
+
+ if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+ || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+ ) {
+ if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return 0;
+ }
+ aSign = extractFloat32Sign( a );
+ bSign = extractFloat32Sign( b );
+ av = float32_val(a);
+ bv = float32_val(b);
+ if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av | bv )<<1 ) != 0 );
+ return ( av != bv ) && ( aSign ^ ( av < bv ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the single-precision floating-point values `a' and `b' cannot
+| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
+| comparison is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float32_unordered_quiet( float32 a, float32 b STATUS_PARAM )
+{
+ a = float32_squash_input_denormal(a STATUS_VAR);
+ b = float32_squash_input_denormal(b STATUS_VAR);
+
+ if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
+ || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
+ ) {
+ if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return 1;
+ }
+ return 0;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 32-bit two's complement integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode. If `a' is a NaN, the largest
+| positive integer is returned. Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 float64_to_int32( float64 a STATUS_PARAM )
+{
+ flag aSign;
+ int_fast16_t aExp, shiftCount;
+ uint64_t aSig;
+ a = float64_squash_input_denormal(a STATUS_VAR);
+
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ aSign = extractFloat64Sign( a );
+ if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
+ if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
+ shiftCount = 0x42C - aExp;
+ if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
+ return roundAndPackInt32( aSign, aSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 32-bit two's complement integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int32 float64_to_int32_round_to_zero( float64 a STATUS_PARAM )
+{
+ flag aSign;
+ int_fast16_t aExp, shiftCount;
+ uint64_t aSig, savedASig;
+ int32_t z;
+ a = float64_squash_input_denormal(a STATUS_VAR);
+
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ aSign = extractFloat64Sign( a );
+ if ( 0x41E < aExp ) {
+ if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
+ goto invalid;
+ }
+ else if ( aExp < 0x3FF ) {
+ if ( aExp || aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
+ return 0;
+ }
+ aSig |= LIT64( 0x0010000000000000 );
+ shiftCount = 0x433 - aExp;
+ savedASig = aSig;
+ aSig >>= shiftCount;
+ z = aSig;
+ if ( aSign ) z = - z;
+ if ( ( z < 0 ) ^ aSign ) {
+ invalid:
+ float_raise( float_flag_invalid STATUS_VAR);
+ return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
+ }
+ if ( ( aSig<<shiftCount ) != savedASig ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 16-bit two's complement integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int_fast16_t float64_to_int16_round_to_zero(float64 a STATUS_PARAM)
+{
+ flag aSign;
+ int_fast16_t aExp, shiftCount;
+ uint64_t aSig, savedASig;
+ int32 z;
+
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ aSign = extractFloat64Sign( a );
+ if ( 0x40E < aExp ) {
+ if ( ( aExp == 0x7FF ) && aSig ) {
+ aSign = 0;
+ }
+ goto invalid;
+ }
+ else if ( aExp < 0x3FF ) {
+ if ( aExp || aSig ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ return 0;
+ }
+ aSig |= LIT64( 0x0010000000000000 );
+ shiftCount = 0x433 - aExp;
+ savedASig = aSig;
+ aSig >>= shiftCount;
+ z = aSig;
+ if ( aSign ) {
+ z = - z;
+ }
+ if ( ( (int16_t)z < 0 ) ^ aSign ) {
+ invalid:
+ float_raise( float_flag_invalid STATUS_VAR);
+ return aSign ? (int32_t) 0xffff8000 : 0x7FFF;
+ }
+ if ( ( aSig<<shiftCount ) != savedASig ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 64-bit two's complement integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode. If `a' is a NaN, the largest
+| positive integer is returned. Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 float64_to_int64( float64 a STATUS_PARAM )
+{
+ flag aSign;
+ int_fast16_t aExp, shiftCount;
+ uint64_t aSig, aSigExtra;
+ a = float64_squash_input_denormal(a STATUS_VAR);
+
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ aSign = extractFloat64Sign( a );
+ if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
+ shiftCount = 0x433 - aExp;
+ if ( shiftCount <= 0 ) {
+ if ( 0x43E < aExp ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ if ( ! aSign
+ || ( ( aExp == 0x7FF )
+ && ( aSig != LIT64( 0x0010000000000000 ) ) )
+ ) {
+ return LIT64( 0x7FFFFFFFFFFFFFFF );
+ }
+ return (int64_t) LIT64( 0x8000000000000000 );
+ }
+ aSigExtra = 0;
+ aSig <<= - shiftCount;
+ }
+ else {
+ shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
+ }
+ return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 64-bit two's complement integer format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int64 float64_to_int64_round_to_zero( float64 a STATUS_PARAM )
+{
+ flag aSign;
+ int_fast16_t aExp, shiftCount;
+ uint64_t aSig;
+ int64 z;
+ a = float64_squash_input_denormal(a STATUS_VAR);
+
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ aSign = extractFloat64Sign( a );
+ if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
+ shiftCount = aExp - 0x433;
+ if ( 0 <= shiftCount ) {
+ if ( 0x43E <= aExp ) {
+ if ( float64_val(a) != LIT64( 0xC3E0000000000000 ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ if ( ! aSign
+ || ( ( aExp == 0x7FF )
+ && ( aSig != LIT64( 0x0010000000000000 ) ) )
+ ) {
+ return LIT64( 0x7FFFFFFFFFFFFFFF );
+ }
+ }
+ return (int64_t) LIT64( 0x8000000000000000 );
+ }
+ z = aSig<<shiftCount;
+ }
+ else {
+ if ( aExp < 0x3FE ) {
+ if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
+ return 0;
+ }
+ z = aSig>>( - shiftCount );
+ if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ }
+ if ( aSign ) z = - z;
+ return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the single-precision floating-point format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float64_to_float32( float64 a STATUS_PARAM )
+{
+ flag aSign;
+ int_fast16_t aExp;
+ uint64_t aSig;
+ uint32_t zSig;
+ a = float64_squash_input_denormal(a STATUS_VAR);
+
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ aSign = extractFloat64Sign( a );
+ if ( aExp == 0x7FF ) {
+ if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
+ return packFloat32( aSign, 0xFF, 0 );
+ }
+ shift64RightJamming( aSig, 22, &aSig );
+ zSig = aSig;
+ if ( aExp || zSig ) {
+ zSig |= 0x40000000;
+ aExp -= 0x381;
+ }
+ return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
+
+}
+
+
+/*----------------------------------------------------------------------------
+| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
+| half-precision floating-point value, returning the result. After being
+| shifted into the proper positions, the three fields are simply added
+| together to form the result. This means that any integer portion of `zSig'
+| will be added into the exponent. Since a properly normalized significand
+| will have an integer portion equal to 1, the `zExp' input should be 1 less
+| than the desired result exponent whenever `zSig' is a complete, normalized
+| significand.
+*----------------------------------------------------------------------------*/
+static float16 packFloat16(flag zSign, int_fast16_t zExp, uint16_t zSig)
+{
+ return make_float16(
+ (((uint32_t)zSign) << 15) + (((uint32_t)zExp) << 10) + zSig);
+}
+
+/* Half precision floats come in two formats: standard IEEE and "ARM" format.
+ The latter gains extra exponent range by omitting the NaN/Inf encodings. */
+
+float32 float16_to_float32(float16 a, flag ieee STATUS_PARAM)
+{
+ flag aSign;
+ int_fast16_t aExp;
+ uint32_t aSig;
+
+ aSign = extractFloat16Sign(a);
+ aExp = extractFloat16Exp(a);
+ aSig = extractFloat16Frac(a);
+
+ if (aExp == 0x1f && ieee) {
+ if (aSig) {
+ return commonNaNToFloat32(float16ToCommonNaN(a STATUS_VAR) STATUS_VAR);
+ }
+ return packFloat32(aSign, 0xff, aSig << 13);
+ }
+ if (aExp == 0) {
+ int8 shiftCount;
+
+ if (aSig == 0) {
+ return packFloat32(aSign, 0, 0);
+ }
+
+ shiftCount = countLeadingZeros32( aSig ) - 21;
+ aSig = aSig << shiftCount;
+ aExp = -shiftCount;
+ }
+ return packFloat32( aSign, aExp + 0x70, aSig << 13);
+}
+
+float16 float32_to_float16(float32 a, flag ieee STATUS_PARAM)
+{
+ flag aSign;
+ int_fast16_t aExp;
+ uint32_t aSig;
+ uint32_t mask;
+ uint32_t increment;
+ int8 roundingMode;
+ a = float32_squash_input_denormal(a STATUS_VAR);
+
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+ if ( aExp == 0xFF ) {
+ if (aSig) {
+ /* Input is a NaN */
+ float16 r = commonNaNToFloat16( float32ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
+ if (!ieee) {
+ return packFloat16(aSign, 0, 0);
+ }
+ return r;
+ }
+ /* Infinity */
+ if (!ieee) {
+ float_raise(float_flag_invalid STATUS_VAR);
+ return packFloat16(aSign, 0x1f, 0x3ff);
+ }
+ return packFloat16(aSign, 0x1f, 0);
+ }
+ if (aExp == 0 && aSig == 0) {
+ return packFloat16(aSign, 0, 0);
+ }
+ /* Decimal point between bits 22 and 23. */
+ aSig |= 0x00800000;
+ aExp -= 0x7f;
+ if (aExp < -14) {
+ mask = 0x00ffffff;
+ if (aExp >= -24) {
+ mask >>= 25 + aExp;
+ }
+ } else {
+ mask = 0x00001fff;
+ }
+ if (aSig & mask) {
+ float_raise( float_flag_underflow STATUS_VAR );
+ roundingMode = STATUS(float_rounding_mode);
+ switch (roundingMode) {
+ case float_round_nearest_even:
+ increment = (mask + 1) >> 1;
+ if ((aSig & mask) == increment) {
+ increment = aSig & (increment << 1);
+ }
+ break;
+ case float_round_up:
+ increment = aSign ? 0 : mask;
+ break;
+ case float_round_down:
+ increment = aSign ? mask : 0;
+ break;
+ default: /* round_to_zero */
+ increment = 0;
+ break;
+ }
+ aSig += increment;
+ if (aSig >= 0x01000000) {
+ aSig >>= 1;
+ aExp++;
+ }
+ } else if (aExp < -14
+ && STATUS(float_detect_tininess) == float_tininess_before_rounding) {
+ float_raise( float_flag_underflow STATUS_VAR);
+ }
+
+ if (ieee) {
+ if (aExp > 15) {
+ float_raise( float_flag_overflow | float_flag_inexact STATUS_VAR);
+ return packFloat16(aSign, 0x1f, 0);
+ }
+ } else {
+ if (aExp > 16) {
+ float_raise(float_flag_invalid | float_flag_inexact STATUS_VAR);
+ return packFloat16(aSign, 0x1f, 0x3ff);
+ }
+ }
+ if (aExp < -24) {
+ return packFloat16(aSign, 0, 0);
+ }
+ if (aExp < -14) {
+ aSig >>= -14 - aExp;
+ aExp = -14;
+ }
+ return packFloat16(aSign, aExp + 14, aSig >> 13);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the extended double-precision floating-point format. The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 float64_to_floatx80( float64 a STATUS_PARAM )
+{
+ flag aSign;
+ int_fast16_t aExp;
+ uint64_t aSig;
+
+ a = float64_squash_input_denormal(a STATUS_VAR);
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ aSign = extractFloat64Sign( a );
+ if ( aExp == 0x7FF ) {
+ if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
+ return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
+ normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+ }
+ return
+ packFloatx80(
+ aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the quadruple-precision floating-point format. The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float64_to_float128( float64 a STATUS_PARAM )
+{
+ flag aSign;
+ int_fast16_t aExp;
+ uint64_t aSig, zSig0, zSig1;
+
+ a = float64_squash_input_denormal(a STATUS_VAR);
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ aSign = extractFloat64Sign( a );
+ if ( aExp == 0x7FF ) {
+ if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
+ return packFloat128( aSign, 0x7FFF, 0, 0 );
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
+ normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+ --aExp;
+ }
+ shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
+ return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Rounds the double-precision floating-point value `a' to an integer, and
+| returns the result as a double-precision floating-point value. The
+| operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_round_to_int( float64 a STATUS_PARAM )
+{
+ flag aSign;
+ int_fast16_t aExp;
+ uint64_t lastBitMask, roundBitsMask;
+ int8 roundingMode;
+ uint64_t z;
+ a = float64_squash_input_denormal(a STATUS_VAR);
+
+ aExp = extractFloat64Exp( a );
+ if ( 0x433 <= aExp ) {
+ if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
+ return propagateFloat64NaN( a, a STATUS_VAR );
+ }
+ return a;
+ }
+ if ( aExp < 0x3FF ) {
+ if ( (uint64_t) ( float64_val(a)<<1 ) == 0 ) return a;
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ aSign = extractFloat64Sign( a );
+ switch ( STATUS(float_rounding_mode) ) {
+ case float_round_nearest_even:
+ if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
+ return packFloat64( aSign, 0x3FF, 0 );
+ }
+ break;
+ case float_round_down:
+ return make_float64(aSign ? LIT64( 0xBFF0000000000000 ) : 0);
+ case float_round_up:
+ return make_float64(
+ aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ));
+ }
+ return packFloat64( aSign, 0, 0 );
+ }
+ lastBitMask = 1;
+ lastBitMask <<= 0x433 - aExp;
+ roundBitsMask = lastBitMask - 1;
+ z = float64_val(a);
+ roundingMode = STATUS(float_rounding_mode);
+ if ( roundingMode == float_round_nearest_even ) {
+ z += lastBitMask>>1;
+ if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
+ }
+ else if ( roundingMode != float_round_to_zero ) {
+ if ( extractFloat64Sign( make_float64(z) ) ^ ( roundingMode == float_round_up ) ) {
+ z += roundBitsMask;
+ }
+ }
+ z &= ~ roundBitsMask;
+ if ( z != float64_val(a) )
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ return make_float64(z);
+
+}
+
+float64 float64_trunc_to_int( float64 a STATUS_PARAM)
+{
+ int oldmode;
+ float64 res;
+ oldmode = STATUS(float_rounding_mode);
+ STATUS(float_rounding_mode) = float_round_to_zero;
+ res = float64_round_to_int(a STATUS_VAR);
+ STATUS(float_rounding_mode) = oldmode;
+ return res;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the double-precision
+| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
+| before being returned. `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float64 addFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
+{
+ int_fast16_t aExp, bExp, zExp;
+ uint64_t aSig, bSig, zSig;
+ int_fast16_t expDiff;
+
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ bSig = extractFloat64Frac( b );
+ bExp = extractFloat64Exp( b );
+ expDiff = aExp - bExp;
+ aSig <<= 9;
+ bSig <<= 9;
+ if ( 0 < expDiff ) {
+ if ( aExp == 0x7FF ) {
+ if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+ return a;
+ }
+ if ( bExp == 0 ) {
+ --expDiff;
+ }
+ else {
+ bSig |= LIT64( 0x2000000000000000 );
+ }
+ shift64RightJamming( bSig, expDiff, &bSig );
+ zExp = aExp;
+ }
+ else if ( expDiff < 0 ) {
+ if ( bExp == 0x7FF ) {
+ if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+ return packFloat64( zSign, 0x7FF, 0 );
+ }
+ if ( aExp == 0 ) {
+ ++expDiff;
+ }
+ else {
+ aSig |= LIT64( 0x2000000000000000 );
+ }
+ shift64RightJamming( aSig, - expDiff, &aSig );
+ zExp = bExp;
+ }
+ else {
+ if ( aExp == 0x7FF ) {
+ if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+ return a;
+ }
+ if ( aExp == 0 ) {
+ if (STATUS(flush_to_zero)) {
+ if (aSig | bSig) {
+ float_raise(float_flag_output_denormal STATUS_VAR);
+ }
+ return packFloat64(zSign, 0, 0);
+ }
+ return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
+ }
+ zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
+ zExp = aExp;
+ goto roundAndPack;
+ }
+ aSig |= LIT64( 0x2000000000000000 );
+ zSig = ( aSig + bSig )<<1;
+ --zExp;
+ if ( (int64_t) zSig < 0 ) {
+ zSig = aSig + bSig;
+ ++zExp;
+ }
+ roundAndPack:
+ return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the double-
+| precision floating-point values `a' and `b'. If `zSign' is 1, the
+| difference is negated before being returned. `zSign' is ignored if the
+| result is a NaN. The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float64 subFloat64Sigs( float64 a, float64 b, flag zSign STATUS_PARAM )
+{
+ int_fast16_t aExp, bExp, zExp;
+ uint64_t aSig, bSig, zSig;
+ int_fast16_t expDiff;
+
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ bSig = extractFloat64Frac( b );
+ bExp = extractFloat64Exp( b );
+ expDiff = aExp - bExp;
+ aSig <<= 10;
+ bSig <<= 10;
+ if ( 0 < expDiff ) goto aExpBigger;
+ if ( expDiff < 0 ) goto bExpBigger;
+ if ( aExp == 0x7FF ) {
+ if ( aSig | bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float64_default_nan;
+ }
+ if ( aExp == 0 ) {
+ aExp = 1;
+ bExp = 1;
+ }
+ if ( bSig < aSig ) goto aBigger;
+ if ( aSig < bSig ) goto bBigger;
+ return packFloat64( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
+ bExpBigger:
+ if ( bExp == 0x7FF ) {
+ if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+ return packFloat64( zSign ^ 1, 0x7FF, 0 );
+ }
+ if ( aExp == 0 ) {
+ ++expDiff;
+ }
+ else {
+ aSig |= LIT64( 0x4000000000000000 );
+ }
+ shift64RightJamming( aSig, - expDiff, &aSig );
+ bSig |= LIT64( 0x4000000000000000 );
+ bBigger:
+ zSig = bSig - aSig;
+ zExp = bExp;
+ zSign ^= 1;
+ goto normalizeRoundAndPack;
+ aExpBigger:
+ if ( aExp == 0x7FF ) {
+ if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+ return a;
+ }
+ if ( bExp == 0 ) {
+ --expDiff;
+ }
+ else {
+ bSig |= LIT64( 0x4000000000000000 );
+ }
+ shift64RightJamming( bSig, expDiff, &bSig );
+ aSig |= LIT64( 0x4000000000000000 );
+ aBigger:
+ zSig = aSig - bSig;
+ zExp = aExp;
+ normalizeRoundAndPack:
+ --zExp;
+ return normalizeRoundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the double-precision floating-point values `a'
+| and `b'. The operation is performed according to the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_add( float64 a, float64 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+ a = float64_squash_input_denormal(a STATUS_VAR);
+ b = float64_squash_input_denormal(b STATUS_VAR);
+
+ aSign = extractFloat64Sign( a );
+ bSign = extractFloat64Sign( b );
+ if ( aSign == bSign ) {
+ return addFloat64Sigs( a, b, aSign STATUS_VAR );
+ }
+ else {
+ return subFloat64Sigs( a, b, aSign STATUS_VAR );
+ }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the double-precision floating-point values
+| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_sub( float64 a, float64 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+ a = float64_squash_input_denormal(a STATUS_VAR);
+ b = float64_squash_input_denormal(b STATUS_VAR);
+
+ aSign = extractFloat64Sign( a );
+ bSign = extractFloat64Sign( b );
+ if ( aSign == bSign ) {
+ return subFloat64Sigs( a, b, aSign STATUS_VAR );
+ }
+ else {
+ return addFloat64Sigs( a, b, aSign STATUS_VAR );
+ }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the double-precision floating-point values
+| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_mul( float64 a, float64 b STATUS_PARAM )
+{
+ flag aSign, bSign, zSign;
+ int_fast16_t aExp, bExp, zExp;
+ uint64_t aSig, bSig, zSig0, zSig1;
+
+ a = float64_squash_input_denormal(a STATUS_VAR);
+ b = float64_squash_input_denormal(b STATUS_VAR);
+
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ aSign = extractFloat64Sign( a );
+ bSig = extractFloat64Frac( b );
+ bExp = extractFloat64Exp( b );
+ bSign = extractFloat64Sign( b );
+ zSign = aSign ^ bSign;
+ if ( aExp == 0x7FF ) {
+ if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
+ return propagateFloat64NaN( a, b STATUS_VAR );
+ }
+ if ( ( bExp | bSig ) == 0 ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float64_default_nan;
+ }
+ return packFloat64( zSign, 0x7FF, 0 );
+ }
+ if ( bExp == 0x7FF ) {
+ if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+ if ( ( aExp | aSig ) == 0 ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float64_default_nan;
+ }
+ return packFloat64( zSign, 0x7FF, 0 );
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
+ normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+ }
+ if ( bExp == 0 ) {
+ if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
+ normalizeFloat64Subnormal( bSig, &bExp, &bSig );
+ }
+ zExp = aExp + bExp - 0x3FF;
+ aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
+ bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
+ mul64To128( aSig, bSig, &zSig0, &zSig1 );
+ zSig0 |= ( zSig1 != 0 );
+ if ( 0 <= (int64_t) ( zSig0<<1 ) ) {
+ zSig0 <<= 1;
+ --zExp;
+ }
+ return roundAndPackFloat64( zSign, zExp, zSig0 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the double-precision floating-point value `a'
+| by the corresponding value `b'. The operation is performed according to
+| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_div( float64 a, float64 b STATUS_PARAM )
+{
+ flag aSign, bSign, zSign;
+ int_fast16_t aExp, bExp, zExp;
+ uint64_t aSig, bSig, zSig;
+ uint64_t rem0, rem1;
+ uint64_t term0, term1;
+ a = float64_squash_input_denormal(a STATUS_VAR);
+ b = float64_squash_input_denormal(b STATUS_VAR);
+
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ aSign = extractFloat64Sign( a );
+ bSig = extractFloat64Frac( b );
+ bExp = extractFloat64Exp( b );
+ bSign = extractFloat64Sign( b );
+ zSign = aSign ^ bSign;
+ if ( aExp == 0x7FF ) {
+ if ( aSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+ if ( bExp == 0x7FF ) {
+ if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float64_default_nan;
+ }
+ return packFloat64( zSign, 0x7FF, 0 );
+ }
+ if ( bExp == 0x7FF ) {
+ if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+ return packFloat64( zSign, 0, 0 );
+ }
+ if ( bExp == 0 ) {
+ if ( bSig == 0 ) {
+ if ( ( aExp | aSig ) == 0 ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float64_default_nan;
+ }
+ float_raise( float_flag_divbyzero STATUS_VAR);
+ return packFloat64( zSign, 0x7FF, 0 );
+ }
+ normalizeFloat64Subnormal( bSig, &bExp, &bSig );
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
+ normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+ }
+ zExp = aExp - bExp + 0x3FD;
+ aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
+ bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
+ if ( bSig <= ( aSig + aSig ) ) {
+ aSig >>= 1;
+ ++zExp;
+ }
+ zSig = estimateDiv128To64( aSig, 0, bSig );
+ if ( ( zSig & 0x1FF ) <= 2 ) {
+ mul64To128( bSig, zSig, &term0, &term1 );
+ sub128( aSig, 0, term0, term1, &rem0, &rem1 );
+ while ( (int64_t) rem0 < 0 ) {
+ --zSig;
+ add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
+ }
+ zSig |= ( rem1 != 0 );
+ }
+ return roundAndPackFloat64( zSign, zExp, zSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the remainder of the double-precision floating-point value `a'
+| with respect to the corresponding value `b'. The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_rem( float64 a, float64 b STATUS_PARAM )
+{
+ flag aSign, zSign;
+ int_fast16_t aExp, bExp, expDiff;
+ uint64_t aSig, bSig;
+ uint64_t q, alternateASig;
+ int64_t sigMean;
+
+ a = float64_squash_input_denormal(a STATUS_VAR);
+ b = float64_squash_input_denormal(b STATUS_VAR);
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ aSign = extractFloat64Sign( a );
+ bSig = extractFloat64Frac( b );
+ bExp = extractFloat64Exp( b );
+ if ( aExp == 0x7FF ) {
+ if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
+ return propagateFloat64NaN( a, b STATUS_VAR );
+ }
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float64_default_nan;
+ }
+ if ( bExp == 0x7FF ) {
+ if ( bSig ) return propagateFloat64NaN( a, b STATUS_VAR );
+ return a;
+ }
+ if ( bExp == 0 ) {
+ if ( bSig == 0 ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float64_default_nan;
+ }
+ normalizeFloat64Subnormal( bSig, &bExp, &bSig );
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return a;
+ normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+ }
+ expDiff = aExp - bExp;
+ aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
+ bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
+ if ( expDiff < 0 ) {
+ if ( expDiff < -1 ) return a;
+ aSig >>= 1;
+ }
+ q = ( bSig <= aSig );
+ if ( q ) aSig -= bSig;
+ expDiff -= 64;
+ while ( 0 < expDiff ) {
+ q = estimateDiv128To64( aSig, 0, bSig );
+ q = ( 2 < q ) ? q - 2 : 0;
+ aSig = - ( ( bSig>>2 ) * q );
+ expDiff -= 62;
+ }
+ expDiff += 64;
+ if ( 0 < expDiff ) {
+ q = estimateDiv128To64( aSig, 0, bSig );
+ q = ( 2 < q ) ? q - 2 : 0;
+ q >>= 64 - expDiff;
+ bSig >>= 2;
+ aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
+ }
+ else {
+ aSig >>= 2;
+ bSig >>= 2;
+ }
+ do {
+ alternateASig = aSig;
+ ++q;
+ aSig -= bSig;
+ } while ( 0 <= (int64_t) aSig );
+ sigMean = aSig + alternateASig;
+ if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
+ aSig = alternateASig;
+ }
+ zSign = ( (int64_t) aSig < 0 );
+ if ( zSign ) aSig = - aSig;
+ return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the double-precision floating-point values
+| `a' and `b' then adding 'c', with no intermediate rounding step after the
+| multiplication. The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic 754-2008.
+| The flags argument allows the caller to select negation of the
+| addend, the intermediate product, or the final result. (The difference
+| between this and having the caller do a separate negation is that negating
+| externally will flip the sign bit on NaNs.)
+*----------------------------------------------------------------------------*/
+
+float64 float64_muladd(float64 a, float64 b, float64 c, int flags STATUS_PARAM)
+{
+ flag aSign, bSign, cSign, zSign;
+ int_fast16_t aExp, bExp, cExp, pExp, zExp, expDiff;
+ uint64_t aSig, bSig, cSig;
+ flag pInf, pZero, pSign;
+ uint64_t pSig0, pSig1, cSig0, cSig1, zSig0, zSig1;
+ int shiftcount;
+ flag signflip, infzero;
+
+ a = float64_squash_input_denormal(a STATUS_VAR);
+ b = float64_squash_input_denormal(b STATUS_VAR);
+ c = float64_squash_input_denormal(c STATUS_VAR);
+ aSig = extractFloat64Frac(a);
+ aExp = extractFloat64Exp(a);
+ aSign = extractFloat64Sign(a);
+ bSig = extractFloat64Frac(b);
+ bExp = extractFloat64Exp(b);
+ bSign = extractFloat64Sign(b);
+ cSig = extractFloat64Frac(c);
+ cExp = extractFloat64Exp(c);
+ cSign = extractFloat64Sign(c);
+
+ infzero = ((aExp == 0 && aSig == 0 && bExp == 0x7ff && bSig == 0) ||
+ (aExp == 0x7ff && aSig == 0 && bExp == 0 && bSig == 0));
+
+ /* It is implementation-defined whether the cases of (0,inf,qnan)
+ * and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
+ * they return if they do), so we have to hand this information
+ * off to the target-specific pick-a-NaN routine.
+ */
+ if (((aExp == 0x7ff) && aSig) ||
+ ((bExp == 0x7ff) && bSig) ||
+ ((cExp == 0x7ff) && cSig)) {
+ return propagateFloat64MulAddNaN(a, b, c, infzero STATUS_VAR);
+ }
+
+ if (infzero) {
+ float_raise(float_flag_invalid STATUS_VAR);
+ return float64_default_nan;
+ }
+
+ if (flags & float_muladd_negate_c) {
+ cSign ^= 1;
+ }
+
+ signflip = (flags & float_muladd_negate_result) ? 1 : 0;
+
+ /* Work out the sign and type of the product */
+ pSign = aSign ^ bSign;
+ if (flags & float_muladd_negate_product) {
+ pSign ^= 1;
+ }
+ pInf = (aExp == 0x7ff) || (bExp == 0x7ff);
+ pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);
+
+ if (cExp == 0x7ff) {
+ if (pInf && (pSign ^ cSign)) {
+ /* addition of opposite-signed infinities => InvalidOperation */
+ float_raise(float_flag_invalid STATUS_VAR);
+ return float64_default_nan;
+ }
+ /* Otherwise generate an infinity of the same sign */
+ return packFloat64(cSign ^ signflip, 0x7ff, 0);
+ }
+
+ if (pInf) {
+ return packFloat64(pSign ^ signflip, 0x7ff, 0);
+ }
+
+ if (pZero) {
+ if (cExp == 0) {
+ if (cSig == 0) {
+ /* Adding two exact zeroes */
+ if (pSign == cSign) {
+ zSign = pSign;
+ } else if (STATUS(float_rounding_mode) == float_round_down) {
+ zSign = 1;
+ } else {
+ zSign = 0;
+ }
+ return packFloat64(zSign ^ signflip, 0, 0);
+ }
+ /* Exact zero plus a denorm */
+ if (STATUS(flush_to_zero)) {
+ float_raise(float_flag_output_denormal STATUS_VAR);
+ return packFloat64(cSign ^ signflip, 0, 0);
+ }
+ }
+ /* Zero plus something non-zero : just return the something */
+ return make_float64(float64_val(c) ^ ((uint64_t)signflip << 63));
+ }
+
+ if (aExp == 0) {
+ normalizeFloat64Subnormal(aSig, &aExp, &aSig);
+ }
+ if (bExp == 0) {
+ normalizeFloat64Subnormal(bSig, &bExp, &bSig);
+ }
+
+ /* Calculate the actual result a * b + c */
+
+ /* Multiply first; this is easy. */
+ /* NB: we subtract 0x3fe where float64_mul() subtracts 0x3ff
+ * because we want the true exponent, not the "one-less-than"
+ * flavour that roundAndPackFloat64() takes.
+ */
+ pExp = aExp + bExp - 0x3fe;
+ aSig = (aSig | LIT64(0x0010000000000000))<<10;
+ bSig = (bSig | LIT64(0x0010000000000000))<<11;
+ mul64To128(aSig, bSig, &pSig0, &pSig1);
+ if ((int64_t)(pSig0 << 1) >= 0) {
+ shortShift128Left(pSig0, pSig1, 1, &pSig0, &pSig1);
+ pExp--;
+ }
+
+ zSign = pSign ^ signflip;
+
+ /* Now [pSig0:pSig1] is the significand of the multiply, with the explicit
+ * bit in position 126.
+ */
+ if (cExp == 0) {
+ if (!cSig) {
+ /* Throw out the special case of c being an exact zero now */
+ shift128RightJamming(pSig0, pSig1, 64, &pSig0, &pSig1);
+ return roundAndPackFloat64(zSign, pExp - 1,
+ pSig1 STATUS_VAR);
+ }
+ normalizeFloat64Subnormal(cSig, &cExp, &cSig);
+ }
+
+ /* Shift cSig and add the explicit bit so [cSig0:cSig1] is the
+ * significand of the addend, with the explicit bit in position 126.
+ */
+ cSig0 = cSig << (126 - 64 - 52);
+ cSig1 = 0;
+ cSig0 |= LIT64(0x4000000000000000);
+ expDiff = pExp - cExp;
+
+ if (pSign == cSign) {
+ /* Addition */
+ if (expDiff > 0) {
+ /* scale c to match p */
+ shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
+ zExp = pExp;
+ } else if (expDiff < 0) {
+ /* scale p to match c */
+ shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
+ zExp = cExp;
+ } else {
+ /* no scaling needed */
+ zExp = cExp;
+ }
+ /* Add significands and make sure explicit bit ends up in posn 126 */
+ add128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
+ if ((int64_t)zSig0 < 0) {
+ shift128RightJamming(zSig0, zSig1, 1, &zSig0, &zSig1);
+ } else {
+ zExp--;
+ }
+ shift128RightJamming(zSig0, zSig1, 64, &zSig0, &zSig1);
+ return roundAndPackFloat64(zSign, zExp, zSig1 STATUS_VAR);
+ } else {
+ /* Subtraction */
+ if (expDiff > 0) {
+ shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
+ sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
+ zExp = pExp;
+ } else if (expDiff < 0) {
+ shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
+ sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
+ zExp = cExp;
+ zSign ^= 1;
+ } else {
+ zExp = pExp;
+ if (lt128(cSig0, cSig1, pSig0, pSig1)) {
+ sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
+ } else if (lt128(pSig0, pSig1, cSig0, cSig1)) {
+ sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
+ zSign ^= 1;
+ } else {
+ /* Exact zero */
+ zSign = signflip;
+ if (STATUS(float_rounding_mode) == float_round_down) {
+ zSign ^= 1;
+ }
+ return packFloat64(zSign, 0, 0);
+ }
+ }
+ --zExp;
+ /* Do the equivalent of normalizeRoundAndPackFloat64() but
+ * starting with the significand in a pair of uint64_t.
+ */
+ if (zSig0) {
+ shiftcount = countLeadingZeros64(zSig0) - 1;
+ shortShift128Left(zSig0, zSig1, shiftcount, &zSig0, &zSig1);
+ if (zSig1) {
+ zSig0 |= 1;
+ }
+ zExp -= shiftcount;
+ } else {
+ shiftcount = countLeadingZeros64(zSig1) - 1;
+ zSig0 = zSig1 << shiftcount;
+ zExp -= (shiftcount + 64);
+ }
+ return roundAndPackFloat64(zSign, zExp, zSig0 STATUS_VAR);
+ }
+}
+
+/*----------------------------------------------------------------------------
+| Returns the square root of the double-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_sqrt( float64 a STATUS_PARAM )
+{
+ flag aSign;
+ int_fast16_t aExp, zExp;
+ uint64_t aSig, zSig, doubleZSig;
+ uint64_t rem0, rem1, term0, term1;
+ a = float64_squash_input_denormal(a STATUS_VAR);
+
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ aSign = extractFloat64Sign( a );
+ if ( aExp == 0x7FF ) {
+ if ( aSig ) return propagateFloat64NaN( a, a STATUS_VAR );
+ if ( ! aSign ) return a;
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float64_default_nan;
+ }
+ if ( aSign ) {
+ if ( ( aExp | aSig ) == 0 ) return a;
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float64_default_nan;
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return float64_zero;
+ normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+ }
+ zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
+ aSig |= LIT64( 0x0010000000000000 );
+ zSig = estimateSqrt32( aExp, aSig>>21 );
+ aSig <<= 9 - ( aExp & 1 );
+ zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
+ if ( ( zSig & 0x1FF ) <= 5 ) {
+ doubleZSig = zSig<<1;
+ mul64To128( zSig, zSig, &term0, &term1 );
+ sub128( aSig, 0, term0, term1, &rem0, &rem1 );
+ while ( (int64_t) rem0 < 0 ) {
+ --zSig;
+ doubleZSig -= 2;
+ add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
+ }
+ zSig |= ( ( rem0 | rem1 ) != 0 );
+ }
+ return roundAndPackFloat64( 0, zExp, zSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the binary log of the double-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+float64 float64_log2( float64 a STATUS_PARAM )
+{
+ flag aSign, zSign;
+ int_fast16_t aExp;
+ uint64_t aSig, aSig0, aSig1, zSig, i;
+ a = float64_squash_input_denormal(a STATUS_VAR);
+
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ aSign = extractFloat64Sign( a );
+
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return packFloat64( 1, 0x7FF, 0 );
+ normalizeFloat64Subnormal( aSig, &aExp, &aSig );
+ }
+ if ( aSign ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return float64_default_nan;
+ }
+ if ( aExp == 0x7FF ) {
+ if ( aSig ) return propagateFloat64NaN( a, float64_zero STATUS_VAR );
+ return a;
+ }
+
+ aExp -= 0x3FF;
+ aSig |= LIT64( 0x0010000000000000 );
+ zSign = aExp < 0;
+ zSig = (uint64_t)aExp << 52;
+ for (i = 1LL << 51; i > 0; i >>= 1) {
+ mul64To128( aSig, aSig, &aSig0, &aSig1 );
+ aSig = ( aSig0 << 12 ) | ( aSig1 >> 52 );
+ if ( aSig & LIT64( 0x0020000000000000 ) ) {
+ aSig >>= 1;
+ zSig |= i;
+ }
+ }
+
+ if ( zSign )
+ zSig = -zSig;
+ return normalizeRoundAndPackFloat64( zSign, 0x408, zSig STATUS_VAR );
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is equal to the
+| corresponding value `b', and 0 otherwise. The invalid exception is raised
+| if either operand is a NaN. Otherwise, the comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float64_eq( float64 a, float64 b STATUS_PARAM )
+{
+ uint64_t av, bv;
+ a = float64_squash_input_denormal(a STATUS_VAR);
+ b = float64_squash_input_denormal(b STATUS_VAR);
+
+ if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+ || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+ ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return 0;
+ }
+ av = float64_val(a);
+ bv = float64_val(b);
+ return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is less than or
+| equal to the corresponding value `b', and 0 otherwise. The invalid
+| exception is raised if either operand is a NaN. The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float64_le( float64 a, float64 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+ uint64_t av, bv;
+ a = float64_squash_input_denormal(a STATUS_VAR);
+ b = float64_squash_input_denormal(b STATUS_VAR);
+
+ if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+ || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+ ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return 0;
+ }
+ aSign = extractFloat64Sign( a );
+ bSign = extractFloat64Sign( b );
+ av = float64_val(a);
+ bv = float64_val(b);
+ if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
+ return ( av == bv ) || ( aSign ^ ( av < bv ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise. The invalid exception is
+| raised if either operand is a NaN. The comparison is performed according
+| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float64_lt( float64 a, float64 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+ uint64_t av, bv;
+
+ a = float64_squash_input_denormal(a STATUS_VAR);
+ b = float64_squash_input_denormal(b STATUS_VAR);
+ if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+ || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+ ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return 0;
+ }
+ aSign = extractFloat64Sign( a );
+ bSign = extractFloat64Sign( b );
+ av = float64_val(a);
+ bv = float64_val(b);
+ if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
+ return ( av != bv ) && ( aSign ^ ( av < bv ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point values `a' and `b' cannot
+| be compared, and 0 otherwise. The invalid exception is raised if either
+| operand is a NaN. The comparison is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float64_unordered( float64 a, float64 b STATUS_PARAM )
+{
+ a = float64_squash_input_denormal(a STATUS_VAR);
+ b = float64_squash_input_denormal(b STATUS_VAR);
+
+ if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+ || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+ ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return 1;
+ }
+ return 0;
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is equal to the
+| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
+| exception.The comparison is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float64_eq_quiet( float64 a, float64 b STATUS_PARAM )
+{
+ uint64_t av, bv;
+ a = float64_squash_input_denormal(a STATUS_VAR);
+ b = float64_squash_input_denormal(b STATUS_VAR);
+
+ if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+ || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+ ) {
+ if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return 0;
+ }
+ av = float64_val(a);
+ bv = float64_val(b);
+ return ( av == bv ) || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is less than or
+| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
+| cause an exception. Otherwise, the comparison is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float64_le_quiet( float64 a, float64 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+ uint64_t av, bv;
+ a = float64_squash_input_denormal(a STATUS_VAR);
+ b = float64_squash_input_denormal(b STATUS_VAR);
+
+ if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+ || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+ ) {
+ if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return 0;
+ }
+ aSign = extractFloat64Sign( a );
+ bSign = extractFloat64Sign( b );
+ av = float64_val(a);
+ bv = float64_val(b);
+ if ( aSign != bSign ) return aSign || ( (uint64_t) ( ( av | bv )<<1 ) == 0 );
+ return ( av == bv ) || ( aSign ^ ( av < bv ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
+| exception. Otherwise, the comparison is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float64_lt_quiet( float64 a, float64 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+ uint64_t av, bv;
+ a = float64_squash_input_denormal(a STATUS_VAR);
+ b = float64_squash_input_denormal(b STATUS_VAR);
+
+ if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+ || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+ ) {
+ if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return 0;
+ }
+ aSign = extractFloat64Sign( a );
+ bSign = extractFloat64Sign( b );
+ av = float64_val(a);
+ bv = float64_val(b);
+ if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av | bv )<<1 ) != 0 );
+ return ( av != bv ) && ( aSign ^ ( av < bv ) );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the double-precision floating-point values `a' and `b' cannot
+| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
+| comparison is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float64_unordered_quiet( float64 a, float64 b STATUS_PARAM )
+{
+ a = float64_squash_input_denormal(a STATUS_VAR);
+ b = float64_squash_input_denormal(b STATUS_VAR);
+
+ if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
+ || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
+ ) {
+ if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return 1;
+ }
+ return 0;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 32-bit two's complement integer format. The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic---which means in particular that the conversion
+| is rounded according to the current rounding mode. If `a' is a NaN, the
+| largest positive integer is returned. Otherwise, if the conversion
+| overflows, the largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 floatx80_to_int32( floatx80 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp, shiftCount;
+ uint64_t aSig;
+
+ aSig = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ aSign = extractFloatx80Sign( a );
+ if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
+ shiftCount = 0x4037 - aExp;
+ if ( shiftCount <= 0 ) shiftCount = 1;
+ shift64RightJamming( aSig, shiftCount, &aSig );
+ return roundAndPackInt32( aSign, aSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 32-bit two's complement integer format. The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic, except that the conversion is always rounded
+| toward zero. If `a' is a NaN, the largest positive integer is returned.
+| Otherwise, if the conversion overflows, the largest integer with the same
+| sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 floatx80_to_int32_round_to_zero( floatx80 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp, shiftCount;
+ uint64_t aSig, savedASig;
+ int32_t z;
+
+ aSig = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ aSign = extractFloatx80Sign( a );
+ if ( 0x401E < aExp ) {
+ if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
+ goto invalid;
+ }
+ else if ( aExp < 0x3FFF ) {
+ if ( aExp || aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
+ return 0;
+ }
+ shiftCount = 0x403E - aExp;
+ savedASig = aSig;
+ aSig >>= shiftCount;
+ z = aSig;
+ if ( aSign ) z = - z;
+ if ( ( z < 0 ) ^ aSign ) {
+ invalid:
+ float_raise( float_flag_invalid STATUS_VAR);
+ return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
+ }
+ if ( ( aSig<<shiftCount ) != savedASig ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 64-bit two's complement integer format. The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic---which means in particular that the conversion
+| is rounded according to the current rounding mode. If `a' is a NaN,
+| the largest positive integer is returned. Otherwise, if the conversion
+| overflows, the largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 floatx80_to_int64( floatx80 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp, shiftCount;
+ uint64_t aSig, aSigExtra;
+
+ aSig = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ aSign = extractFloatx80Sign( a );
+ shiftCount = 0x403E - aExp;
+ if ( shiftCount <= 0 ) {
+ if ( shiftCount ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ if ( ! aSign
+ || ( ( aExp == 0x7FFF )
+ && ( aSig != LIT64( 0x8000000000000000 ) ) )
+ ) {
+ return LIT64( 0x7FFFFFFFFFFFFFFF );
+ }
+ return (int64_t) LIT64( 0x8000000000000000 );
+ }
+ aSigExtra = 0;
+ }
+ else {
+ shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
+ }
+ return roundAndPackInt64( aSign, aSig, aSigExtra STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 64-bit two's complement integer format. The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic, except that the conversion is always rounded
+| toward zero. If `a' is a NaN, the largest positive integer is returned.
+| Otherwise, if the conversion overflows, the largest integer with the same
+| sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 floatx80_to_int64_round_to_zero( floatx80 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp, shiftCount;
+ uint64_t aSig;
+ int64 z;
+
+ aSig = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ aSign = extractFloatx80Sign( a );
+ shiftCount = aExp - 0x403E;
+ if ( 0 <= shiftCount ) {
+ aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
+ if ( ( a.high != 0xC03E ) || aSig ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
+ return LIT64( 0x7FFFFFFFFFFFFFFF );
+ }
+ }
+ return (int64_t) LIT64( 0x8000000000000000 );
+ }
+ else if ( aExp < 0x3FFF ) {
+ if ( aExp | aSig ) STATUS(float_exception_flags) |= float_flag_inexact;
+ return 0;
+ }
+ z = aSig>>( - shiftCount );
+ if ( (uint64_t) ( aSig<<( shiftCount & 63 ) ) ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ if ( aSign ) z = - z;
+ return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the single-precision floating-point format. The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 floatx80_to_float32( floatx80 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp;
+ uint64_t aSig;
+
+ aSig = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ aSign = extractFloatx80Sign( a );
+ if ( aExp == 0x7FFF ) {
+ if ( (uint64_t) ( aSig<<1 ) ) {
+ return commonNaNToFloat32( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
+ }
+ return packFloat32( aSign, 0xFF, 0 );
+ }
+ shift64RightJamming( aSig, 33, &aSig );
+ if ( aExp || aSig ) aExp -= 0x3F81;
+ return roundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the double-precision floating-point format. The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 floatx80_to_float64( floatx80 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp;
+ uint64_t aSig, zSig;
+
+ aSig = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ aSign = extractFloatx80Sign( a );
+ if ( aExp == 0x7FFF ) {
+ if ( (uint64_t) ( aSig<<1 ) ) {
+ return commonNaNToFloat64( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
+ }
+ return packFloat64( aSign, 0x7FF, 0 );
+ }
+ shift64RightJamming( aSig, 1, &zSig );
+ if ( aExp || aSig ) aExp -= 0x3C01;
+ return roundAndPackFloat64( aSign, aExp, zSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the quadruple-precision floating-point format. The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 floatx80_to_float128( floatx80 a STATUS_PARAM )
+{
+ flag aSign;
+ int_fast16_t aExp;
+ uint64_t aSig, zSig0, zSig1;
+
+ aSig = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ aSign = extractFloatx80Sign( a );
+ if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) {
+ return commonNaNToFloat128( floatx80ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
+ }
+ shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
+ return packFloat128( aSign, aExp, zSig0, zSig1 );
+
+}
+
+/*----------------------------------------------------------------------------
+| Rounds the extended double-precision floating-point value `a' to an integer,
+| and returns the result as an extended quadruple-precision floating-point
+| value. The operation is performed according to the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_round_to_int( floatx80 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp;
+ uint64_t lastBitMask, roundBitsMask;
+ int8 roundingMode;
+ floatx80 z;
+
+ aExp = extractFloatx80Exp( a );
+ if ( 0x403E <= aExp ) {
+ if ( ( aExp == 0x7FFF ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) {
+ return propagateFloatx80NaN( a, a STATUS_VAR );
+ }
+ return a;
+ }
+ if ( aExp < 0x3FFF ) {
+ if ( ( aExp == 0 )
+ && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
+ return a;
+ }
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ aSign = extractFloatx80Sign( a );
+ switch ( STATUS(float_rounding_mode) ) {
+ case float_round_nearest_even:
+ if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
+ ) {
+ return
+ packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
+ }
+ break;
+ case float_round_down:
+ return
+ aSign ?
+ packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
+ : packFloatx80( 0, 0, 0 );
+ case float_round_up:
+ return
+ aSign ? packFloatx80( 1, 0, 0 )
+ : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
+ }
+ return packFloatx80( aSign, 0, 0 );
+ }
+ lastBitMask = 1;
+ lastBitMask <<= 0x403E - aExp;
+ roundBitsMask = lastBitMask - 1;
+ z = a;
+ roundingMode = STATUS(float_rounding_mode);
+ if ( roundingMode == float_round_nearest_even ) {
+ z.low += lastBitMask>>1;
+ if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
+ }
+ else if ( roundingMode != float_round_to_zero ) {
+ if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
+ z.low += roundBitsMask;
+ }
+ }
+ z.low &= ~ roundBitsMask;
+ if ( z.low == 0 ) {
+ ++z.high;
+ z.low = LIT64( 0x8000000000000000 );
+ }
+ if ( z.low != a.low ) STATUS(float_exception_flags) |= float_flag_inexact;
+ return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the extended double-
+| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
+| negated before being returned. `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM)
+{
+ int32 aExp, bExp, zExp;
+ uint64_t aSig, bSig, zSig0, zSig1;
+ int32 expDiff;
+
+ aSig = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ bSig = extractFloatx80Frac( b );
+ bExp = extractFloatx80Exp( b );
+ expDiff = aExp - bExp;
+ if ( 0 < expDiff ) {
+ if ( aExp == 0x7FFF ) {
+ if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+ return a;
+ }
+ if ( bExp == 0 ) --expDiff;
+ shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
+ zExp = aExp;
+ }
+ else if ( expDiff < 0 ) {
+ if ( bExp == 0x7FFF ) {
+ if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+ return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+ }
+ if ( aExp == 0 ) ++expDiff;
+ shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
+ zExp = bExp;
+ }
+ else {
+ if ( aExp == 0x7FFF ) {
+ if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
+ return propagateFloatx80NaN( a, b STATUS_VAR );
+ }
+ return a;
+ }
+ zSig1 = 0;
+ zSig0 = aSig + bSig;
+ if ( aExp == 0 ) {
+ normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
+ goto roundAndPack;
+ }
+ zExp = aExp;
+ goto shiftRight1;
+ }
+ zSig0 = aSig + bSig;
+ if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
+ shiftRight1:
+ shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
+ zSig0 |= LIT64( 0x8000000000000000 );
+ ++zExp;
+ roundAndPack:
+ return
+ roundAndPackFloatx80(
+ STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the extended
+| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
+| difference is negated before being returned. `zSign' is ignored if the
+| result is a NaN. The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign STATUS_PARAM )
+{
+ int32 aExp, bExp, zExp;
+ uint64_t aSig, bSig, zSig0, zSig1;
+ int32 expDiff;
+ floatx80 z;
+
+ aSig = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ bSig = extractFloatx80Frac( b );
+ bExp = extractFloatx80Exp( b );
+ expDiff = aExp - bExp;
+ if ( 0 < expDiff ) goto aExpBigger;
+ if ( expDiff < 0 ) goto bExpBigger;
+ if ( aExp == 0x7FFF ) {
+ if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
+ return propagateFloatx80NaN( a, b STATUS_VAR );
+ }
+ float_raise( float_flag_invalid STATUS_VAR);
+ z.low = floatx80_default_nan_low;
+ z.high = floatx80_default_nan_high;
+ return z;
+ }
+ if ( aExp == 0 ) {
+ aExp = 1;
+ bExp = 1;
+ }
+ zSig1 = 0;
+ if ( bSig < aSig ) goto aBigger;
+ if ( aSig < bSig ) goto bBigger;
+ return packFloatx80( STATUS(float_rounding_mode) == float_round_down, 0, 0 );
+ bExpBigger:
+ if ( bExp == 0x7FFF ) {
+ if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+ return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
+ }
+ if ( aExp == 0 ) ++expDiff;
+ shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
+ bBigger:
+ sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
+ zExp = bExp;
+ zSign ^= 1;
+ goto normalizeRoundAndPack;
+ aExpBigger:
+ if ( aExp == 0x7FFF ) {
+ if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+ return a;
+ }
+ if ( bExp == 0 ) --expDiff;
+ shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
+ aBigger:
+ sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
+ zExp = aExp;
+ normalizeRoundAndPack:
+ return
+ normalizeRoundAndPackFloatx80(
+ STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the extended double-precision floating-point
+| values `a' and `b'. The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_add( floatx80 a, floatx80 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ aSign = extractFloatx80Sign( a );
+ bSign = extractFloatx80Sign( b );
+ if ( aSign == bSign ) {
+ return addFloatx80Sigs( a, b, aSign STATUS_VAR );
+ }
+ else {
+ return subFloatx80Sigs( a, b, aSign STATUS_VAR );
+ }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the extended double-precision floating-
+| point values `a' and `b'. The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_sub( floatx80 a, floatx80 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ aSign = extractFloatx80Sign( a );
+ bSign = extractFloatx80Sign( b );
+ if ( aSign == bSign ) {
+ return subFloatx80Sigs( a, b, aSign STATUS_VAR );
+ }
+ else {
+ return addFloatx80Sigs( a, b, aSign STATUS_VAR );
+ }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the extended double-precision floating-
+| point values `a' and `b'. The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_mul( floatx80 a, floatx80 b STATUS_PARAM )
+{
+ flag aSign, bSign, zSign;
+ int32 aExp, bExp, zExp;
+ uint64_t aSig, bSig, zSig0, zSig1;
+ floatx80 z;
+
+ aSig = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ aSign = extractFloatx80Sign( a );
+ bSig = extractFloatx80Frac( b );
+ bExp = extractFloatx80Exp( b );
+ bSign = extractFloatx80Sign( b );
+ zSign = aSign ^ bSign;
+ if ( aExp == 0x7FFF ) {
+ if ( (uint64_t) ( aSig<<1 )
+ || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
+ return propagateFloatx80NaN( a, b STATUS_VAR );
+ }
+ if ( ( bExp | bSig ) == 0 ) goto invalid;
+ return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+ }
+ if ( bExp == 0x7FFF ) {
+ if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+ if ( ( aExp | aSig ) == 0 ) {
+ invalid:
+ float_raise( float_flag_invalid STATUS_VAR);
+ z.low = floatx80_default_nan_low;
+ z.high = floatx80_default_nan_high;
+ return z;
+ }
+ return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
+ normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
+ }
+ if ( bExp == 0 ) {
+ if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
+ normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+ }
+ zExp = aExp + bExp - 0x3FFE;
+ mul64To128( aSig, bSig, &zSig0, &zSig1 );
+ if ( 0 < (int64_t) zSig0 ) {
+ shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
+ --zExp;
+ }
+ return
+ roundAndPackFloatx80(
+ STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the extended double-precision floating-point
+| value `a' by the corresponding value `b'. The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_div( floatx80 a, floatx80 b STATUS_PARAM )
+{
+ flag aSign, bSign, zSign;
+ int32 aExp, bExp, zExp;
+ uint64_t aSig, bSig, zSig0, zSig1;
+ uint64_t rem0, rem1, rem2, term0, term1, term2;
+ floatx80 z;
+
+ aSig = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ aSign = extractFloatx80Sign( a );
+ bSig = extractFloatx80Frac( b );
+ bExp = extractFloatx80Exp( b );
+ bSign = extractFloatx80Sign( b );
+ zSign = aSign ^ bSign;
+ if ( aExp == 0x7FFF ) {
+ if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+ if ( bExp == 0x7FFF ) {
+ if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+ goto invalid;
+ }
+ return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+ }
+ if ( bExp == 0x7FFF ) {
+ if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+ return packFloatx80( zSign, 0, 0 );
+ }
+ if ( bExp == 0 ) {
+ if ( bSig == 0 ) {
+ if ( ( aExp | aSig ) == 0 ) {
+ invalid:
+ float_raise( float_flag_invalid STATUS_VAR);
+ z.low = floatx80_default_nan_low;
+ z.high = floatx80_default_nan_high;
+ return z;
+ }
+ float_raise( float_flag_divbyzero STATUS_VAR);
+ return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+ }
+ normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+ }
+ if ( aExp == 0 ) {
+ if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
+ normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
+ }
+ zExp = aExp - bExp + 0x3FFE;
+ rem1 = 0;
+ if ( bSig <= aSig ) {
+ shift128Right( aSig, 0, 1, &aSig, &rem1 );
+ ++zExp;
+ }
+ zSig0 = estimateDiv128To64( aSig, rem1, bSig );
+ mul64To128( bSig, zSig0, &term0, &term1 );
+ sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
+ while ( (int64_t) rem0 < 0 ) {
+ --zSig0;
+ add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
+ }
+ zSig1 = estimateDiv128To64( rem1, 0, bSig );
+ if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
+ mul64To128( bSig, zSig1, &term1, &term2 );
+ sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+ while ( (int64_t) rem1 < 0 ) {
+ --zSig1;
+ add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
+ }
+ zSig1 |= ( ( rem1 | rem2 ) != 0 );
+ }
+ return
+ roundAndPackFloatx80(
+ STATUS(floatx80_rounding_precision), zSign, zExp, zSig0, zSig1 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the remainder of the extended double-precision floating-point value
+| `a' with respect to the corresponding value `b'. The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_rem( floatx80 a, floatx80 b STATUS_PARAM )
+{
+ flag aSign, zSign;
+ int32 aExp, bExp, expDiff;
+ uint64_t aSig0, aSig1, bSig;
+ uint64_t q, term0, term1, alternateASig0, alternateASig1;
+ floatx80 z;
+
+ aSig0 = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ aSign = extractFloatx80Sign( a );
+ bSig = extractFloatx80Frac( b );
+ bExp = extractFloatx80Exp( b );
+ if ( aExp == 0x7FFF ) {
+ if ( (uint64_t) ( aSig0<<1 )
+ || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
+ return propagateFloatx80NaN( a, b STATUS_VAR );
+ }
+ goto invalid;
+ }
+ if ( bExp == 0x7FFF ) {
+ if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b STATUS_VAR );
+ return a;
+ }
+ if ( bExp == 0 ) {
+ if ( bSig == 0 ) {
+ invalid:
+ float_raise( float_flag_invalid STATUS_VAR);
+ z.low = floatx80_default_nan_low;
+ z.high = floatx80_default_nan_high;
+ return z;
+ }
+ normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
+ }
+ if ( aExp == 0 ) {
+ if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
+ normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
+ }
+ bSig |= LIT64( 0x8000000000000000 );
+ zSign = aSign;
+ expDiff = aExp - bExp;
+ aSig1 = 0;
+ if ( expDiff < 0 ) {
+ if ( expDiff < -1 ) return a;
+ shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
+ expDiff = 0;
+ }
+ q = ( bSig <= aSig0 );
+ if ( q ) aSig0 -= bSig;
+ expDiff -= 64;
+ while ( 0 < expDiff ) {
+ q = estimateDiv128To64( aSig0, aSig1, bSig );
+ q = ( 2 < q ) ? q - 2 : 0;
+ mul64To128( bSig, q, &term0, &term1 );
+ sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+ shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
+ expDiff -= 62;
+ }
+ expDiff += 64;
+ if ( 0 < expDiff ) {
+ q = estimateDiv128To64( aSig0, aSig1, bSig );
+ q = ( 2 < q ) ? q - 2 : 0;
+ q >>= 64 - expDiff;
+ mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
+ sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+ shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
+ while ( le128( term0, term1, aSig0, aSig1 ) ) {
+ ++q;
+ sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
+ }
+ }
+ else {
+ term1 = 0;
+ term0 = bSig;
+ }
+ sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
+ if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
+ || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
+ && ( q & 1 ) )
+ ) {
+ aSig0 = alternateASig0;
+ aSig1 = alternateASig1;
+ zSign = ! zSign;
+ }
+ return
+ normalizeRoundAndPackFloatx80(
+ 80, zSign, bExp + expDiff, aSig0, aSig1 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the square root of the extended double-precision floating-point
+| value `a'. The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_sqrt( floatx80 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp, zExp;
+ uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
+ uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+ floatx80 z;
+
+ aSig0 = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ aSign = extractFloatx80Sign( a );
+ if ( aExp == 0x7FFF ) {
+ if ( (uint64_t) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a STATUS_VAR );
+ if ( ! aSign ) return a;
+ goto invalid;
+ }
+ if ( aSign ) {
+ if ( ( aExp | aSig0 ) == 0 ) return a;
+ invalid:
+ float_raise( float_flag_invalid STATUS_VAR);
+ z.low = floatx80_default_nan_low;
+ z.high = floatx80_default_nan_high;
+ return z;
+ }
+ if ( aExp == 0 ) {
+ if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
+ normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
+ }
+ zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
+ zSig0 = estimateSqrt32( aExp, aSig0>>32 );
+ shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
+ zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
+ doubleZSig0 = zSig0<<1;
+ mul64To128( zSig0, zSig0, &term0, &term1 );
+ sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
+ while ( (int64_t) rem0 < 0 ) {
+ --zSig0;
+ doubleZSig0 -= 2;
+ add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
+ }
+ zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
+ if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
+ if ( zSig1 == 0 ) zSig1 = 1;
+ mul64To128( doubleZSig0, zSig1, &term1, &term2 );
+ sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+ mul64To128( zSig1, zSig1, &term2, &term3 );
+ sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
+ while ( (int64_t) rem1 < 0 ) {
+ --zSig1;
+ shortShift128Left( 0, zSig1, 1, &term2, &term3 );
+ term3 |= 1;
+ term2 |= doubleZSig0;
+ add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
+ }
+ zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
+ }
+ shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
+ zSig0 |= doubleZSig0;
+ return
+ roundAndPackFloatx80(
+ STATUS(floatx80_rounding_precision), 0, zExp, zSig0, zSig1 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is equal
+| to the corresponding value `b', and 0 otherwise. The invalid exception is
+| raised if either operand is a NaN. Otherwise, the comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int floatx80_eq( floatx80 a, floatx80 b STATUS_PARAM )
+{
+
+ if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
+ && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
+ || ( ( extractFloatx80Exp( b ) == 0x7FFF )
+ && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
+ ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return 0;
+ }
+ return
+ ( a.low == b.low )
+ && ( ( a.high == b.high )
+ || ( ( a.low == 0 )
+ && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
+ );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is
+| less than or equal to the corresponding value `b', and 0 otherwise. The
+| invalid exception is raised if either operand is a NaN. The comparison is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int floatx80_le( floatx80 a, floatx80 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
+ && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
+ || ( ( extractFloatx80Exp( b ) == 0x7FFF )
+ && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
+ ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return 0;
+ }
+ aSign = extractFloatx80Sign( a );
+ bSign = extractFloatx80Sign( b );
+ if ( aSign != bSign ) {
+ return
+ aSign
+ || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+ == 0 );
+ }
+ return
+ aSign ? le128( b.high, b.low, a.high, a.low )
+ : le128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is
+| less than the corresponding value `b', and 0 otherwise. The invalid
+| exception is raised if either operand is a NaN. The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int floatx80_lt( floatx80 a, floatx80 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
+ && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
+ || ( ( extractFloatx80Exp( b ) == 0x7FFF )
+ && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
+ ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return 0;
+ }
+ aSign = extractFloatx80Sign( a );
+ bSign = extractFloatx80Sign( b );
+ if ( aSign != bSign ) {
+ return
+ aSign
+ && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+ != 0 );
+ }
+ return
+ aSign ? lt128( b.high, b.low, a.high, a.low )
+ : lt128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point values `a' and `b'
+| cannot be compared, and 0 otherwise. The invalid exception is raised if
+| either operand is a NaN. The comparison is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+int floatx80_unordered( floatx80 a, floatx80 b STATUS_PARAM )
+{
+ if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
+ && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
+ || ( ( extractFloatx80Exp( b ) == 0x7FFF )
+ && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
+ ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return 1;
+ }
+ return 0;
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is
+| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
+| cause an exception. The comparison is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int floatx80_eq_quiet( floatx80 a, floatx80 b STATUS_PARAM )
+{
+
+ if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
+ && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
+ || ( ( extractFloatx80Exp( b ) == 0x7FFF )
+ && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
+ ) {
+ if ( floatx80_is_signaling_nan( a )
+ || floatx80_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return 0;
+ }
+ return
+ ( a.low == b.low )
+ && ( ( a.high == b.high )
+ || ( ( a.low == 0 )
+ && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
+ );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is less
+| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
+| do not cause an exception. Otherwise, the comparison is performed according
+| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int floatx80_le_quiet( floatx80 a, floatx80 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
+ && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
+ || ( ( extractFloatx80Exp( b ) == 0x7FFF )
+ && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
+ ) {
+ if ( floatx80_is_signaling_nan( a )
+ || floatx80_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return 0;
+ }
+ aSign = extractFloatx80Sign( a );
+ bSign = extractFloatx80Sign( b );
+ if ( aSign != bSign ) {
+ return
+ aSign
+ || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+ == 0 );
+ }
+ return
+ aSign ? le128( b.high, b.low, a.high, a.low )
+ : le128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is less
+| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
+| an exception. Otherwise, the comparison is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int floatx80_lt_quiet( floatx80 a, floatx80 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
+ && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
+ || ( ( extractFloatx80Exp( b ) == 0x7FFF )
+ && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
+ ) {
+ if ( floatx80_is_signaling_nan( a )
+ || floatx80_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return 0;
+ }
+ aSign = extractFloatx80Sign( a );
+ bSign = extractFloatx80Sign( b );
+ if ( aSign != bSign ) {
+ return
+ aSign
+ && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+ != 0 );
+ }
+ return
+ aSign ? lt128( b.high, b.low, a.high, a.low )
+ : lt128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point values `a' and `b'
+| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception.
+| The comparison is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+int floatx80_unordered_quiet( floatx80 a, floatx80 b STATUS_PARAM )
+{
+ if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
+ && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
+ || ( ( extractFloatx80Exp( b ) == 0x7FFF )
+ && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
+ ) {
+ if ( floatx80_is_signaling_nan( a )
+ || floatx80_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return 1;
+ }
+ return 0;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 32-bit two's complement integer format. The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode. If `a' is a NaN, the largest
+| positive integer is returned. Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int32 float128_to_int32( float128 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp, shiftCount;
+ uint64_t aSig0, aSig1;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
+ if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
+ aSig0 |= ( aSig1 != 0 );
+ shiftCount = 0x4028 - aExp;
+ if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
+ return roundAndPackInt32( aSign, aSig0 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 32-bit two's complement integer format. The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero. If
+| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
+| conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int32 float128_to_int32_round_to_zero( float128 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp, shiftCount;
+ uint64_t aSig0, aSig1, savedASig;
+ int32_t z;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ aSig0 |= ( aSig1 != 0 );
+ if ( 0x401E < aExp ) {
+ if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
+ goto invalid;
+ }
+ else if ( aExp < 0x3FFF ) {
+ if ( aExp || aSig0 ) STATUS(float_exception_flags) |= float_flag_inexact;
+ return 0;
+ }
+ aSig0 |= LIT64( 0x0001000000000000 );
+ shiftCount = 0x402F - aExp;
+ savedASig = aSig0;
+ aSig0 >>= shiftCount;
+ z = aSig0;
+ if ( aSign ) z = - z;
+ if ( ( z < 0 ) ^ aSign ) {
+ invalid:
+ float_raise( float_flag_invalid STATUS_VAR);
+ return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
+ }
+ if ( ( aSig0<<shiftCount ) != savedASig ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 64-bit two's complement integer format. The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic---which means in particular that the conversion is rounded
+| according to the current rounding mode. If `a' is a NaN, the largest
+| positive integer is returned. Otherwise, if the conversion overflows, the
+| largest integer with the same sign as `a' is returned.
+*----------------------------------------------------------------------------*/
+
+int64 float128_to_int64( float128 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp, shiftCount;
+ uint64_t aSig0, aSig1;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
+ shiftCount = 0x402F - aExp;
+ if ( shiftCount <= 0 ) {
+ if ( 0x403E < aExp ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ if ( ! aSign
+ || ( ( aExp == 0x7FFF )
+ && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
+ )
+ ) {
+ return LIT64( 0x7FFFFFFFFFFFFFFF );
+ }
+ return (int64_t) LIT64( 0x8000000000000000 );
+ }
+ shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
+ }
+ else {
+ shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
+ }
+ return roundAndPackInt64( aSign, aSig0, aSig1 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the 64-bit two's complement integer format. The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
+| the conversion overflows, the largest integer with the same sign as `a' is
+| returned.
+*----------------------------------------------------------------------------*/
+
+int64 float128_to_int64_round_to_zero( float128 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp, shiftCount;
+ uint64_t aSig0, aSig1;
+ int64 z;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
+ shiftCount = aExp - 0x402F;
+ if ( 0 < shiftCount ) {
+ if ( 0x403E <= aExp ) {
+ aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
+ if ( ( a.high == LIT64( 0xC03E000000000000 ) )
+ && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
+ if ( aSig1 ) STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ else {
+ float_raise( float_flag_invalid STATUS_VAR);
+ if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
+ return LIT64( 0x7FFFFFFFFFFFFFFF );
+ }
+ }
+ return (int64_t) LIT64( 0x8000000000000000 );
+ }
+ z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
+ if ( (uint64_t) ( aSig1<<shiftCount ) ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ }
+ else {
+ if ( aExp < 0x3FFF ) {
+ if ( aExp | aSig0 | aSig1 ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ return 0;
+ }
+ z = aSig0>>( - shiftCount );
+ if ( aSig1
+ || ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & 63 ) ) ) ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ }
+ if ( aSign ) z = - z;
+ return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the single-precision floating-point format. The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float128_to_float32( float128 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp;
+ uint64_t aSig0, aSig1;
+ uint32_t zSig;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ if ( aExp == 0x7FFF ) {
+ if ( aSig0 | aSig1 ) {
+ return commonNaNToFloat32( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
+ }
+ return packFloat32( aSign, 0xFF, 0 );
+ }
+ aSig0 |= ( aSig1 != 0 );
+ shift64RightJamming( aSig0, 18, &aSig0 );
+ zSig = aSig0;
+ if ( aExp || zSig ) {
+ zSig |= 0x40000000;
+ aExp -= 0x3F81;
+ }
+ return roundAndPackFloat32( aSign, aExp, zSig STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the double-precision floating-point format. The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float128_to_float64( float128 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp;
+ uint64_t aSig0, aSig1;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ if ( aExp == 0x7FFF ) {
+ if ( aSig0 | aSig1 ) {
+ return commonNaNToFloat64( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
+ }
+ return packFloat64( aSign, 0x7FF, 0 );
+ }
+ shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
+ aSig0 |= ( aSig1 != 0 );
+ if ( aExp || aSig0 ) {
+ aSig0 |= LIT64( 0x4000000000000000 );
+ aExp -= 0x3C01;
+ }
+ return roundAndPackFloat64( aSign, aExp, aSig0 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the extended double-precision floating-point format. The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 float128_to_floatx80( float128 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp;
+ uint64_t aSig0, aSig1;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ if ( aExp == 0x7FFF ) {
+ if ( aSig0 | aSig1 ) {
+ return commonNaNToFloatx80( float128ToCommonNaN( a STATUS_VAR ) STATUS_VAR );
+ }
+ return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
+ }
+ if ( aExp == 0 ) {
+ if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
+ normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+ }
+ else {
+ aSig0 |= LIT64( 0x0001000000000000 );
+ }
+ shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
+ return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Rounds the quadruple-precision floating-point value `a' to an integer, and
+| returns the result as a quadruple-precision floating-point value. The
+| operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_round_to_int( float128 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp;
+ uint64_t lastBitMask, roundBitsMask;
+ int8 roundingMode;
+ float128 z;
+
+ aExp = extractFloat128Exp( a );
+ if ( 0x402F <= aExp ) {
+ if ( 0x406F <= aExp ) {
+ if ( ( aExp == 0x7FFF )
+ && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
+ ) {
+ return propagateFloat128NaN( a, a STATUS_VAR );
+ }
+ return a;
+ }
+ lastBitMask = 1;
+ lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
+ roundBitsMask = lastBitMask - 1;
+ z = a;
+ roundingMode = STATUS(float_rounding_mode);
+ if ( roundingMode == float_round_nearest_even ) {
+ if ( lastBitMask ) {
+ add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
+ if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
+ }
+ else {
+ if ( (int64_t) z.low < 0 ) {
+ ++z.high;
+ if ( (uint64_t) ( z.low<<1 ) == 0 ) z.high &= ~1;
+ }
+ }
+ }
+ else if ( roundingMode != float_round_to_zero ) {
+ if ( extractFloat128Sign( z )
+ ^ ( roundingMode == float_round_up ) ) {
+ add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
+ }
+ }
+ z.low &= ~ roundBitsMask;
+ }
+ else {
+ if ( aExp < 0x3FFF ) {
+ if ( ( ( (uint64_t) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ aSign = extractFloat128Sign( a );
+ switch ( STATUS(float_rounding_mode) ) {
+ case float_round_nearest_even:
+ if ( ( aExp == 0x3FFE )
+ && ( extractFloat128Frac0( a )
+ | extractFloat128Frac1( a ) )
+ ) {
+ return packFloat128( aSign, 0x3FFF, 0, 0 );
+ }
+ break;
+ case float_round_down:
+ return
+ aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
+ : packFloat128( 0, 0, 0, 0 );
+ case float_round_up:
+ return
+ aSign ? packFloat128( 1, 0, 0, 0 )
+ : packFloat128( 0, 0x3FFF, 0, 0 );
+ }
+ return packFloat128( aSign, 0, 0, 0 );
+ }
+ lastBitMask = 1;
+ lastBitMask <<= 0x402F - aExp;
+ roundBitsMask = lastBitMask - 1;
+ z.low = 0;
+ z.high = a.high;
+ roundingMode = STATUS(float_rounding_mode);
+ if ( roundingMode == float_round_nearest_even ) {
+ z.high += lastBitMask>>1;
+ if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
+ z.high &= ~ lastBitMask;
+ }
+ }
+ else if ( roundingMode != float_round_to_zero ) {
+ if ( extractFloat128Sign( z )
+ ^ ( roundingMode == float_round_up ) ) {
+ z.high |= ( a.low != 0 );
+ z.high += roundBitsMask;
+ }
+ }
+ z.high &= ~ roundBitsMask;
+ }
+ if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
+ STATUS(float_exception_flags) |= float_flag_inexact;
+ }
+ return z;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the quadruple-precision
+| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
+| before being returned. `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float128 addFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
+{
+ int32 aExp, bExp, zExp;
+ uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
+ int32 expDiff;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ bSig1 = extractFloat128Frac1( b );
+ bSig0 = extractFloat128Frac0( b );
+ bExp = extractFloat128Exp( b );
+ expDiff = aExp - bExp;
+ if ( 0 < expDiff ) {
+ if ( aExp == 0x7FFF ) {
+ if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
+ return a;
+ }
+ if ( bExp == 0 ) {
+ --expDiff;
+ }
+ else {
+ bSig0 |= LIT64( 0x0001000000000000 );
+ }
+ shift128ExtraRightJamming(
+ bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
+ zExp = aExp;
+ }
+ else if ( expDiff < 0 ) {
+ if ( bExp == 0x7FFF ) {
+ if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
+ return packFloat128( zSign, 0x7FFF, 0, 0 );
+ }
+ if ( aExp == 0 ) {
+ ++expDiff;
+ }
+ else {
+ aSig0 |= LIT64( 0x0001000000000000 );
+ }
+ shift128ExtraRightJamming(
+ aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
+ zExp = bExp;
+ }
+ else {
+ if ( aExp == 0x7FFF ) {
+ if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
+ return propagateFloat128NaN( a, b STATUS_VAR );
+ }
+ return a;
+ }
+ add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+ if ( aExp == 0 ) {
+ if (STATUS(flush_to_zero)) {
+ if (zSig0 | zSig1) {
+ float_raise(float_flag_output_denormal STATUS_VAR);
+ }
+ return packFloat128(zSign, 0, 0, 0);
+ }
+ return packFloat128( zSign, 0, zSig0, zSig1 );
+ }
+ zSig2 = 0;
+ zSig0 |= LIT64( 0x0002000000000000 );
+ zExp = aExp;
+ goto shiftRight1;
+ }
+ aSig0 |= LIT64( 0x0001000000000000 );
+ add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+ --zExp;
+ if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
+ ++zExp;
+ shiftRight1:
+ shift128ExtraRightJamming(
+ zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
+ roundAndPack:
+ return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the quadruple-
+| precision floating-point values `a' and `b'. If `zSign' is 1, the
+| difference is negated before being returned. `zSign' is ignored if the
+| result is a NaN. The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float128 subFloat128Sigs( float128 a, float128 b, flag zSign STATUS_PARAM)
+{
+ int32 aExp, bExp, zExp;
+ uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
+ int32 expDiff;
+ float128 z;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ bSig1 = extractFloat128Frac1( b );
+ bSig0 = extractFloat128Frac0( b );
+ bExp = extractFloat128Exp( b );
+ expDiff = aExp - bExp;
+ shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
+ shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
+ if ( 0 < expDiff ) goto aExpBigger;
+ if ( expDiff < 0 ) goto bExpBigger;
+ if ( aExp == 0x7FFF ) {
+ if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
+ return propagateFloat128NaN( a, b STATUS_VAR );
+ }
+ float_raise( float_flag_invalid STATUS_VAR);
+ z.low = float128_default_nan_low;
+ z.high = float128_default_nan_high;
+ return z;
+ }
+ if ( aExp == 0 ) {
+ aExp = 1;
+ bExp = 1;
+ }
+ if ( bSig0 < aSig0 ) goto aBigger;
+ if ( aSig0 < bSig0 ) goto bBigger;
+ if ( bSig1 < aSig1 ) goto aBigger;
+ if ( aSig1 < bSig1 ) goto bBigger;
+ return packFloat128( STATUS(float_rounding_mode) == float_round_down, 0, 0, 0 );
+ bExpBigger:
+ if ( bExp == 0x7FFF ) {
+ if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
+ return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
+ }
+ if ( aExp == 0 ) {
+ ++expDiff;
+ }
+ else {
+ aSig0 |= LIT64( 0x4000000000000000 );
+ }
+ shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
+ bSig0 |= LIT64( 0x4000000000000000 );
+ bBigger:
+ sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
+ zExp = bExp;
+ zSign ^= 1;
+ goto normalizeRoundAndPack;
+ aExpBigger:
+ if ( aExp == 0x7FFF ) {
+ if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
+ return a;
+ }
+ if ( bExp == 0 ) {
+ --expDiff;
+ }
+ else {
+ bSig0 |= LIT64( 0x4000000000000000 );
+ }
+ shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
+ aSig0 |= LIT64( 0x4000000000000000 );
+ aBigger:
+ sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
+ zExp = aExp;
+ normalizeRoundAndPack:
+ --zExp;
+ return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the quadruple-precision floating-point values
+| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_add( float128 a, float128 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ aSign = extractFloat128Sign( a );
+ bSign = extractFloat128Sign( b );
+ if ( aSign == bSign ) {
+ return addFloat128Sigs( a, b, aSign STATUS_VAR );
+ }
+ else {
+ return subFloat128Sigs( a, b, aSign STATUS_VAR );
+ }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the quadruple-precision floating-point
+| values `a' and `b'. The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_sub( float128 a, float128 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ aSign = extractFloat128Sign( a );
+ bSign = extractFloat128Sign( b );
+ if ( aSign == bSign ) {
+ return subFloat128Sigs( a, b, aSign STATUS_VAR );
+ }
+ else {
+ return addFloat128Sigs( a, b, aSign STATUS_VAR );
+ }
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the quadruple-precision floating-point
+| values `a' and `b'. The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_mul( float128 a, float128 b STATUS_PARAM )
+{
+ flag aSign, bSign, zSign;
+ int32 aExp, bExp, zExp;
+ uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
+ float128 z;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ bSig1 = extractFloat128Frac1( b );
+ bSig0 = extractFloat128Frac0( b );
+ bExp = extractFloat128Exp( b );
+ bSign = extractFloat128Sign( b );
+ zSign = aSign ^ bSign;
+ if ( aExp == 0x7FFF ) {
+ if ( ( aSig0 | aSig1 )
+ || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
+ return propagateFloat128NaN( a, b STATUS_VAR );
+ }
+ if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
+ return packFloat128( zSign, 0x7FFF, 0, 0 );
+ }
+ if ( bExp == 0x7FFF ) {
+ if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
+ if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
+ invalid:
+ float_raise( float_flag_invalid STATUS_VAR);
+ z.low = float128_default_nan_low;
+ z.high = float128_default_nan_high;
+ return z;
+ }
+ return packFloat128( zSign, 0x7FFF, 0, 0 );
+ }
+ if ( aExp == 0 ) {
+ if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
+ normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+ }
+ if ( bExp == 0 ) {
+ if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
+ normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+ }
+ zExp = aExp + bExp - 0x4000;
+ aSig0 |= LIT64( 0x0001000000000000 );
+ shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
+ mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
+ add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
+ zSig2 |= ( zSig3 != 0 );
+ if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
+ shift128ExtraRightJamming(
+ zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
+ ++zExp;
+ }
+ return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the quadruple-precision floating-point value
+| `a' by the corresponding value `b'. The operation is performed according to
+| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_div( float128 a, float128 b STATUS_PARAM )
+{
+ flag aSign, bSign, zSign;
+ int32 aExp, bExp, zExp;
+ uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
+ uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+ float128 z;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ bSig1 = extractFloat128Frac1( b );
+ bSig0 = extractFloat128Frac0( b );
+ bExp = extractFloat128Exp( b );
+ bSign = extractFloat128Sign( b );
+ zSign = aSign ^ bSign;
+ if ( aExp == 0x7FFF ) {
+ if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
+ if ( bExp == 0x7FFF ) {
+ if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
+ goto invalid;
+ }
+ return packFloat128( zSign, 0x7FFF, 0, 0 );
+ }
+ if ( bExp == 0x7FFF ) {
+ if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
+ return packFloat128( zSign, 0, 0, 0 );
+ }
+ if ( bExp == 0 ) {
+ if ( ( bSig0 | bSig1 ) == 0 ) {
+ if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
+ invalid:
+ float_raise( float_flag_invalid STATUS_VAR);
+ z.low = float128_default_nan_low;
+ z.high = float128_default_nan_high;
+ return z;
+ }
+ float_raise( float_flag_divbyzero STATUS_VAR);
+ return packFloat128( zSign, 0x7FFF, 0, 0 );
+ }
+ normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+ }
+ if ( aExp == 0 ) {
+ if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
+ normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+ }
+ zExp = aExp - bExp + 0x3FFD;
+ shortShift128Left(
+ aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
+ shortShift128Left(
+ bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
+ if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
+ shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
+ ++zExp;
+ }
+ zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
+ mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
+ sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
+ while ( (int64_t) rem0 < 0 ) {
+ --zSig0;
+ add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
+ }
+ zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
+ if ( ( zSig1 & 0x3FFF ) <= 4 ) {
+ mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
+ sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
+ while ( (int64_t) rem1 < 0 ) {
+ --zSig1;
+ add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
+ }
+ zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
+ }
+ shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
+ return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the remainder of the quadruple-precision floating-point value `a'
+| with respect to the corresponding value `b'. The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_rem( float128 a, float128 b STATUS_PARAM )
+{
+ flag aSign, zSign;
+ int32 aExp, bExp, expDiff;
+ uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
+ uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
+ int64_t sigMean0;
+ float128 z;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ bSig1 = extractFloat128Frac1( b );
+ bSig0 = extractFloat128Frac0( b );
+ bExp = extractFloat128Exp( b );
+ if ( aExp == 0x7FFF ) {
+ if ( ( aSig0 | aSig1 )
+ || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
+ return propagateFloat128NaN( a, b STATUS_VAR );
+ }
+ goto invalid;
+ }
+ if ( bExp == 0x7FFF ) {
+ if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b STATUS_VAR );
+ return a;
+ }
+ if ( bExp == 0 ) {
+ if ( ( bSig0 | bSig1 ) == 0 ) {
+ invalid:
+ float_raise( float_flag_invalid STATUS_VAR);
+ z.low = float128_default_nan_low;
+ z.high = float128_default_nan_high;
+ return z;
+ }
+ normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
+ }
+ if ( aExp == 0 ) {
+ if ( ( aSig0 | aSig1 ) == 0 ) return a;
+ normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+ }
+ expDiff = aExp - bExp;
+ if ( expDiff < -1 ) return a;
+ shortShift128Left(
+ aSig0 | LIT64( 0x0001000000000000 ),
+ aSig1,
+ 15 - ( expDiff < 0 ),
+ &aSig0,
+ &aSig1
+ );
+ shortShift128Left(
+ bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
+ q = le128( bSig0, bSig1, aSig0, aSig1 );
+ if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
+ expDiff -= 64;
+ while ( 0 < expDiff ) {
+ q = estimateDiv128To64( aSig0, aSig1, bSig0 );
+ q = ( 4 < q ) ? q - 4 : 0;
+ mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
+ shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
+ shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
+ sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
+ expDiff -= 61;
+ }
+ if ( -64 < expDiff ) {
+ q = estimateDiv128To64( aSig0, aSig1, bSig0 );
+ q = ( 4 < q ) ? q - 4 : 0;
+ q >>= - expDiff;
+ shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
+ expDiff += 52;
+ if ( expDiff < 0 ) {
+ shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
+ }
+ else {
+ shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
+ }
+ mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
+ sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
+ }
+ else {
+ shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
+ shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
+ }
+ do {
+ alternateASig0 = aSig0;
+ alternateASig1 = aSig1;
+ ++q;
+ sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
+ } while ( 0 <= (int64_t) aSig0 );
+ add128(
+ aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
+ if ( ( sigMean0 < 0 )
+ || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
+ aSig0 = alternateASig0;
+ aSig1 = alternateASig1;
+ }
+ zSign = ( (int64_t) aSig0 < 0 );
+ if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
+ return
+ normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the square root of the quadruple-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_sqrt( float128 a STATUS_PARAM )
+{
+ flag aSign;
+ int32 aExp, zExp;
+ uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
+ uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+ float128 z;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ if ( aExp == 0x7FFF ) {
+ if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a STATUS_VAR );
+ if ( ! aSign ) return a;
+ goto invalid;
+ }
+ if ( aSign ) {
+ if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
+ invalid:
+ float_raise( float_flag_invalid STATUS_VAR);
+ z.low = float128_default_nan_low;
+ z.high = float128_default_nan_high;
+ return z;
+ }
+ if ( aExp == 0 ) {
+ if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
+ normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
+ }
+ zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
+ aSig0 |= LIT64( 0x0001000000000000 );
+ zSig0 = estimateSqrt32( aExp, aSig0>>17 );
+ shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
+ zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
+ doubleZSig0 = zSig0<<1;
+ mul64To128( zSig0, zSig0, &term0, &term1 );
+ sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
+ while ( (int64_t) rem0 < 0 ) {
+ --zSig0;
+ doubleZSig0 -= 2;
+ add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
+ }
+ zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
+ if ( ( zSig1 & 0x1FFF ) <= 5 ) {
+ if ( zSig1 == 0 ) zSig1 = 1;
+ mul64To128( doubleZSig0, zSig1, &term1, &term2 );
+ sub128( rem1, 0, term1, term2, &rem1, &rem2 );
+ mul64To128( zSig1, zSig1, &term2, &term3 );
+ sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
+ while ( (int64_t) rem1 < 0 ) {
+ --zSig1;
+ shortShift128Left( 0, zSig1, 1, &term2, &term3 );
+ term3 |= 1;
+ term2 |= doubleZSig0;
+ add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
+ }
+ zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
+ }
+ shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
+ return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 STATUS_VAR );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise. The invalid exception is
+| raised if either operand is a NaN. Otherwise, the comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float128_eq( float128 a, float128 b STATUS_PARAM )
+{
+
+ if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
+ && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+ || ( ( extractFloat128Exp( b ) == 0x7FFF )
+ && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+ ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return 0;
+ }
+ return
+ ( a.low == b.low )
+ && ( ( a.high == b.high )
+ || ( ( a.low == 0 )
+ && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
+ );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| or equal to the corresponding value `b', and 0 otherwise. The invalid
+| exception is raised if either operand is a NaN. The comparison is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float128_le( float128 a, float128 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
+ && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+ || ( ( extractFloat128Exp( b ) == 0x7FFF )
+ && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+ ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return 0;
+ }
+ aSign = extractFloat128Sign( a );
+ bSign = extractFloat128Sign( b );
+ if ( aSign != bSign ) {
+ return
+ aSign
+ || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+ == 0 );
+ }
+ return
+ aSign ? le128( b.high, b.low, a.high, a.low )
+ : le128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise. The invalid exception is
+| raised if either operand is a NaN. The comparison is performed according
+| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float128_lt( float128 a, float128 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
+ && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+ || ( ( extractFloat128Exp( b ) == 0x7FFF )
+ && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+ ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return 0;
+ }
+ aSign = extractFloat128Sign( a );
+ bSign = extractFloat128Sign( b );
+ if ( aSign != bSign ) {
+ return
+ aSign
+ && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+ != 0 );
+ }
+ return
+ aSign ? lt128( b.high, b.low, a.high, a.low )
+ : lt128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
+| be compared, and 0 otherwise. The invalid exception is raised if either
+| operand is a NaN. The comparison is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float128_unordered( float128 a, float128 b STATUS_PARAM )
+{
+ if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
+ && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+ || ( ( extractFloat128Exp( b ) == 0x7FFF )
+ && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+ ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ return 1;
+ }
+ return 0;
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is equal to
+| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
+| exception. The comparison is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float128_eq_quiet( float128 a, float128 b STATUS_PARAM )
+{
+
+ if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
+ && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+ || ( ( extractFloat128Exp( b ) == 0x7FFF )
+ && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+ ) {
+ if ( float128_is_signaling_nan( a )
+ || float128_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return 0;
+ }
+ return
+ ( a.low == b.low )
+ && ( ( a.high == b.high )
+ || ( ( a.low == 0 )
+ && ( (uint64_t) ( ( a.high | b.high )<<1 ) == 0 ) )
+ );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
+| cause an exception. Otherwise, the comparison is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float128_le_quiet( float128 a, float128 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
+ && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+ || ( ( extractFloat128Exp( b ) == 0x7FFF )
+ && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+ ) {
+ if ( float128_is_signaling_nan( a )
+ || float128_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return 0;
+ }
+ aSign = extractFloat128Sign( a );
+ bSign = extractFloat128Sign( b );
+ if ( aSign != bSign ) {
+ return
+ aSign
+ || ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+ == 0 );
+ }
+ return
+ aSign ? le128( b.high, b.low, a.high, a.low )
+ : le128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point value `a' is less than
+| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
+| exception. Otherwise, the comparison is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float128_lt_quiet( float128 a, float128 b STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
+ && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+ || ( ( extractFloat128Exp( b ) == 0x7FFF )
+ && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+ ) {
+ if ( float128_is_signaling_nan( a )
+ || float128_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return 0;
+ }
+ aSign = extractFloat128Sign( a );
+ bSign = extractFloat128Sign( b );
+ if ( aSign != bSign ) {
+ return
+ aSign
+ && ( ( ( (uint64_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
+ != 0 );
+ }
+ return
+ aSign ? lt128( b.high, b.low, a.high, a.low )
+ : lt128( a.high, a.low, b.high, b.low );
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
+| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
+| comparison is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+int float128_unordered_quiet( float128 a, float128 b STATUS_PARAM )
+{
+ if ( ( ( extractFloat128Exp( a ) == 0x7FFF )
+ && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
+ || ( ( extractFloat128Exp( b ) == 0x7FFF )
+ && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
+ ) {
+ if ( float128_is_signaling_nan( a )
+ || float128_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return 1;
+ }
+ return 0;
+}
+
+/* misc functions */
+float32 uint32_to_float32( uint32 a STATUS_PARAM )
+{
+ return int64_to_float32(a STATUS_VAR);
+}
+
+float64 uint32_to_float64( uint32 a STATUS_PARAM )
+{
+ return int64_to_float64(a STATUS_VAR);
+}
+
+uint32 float32_to_uint32( float32 a STATUS_PARAM )
+{
+ int64_t v;
+ uint32 res;
+
+ v = float32_to_int64(a STATUS_VAR);
+ if (v < 0) {
+ res = 0;
+ float_raise( float_flag_invalid STATUS_VAR);
+ } else if (v > 0xffffffff) {
+ res = 0xffffffff;
+ float_raise( float_flag_invalid STATUS_VAR);
+ } else {
+ res = v;
+ }
+ return res;
+}
+
+uint32 float32_to_uint32_round_to_zero( float32 a STATUS_PARAM )
+{
+ int64_t v;
+ uint32 res;
+
+ v = float32_to_int64_round_to_zero(a STATUS_VAR);
+ if (v < 0) {
+ res = 0;
+ float_raise( float_flag_invalid STATUS_VAR);
+ } else if (v > 0xffffffff) {
+ res = 0xffffffff;
+ float_raise( float_flag_invalid STATUS_VAR);
+ } else {
+ res = v;
+ }
+ return res;
+}
+
+uint_fast16_t float32_to_uint16_round_to_zero(float32 a STATUS_PARAM)
+{
+ int64_t v;
+ uint_fast16_t res;
+
+ v = float32_to_int64_round_to_zero(a STATUS_VAR);
+ if (v < 0) {
+ res = 0;
+ float_raise( float_flag_invalid STATUS_VAR);
+ } else if (v > 0xffff) {
+ res = 0xffff;
+ float_raise( float_flag_invalid STATUS_VAR);
+ } else {
+ res = v;
+ }
+ return res;
+}
+
+uint32 float64_to_uint32( float64 a STATUS_PARAM )
+{
+ int64_t v;
+ uint32 res;
+
+ v = float64_to_int64(a STATUS_VAR);
+ if (v < 0) {
+ res = 0;
+ float_raise( float_flag_invalid STATUS_VAR);
+ } else if (v > 0xffffffff) {
+ res = 0xffffffff;
+ float_raise( float_flag_invalid STATUS_VAR);
+ } else {
+ res = v;
+ }
+ return res;
+}
+
+uint32 float64_to_uint32_round_to_zero( float64 a STATUS_PARAM )
+{
+ int64_t v;
+ uint32 res;
+
+ v = float64_to_int64_round_to_zero(a STATUS_VAR);
+ if (v < 0) {
+ res = 0;
+ float_raise( float_flag_invalid STATUS_VAR);
+ } else if (v > 0xffffffff) {
+ res = 0xffffffff;
+ float_raise( float_flag_invalid STATUS_VAR);
+ } else {
+ res = v;
+ }
+ return res;
+}
+
+uint_fast16_t float64_to_uint16_round_to_zero(float64 a STATUS_PARAM)
+{
+ int64_t v;
+ uint_fast16_t res;
+
+ v = float64_to_int64_round_to_zero(a STATUS_VAR);
+ if (v < 0) {
+ res = 0;
+ float_raise( float_flag_invalid STATUS_VAR);
+ } else if (v > 0xffff) {
+ res = 0xffff;
+ float_raise( float_flag_invalid STATUS_VAR);
+ } else {
+ res = v;
+ }
+ return res;
+}
+
+/* FIXME: This looks broken. */
+uint64_t float64_to_uint64 (float64 a STATUS_PARAM)
+{
+ int64_t v;
+
+ v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
+ v += float64_val(a);
+ v = float64_to_int64(make_float64(v) STATUS_VAR);
+
+ return v - INT64_MIN;
+}
+
+uint64_t float64_to_uint64_round_to_zero (float64 a STATUS_PARAM)
+{
+ int64_t v;
+
+ v = float64_val(int64_to_float64(INT64_MIN STATUS_VAR));
+ v += float64_val(a);
+ v = float64_to_int64_round_to_zero(make_float64(v) STATUS_VAR);
+
+ return v - INT64_MIN;
+}
+
+#define COMPARE(s, nan_exp) \
+INLINE int float ## s ## _compare_internal( float ## s a, float ## s b, \
+ int is_quiet STATUS_PARAM ) \
+{ \
+ flag aSign, bSign; \
+ uint ## s ## _t av, bv; \
+ a = float ## s ## _squash_input_denormal(a STATUS_VAR); \
+ b = float ## s ## _squash_input_denormal(b STATUS_VAR); \
+ \
+ if (( ( extractFloat ## s ## Exp( a ) == nan_exp ) && \
+ extractFloat ## s ## Frac( a ) ) || \
+ ( ( extractFloat ## s ## Exp( b ) == nan_exp ) && \
+ extractFloat ## s ## Frac( b ) )) { \
+ if (!is_quiet || \
+ float ## s ## _is_signaling_nan( a ) || \
+ float ## s ## _is_signaling_nan( b ) ) { \
+ float_raise( float_flag_invalid STATUS_VAR); \
+ } \
+ return float_relation_unordered; \
+ } \
+ aSign = extractFloat ## s ## Sign( a ); \
+ bSign = extractFloat ## s ## Sign( b ); \
+ av = float ## s ## _val(a); \
+ bv = float ## s ## _val(b); \
+ if ( aSign != bSign ) { \
+ if ( (uint ## s ## _t) ( ( av | bv )<<1 ) == 0 ) { \
+ /* zero case */ \
+ return float_relation_equal; \
+ } else { \
+ return 1 - (2 * aSign); \
+ } \
+ } else { \
+ if (av == bv) { \
+ return float_relation_equal; \
+ } else { \
+ return 1 - 2 * (aSign ^ ( av < bv )); \
+ } \
+ } \
+} \
+ \
+int float ## s ## _compare( float ## s a, float ## s b STATUS_PARAM ) \
+{ \
+ return float ## s ## _compare_internal(a, b, 0 STATUS_VAR); \
+} \
+ \
+int float ## s ## _compare_quiet( float ## s a, float ## s b STATUS_PARAM ) \
+{ \
+ return float ## s ## _compare_internal(a, b, 1 STATUS_VAR); \
+}
+
+COMPARE(32, 0xff)
+COMPARE(64, 0x7ff)
+
+INLINE int floatx80_compare_internal( floatx80 a, floatx80 b,
+ int is_quiet STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ if (( ( extractFloatx80Exp( a ) == 0x7fff ) &&
+ ( extractFloatx80Frac( a )<<1 ) ) ||
+ ( ( extractFloatx80Exp( b ) == 0x7fff ) &&
+ ( extractFloatx80Frac( b )<<1 ) )) {
+ if (!is_quiet ||
+ floatx80_is_signaling_nan( a ) ||
+ floatx80_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return float_relation_unordered;
+ }
+ aSign = extractFloatx80Sign( a );
+ bSign = extractFloatx80Sign( b );
+ if ( aSign != bSign ) {
+
+ if ( ( ( (uint16_t) ( ( a.high | b.high ) << 1 ) ) == 0) &&
+ ( ( a.low | b.low ) == 0 ) ) {
+ /* zero case */
+ return float_relation_equal;
+ } else {
+ return 1 - (2 * aSign);
+ }
+ } else {
+ if (a.low == b.low && a.high == b.high) {
+ return float_relation_equal;
+ } else {
+ return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
+ }
+ }
+}
+
+int floatx80_compare( floatx80 a, floatx80 b STATUS_PARAM )
+{
+ return floatx80_compare_internal(a, b, 0 STATUS_VAR);
+}
+
+int floatx80_compare_quiet( floatx80 a, floatx80 b STATUS_PARAM )
+{
+ return floatx80_compare_internal(a, b, 1 STATUS_VAR);
+}
+
+INLINE int float128_compare_internal( float128 a, float128 b,
+ int is_quiet STATUS_PARAM )
+{
+ flag aSign, bSign;
+
+ if (( ( extractFloat128Exp( a ) == 0x7fff ) &&
+ ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) ||
+ ( ( extractFloat128Exp( b ) == 0x7fff ) &&
+ ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )) {
+ if (!is_quiet ||
+ float128_is_signaling_nan( a ) ||
+ float128_is_signaling_nan( b ) ) {
+ float_raise( float_flag_invalid STATUS_VAR);
+ }
+ return float_relation_unordered;
+ }
+ aSign = extractFloat128Sign( a );
+ bSign = extractFloat128Sign( b );
+ if ( aSign != bSign ) {
+ if ( ( ( ( a.high | b.high )<<1 ) | a.low | b.low ) == 0 ) {
+ /* zero case */
+ return float_relation_equal;
+ } else {
+ return 1 - (2 * aSign);
+ }
+ } else {
+ if (a.low == b.low && a.high == b.high) {
+ return float_relation_equal;
+ } else {
+ return 1 - 2 * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
+ }
+ }
+}
+
+int float128_compare( float128 a, float128 b STATUS_PARAM )
+{
+ return float128_compare_internal(a, b, 0 STATUS_VAR);
+}
+
+int float128_compare_quiet( float128 a, float128 b STATUS_PARAM )
+{
+ return float128_compare_internal(a, b, 1 STATUS_VAR);
+}
+
+/* min() and max() functions. These can't be implemented as
+ * 'compare and pick one input' because that would mishandle
+ * NaNs and +0 vs -0.
+ */
+#define MINMAX(s, nan_exp) \
+INLINE float ## s float ## s ## _minmax(float ## s a, float ## s b, \
+ int ismin STATUS_PARAM ) \
+{ \
+ flag aSign, bSign; \
+ uint ## s ## _t av, bv; \
+ a = float ## s ## _squash_input_denormal(a STATUS_VAR); \
+ b = float ## s ## _squash_input_denormal(b STATUS_VAR); \
+ if (float ## s ## _is_any_nan(a) || \
+ float ## s ## _is_any_nan(b)) { \
+ return propagateFloat ## s ## NaN(a, b STATUS_VAR); \
+ } \
+ aSign = extractFloat ## s ## Sign(a); \
+ bSign = extractFloat ## s ## Sign(b); \
+ av = float ## s ## _val(a); \
+ bv = float ## s ## _val(b); \
+ if (aSign != bSign) { \
+ if (ismin) { \
+ return aSign ? a : b; \
+ } else { \
+ return aSign ? b : a; \
+ } \
+ } else { \
+ if (ismin) { \
+ return (aSign ^ (av < bv)) ? a : b; \
+ } else { \
+ return (aSign ^ (av < bv)) ? b : a; \
+ } \
+ } \
+} \
+ \
+float ## s float ## s ## _min(float ## s a, float ## s b STATUS_PARAM) \
+{ \
+ return float ## s ## _minmax(a, b, 1 STATUS_VAR); \
+} \
+ \
+float ## s float ## s ## _max(float ## s a, float ## s b STATUS_PARAM) \
+{ \
+ return float ## s ## _minmax(a, b, 0 STATUS_VAR); \
+}
+
+MINMAX(32, 0xff)
+MINMAX(64, 0x7ff)
+
+
+/* Multiply A by 2 raised to the power N. */
+float32 float32_scalbn( float32 a, int n STATUS_PARAM )
+{
+ flag aSign;
+ int16_t aExp;
+ uint32_t aSig;
+
+ a = float32_squash_input_denormal(a STATUS_VAR);
+ aSig = extractFloat32Frac( a );
+ aExp = extractFloat32Exp( a );
+ aSign = extractFloat32Sign( a );
+
+ if ( aExp == 0xFF ) {
+ if ( aSig ) {
+ return propagateFloat32NaN( a, a STATUS_VAR );
+ }
+ return a;
+ }
+ if ( aExp != 0 )
+ aSig |= 0x00800000;
+ else if ( aSig == 0 )
+ return a;
+
+ if (n > 0x200) {
+ n = 0x200;
+ } else if (n < -0x200) {
+ n = -0x200;
+ }
+
+ aExp += n - 1;
+ aSig <<= 7;
+ return normalizeRoundAndPackFloat32( aSign, aExp, aSig STATUS_VAR );
+}
+
+float64 float64_scalbn( float64 a, int n STATUS_PARAM )
+{
+ flag aSign;
+ int16_t aExp;
+ uint64_t aSig;
+
+ a = float64_squash_input_denormal(a STATUS_VAR);
+ aSig = extractFloat64Frac( a );
+ aExp = extractFloat64Exp( a );
+ aSign = extractFloat64Sign( a );
+
+ if ( aExp == 0x7FF ) {
+ if ( aSig ) {
+ return propagateFloat64NaN( a, a STATUS_VAR );
+ }
+ return a;
+ }
+ if ( aExp != 0 )
+ aSig |= LIT64( 0x0010000000000000 );
+ else if ( aSig == 0 )
+ return a;
+
+ if (n > 0x1000) {
+ n = 0x1000;
+ } else if (n < -0x1000) {
+ n = -0x1000;
+ }
+
+ aExp += n - 1;
+ aSig <<= 10;
+ return normalizeRoundAndPackFloat64( aSign, aExp, aSig STATUS_VAR );
+}
+
+floatx80 floatx80_scalbn( floatx80 a, int n STATUS_PARAM )
+{
+ flag aSign;
+ int32_t aExp;
+ uint64_t aSig;
+
+ aSig = extractFloatx80Frac( a );
+ aExp = extractFloatx80Exp( a );
+ aSign = extractFloatx80Sign( a );
+
+ if ( aExp == 0x7FFF ) {
+ if ( aSig<<1 ) {
+ return propagateFloatx80NaN( a, a STATUS_VAR );
+ }
+ return a;
+ }
+
+ if (aExp == 0 && aSig == 0)
+ return a;
+
+ if (n > 0x10000) {
+ n = 0x10000;
+ } else if (n < -0x10000) {
+ n = -0x10000;
+ }
+
+ aExp += n;
+ return normalizeRoundAndPackFloatx80( STATUS(floatx80_rounding_precision),
+ aSign, aExp, aSig, 0 STATUS_VAR );
+}
+
+float128 float128_scalbn( float128 a, int n STATUS_PARAM )
+{
+ flag aSign;
+ int32_t aExp;
+ uint64_t aSig0, aSig1;
+
+ aSig1 = extractFloat128Frac1( a );
+ aSig0 = extractFloat128Frac0( a );
+ aExp = extractFloat128Exp( a );
+ aSign = extractFloat128Sign( a );
+ if ( aExp == 0x7FFF ) {
+ if ( aSig0 | aSig1 ) {
+ return propagateFloat128NaN( a, a STATUS_VAR );
+ }
+ return a;
+ }
+ if ( aExp != 0 )
+ aSig0 |= LIT64( 0x0001000000000000 );
+ else if ( aSig0 == 0 && aSig1 == 0 )
+ return a;
+
+ if (n > 0x10000) {
+ n = 0x10000;
+ } else if (n < -0x10000) {
+ n = -0x10000;
+ }
+
+ aExp += n - 1;
+ return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
+ STATUS_VAR );
+
+}
diff --git a/fpu/softfloat.h b/fpu/softfloat.h
new file mode 100644
index 000000000..feec3a180
--- /dev/null
+++ b/fpu/softfloat.h
@@ -0,0 +1,633 @@
+/*
+ * QEMU float support
+ *
+ * Derived from SoftFloat.
+ */
+
+/*============================================================================
+
+This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser. This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704. Funding was partially provided by the
+National Science Foundation under grant MIP-9311980. The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek. More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+#ifndef SOFTFLOAT_H
+#define SOFTFLOAT_H
+
+#if defined(CONFIG_SOLARIS) && defined(CONFIG_NEEDS_LIBSUNMATH)
+#include <sunmath.h>
+#endif
+
+#include <inttypes.h>
+#include "config-host.h"
+#include "osdep.h"
+
+/*----------------------------------------------------------------------------
+| Each of the following `typedef's defines the most convenient type that holds
+| integers of at least as many bits as specified. For example, `uint8' should
+| be the most convenient type that can hold unsigned integers of as many as
+| 8 bits. The `flag' type must be able to hold either a 0 or 1. For most
+| implementations of C, `flag', `uint8', and `int8' should all be `typedef'ed
+| to the same as `int'.
+*----------------------------------------------------------------------------*/
+typedef uint8_t flag;
+typedef uint8_t uint8;
+typedef int8_t int8;
+typedef unsigned int uint32;
+typedef signed int int32;
+typedef uint64_t uint64;
+typedef int64_t int64;
+
+#define LIT64( a ) a##LL
+#define INLINE static inline
+
+#define STATUS_PARAM , float_status *status
+#define STATUS(field) status->field
+#define STATUS_VAR , status
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point ordering relations
+*----------------------------------------------------------------------------*/
+enum {
+ float_relation_less = -1,
+ float_relation_equal = 0,
+ float_relation_greater = 1,
+ float_relation_unordered = 2
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point types.
+*----------------------------------------------------------------------------*/
+/* Use structures for soft-float types. This prevents accidentally mixing
+ them with native int/float types. A sufficiently clever compiler and
+ sane ABI should be able to see though these structs. However
+ x86/gcc 3.x seems to struggle a bit, so leave them disabled by default. */
+//#define USE_SOFTFLOAT_STRUCT_TYPES
+#ifdef USE_SOFTFLOAT_STRUCT_TYPES
+typedef struct {
+ uint16_t v;
+} float16;
+#define float16_val(x) (((float16)(x)).v)
+#define make_float16(x) __extension__ ({ float16 f16_val = {x}; f16_val; })
+#define const_float16(x) { x }
+typedef struct {
+ uint32_t v;
+} float32;
+/* The cast ensures an error if the wrong type is passed. */
+#define float32_val(x) (((float32)(x)).v)
+#define make_float32(x) __extension__ ({ float32 f32_val = {x}; f32_val; })
+#define const_float32(x) { x }
+typedef struct {
+ uint64_t v;
+} float64;
+#define float64_val(x) (((float64)(x)).v)
+#define make_float64(x) __extension__ ({ float64 f64_val = {x}; f64_val; })
+#define const_float64(x) { x }
+#else
+typedef uint16_t float16;
+typedef uint32_t float32;
+typedef uint64_t float64;
+#define float16_val(x) (x)
+#define float32_val(x) (x)
+#define float64_val(x) (x)
+#define make_float16(x) (x)
+#define make_float32(x) (x)
+#define make_float64(x) (x)
+#define const_float16(x) (x)
+#define const_float32(x) (x)
+#define const_float64(x) (x)
+#endif
+typedef struct {
+ uint64_t low;
+ uint16_t high;
+} floatx80;
+#define make_floatx80(exp, mant) ((floatx80) { mant, exp })
+#define make_floatx80_init(exp, mant) { .low = mant, .high = exp }
+typedef struct {
+#ifdef HOST_WORDS_BIGENDIAN
+ uint64_t high, low;
+#else
+ uint64_t low, high;
+#endif
+} float128;
+#define make_float128(high_, low_) ((float128) { .high = high_, .low = low_ })
+#define make_float128_init(high_, low_) { .high = high_, .low = low_ }
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point underflow tininess-detection mode.
+*----------------------------------------------------------------------------*/
+enum {
+ float_tininess_after_rounding = 0,
+ float_tininess_before_rounding = 1
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point rounding mode.
+*----------------------------------------------------------------------------*/
+enum {
+ float_round_nearest_even = 0,
+ float_round_down = 1,
+ float_round_up = 2,
+ float_round_to_zero = 3
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point exception flags.
+*----------------------------------------------------------------------------*/
+enum {
+ float_flag_invalid = 1,
+ float_flag_divbyzero = 4,
+ float_flag_overflow = 8,
+ float_flag_underflow = 16,
+ float_flag_inexact = 32,
+ float_flag_input_denormal = 64,
+ float_flag_output_denormal = 128
+};
+
+typedef struct float_status {
+ signed char float_detect_tininess;
+ signed char float_rounding_mode;
+ signed char float_exception_flags;
+ signed char floatx80_rounding_precision;
+ /* should denormalised results go to zero and set the inexact flag? */
+ flag flush_to_zero;
+ /* should denormalised inputs go to zero and set the input_denormal flag? */
+ flag flush_inputs_to_zero;
+ flag default_nan_mode;
+} float_status;
+
+void set_float_rounding_mode(int val STATUS_PARAM);
+void set_float_exception_flags(int val STATUS_PARAM);
+INLINE void set_float_detect_tininess(int val STATUS_PARAM)
+{
+ STATUS(float_detect_tininess) = val;
+}
+INLINE void set_flush_to_zero(flag val STATUS_PARAM)
+{
+ STATUS(flush_to_zero) = val;
+}
+INLINE void set_flush_inputs_to_zero(flag val STATUS_PARAM)
+{
+ STATUS(flush_inputs_to_zero) = val;
+}
+INLINE void set_default_nan_mode(flag val STATUS_PARAM)
+{
+ STATUS(default_nan_mode) = val;
+}
+INLINE int get_float_exception_flags(float_status *status)
+{
+ return STATUS(float_exception_flags);
+}
+void set_floatx80_rounding_precision(int val STATUS_PARAM);
+
+/*----------------------------------------------------------------------------
+| Routine to raise any or all of the software IEC/IEEE floating-point
+| exception flags.
+*----------------------------------------------------------------------------*/
+void float_raise( int8 flags STATUS_PARAM);
+
+/*----------------------------------------------------------------------------
+| Options to indicate which negations to perform in float*_muladd()
+| Using these differs from negating an input or output before calling
+| the muladd function in that this means that a NaN doesn't have its
+| sign bit inverted before it is propagated.
+*----------------------------------------------------------------------------*/
+enum {
+ float_muladd_negate_c = 1,
+ float_muladd_negate_product = 2,
+ float_muladd_negate_result = 3,
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE integer-to-floating-point conversion routines.
+*----------------------------------------------------------------------------*/
+float32 int32_to_float32( int32 STATUS_PARAM );
+float64 int32_to_float64( int32 STATUS_PARAM );
+float32 uint32_to_float32( uint32 STATUS_PARAM );
+float64 uint32_to_float64( uint32 STATUS_PARAM );
+floatx80 int32_to_floatx80( int32 STATUS_PARAM );
+float128 int32_to_float128( int32 STATUS_PARAM );
+float32 int64_to_float32( int64 STATUS_PARAM );
+float32 uint64_to_float32( uint64 STATUS_PARAM );
+float64 int64_to_float64( int64 STATUS_PARAM );
+float64 uint64_to_float64( uint64 STATUS_PARAM );
+floatx80 int64_to_floatx80( int64 STATUS_PARAM );
+float128 int64_to_float128( int64 STATUS_PARAM );
+
+/*----------------------------------------------------------------------------
+| Software half-precision conversion routines.
+*----------------------------------------------------------------------------*/
+float16 float32_to_float16( float32, flag STATUS_PARAM );
+float32 float16_to_float32( float16, flag STATUS_PARAM );
+
+/*----------------------------------------------------------------------------
+| Software half-precision operations.
+*----------------------------------------------------------------------------*/
+int float16_is_quiet_nan( float16 );
+int float16_is_signaling_nan( float16 );
+float16 float16_maybe_silence_nan( float16 );
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated half-precision NaN.
+*----------------------------------------------------------------------------*/
+extern const float16 float16_default_nan;
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int_fast16_t float32_to_int16_round_to_zero(float32 STATUS_PARAM);
+uint_fast16_t float32_to_uint16_round_to_zero(float32 STATUS_PARAM);
+int32 float32_to_int32( float32 STATUS_PARAM );
+int32 float32_to_int32_round_to_zero( float32 STATUS_PARAM );
+uint32 float32_to_uint32( float32 STATUS_PARAM );
+uint32 float32_to_uint32_round_to_zero( float32 STATUS_PARAM );
+int64 float32_to_int64( float32 STATUS_PARAM );
+int64 float32_to_int64_round_to_zero( float32 STATUS_PARAM );
+float64 float32_to_float64( float32 STATUS_PARAM );
+floatx80 float32_to_floatx80( float32 STATUS_PARAM );
+float128 float32_to_float128( float32 STATUS_PARAM );
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision operations.
+*----------------------------------------------------------------------------*/
+float32 float32_round_to_int( float32 STATUS_PARAM );
+float32 float32_add( float32, float32 STATUS_PARAM );
+float32 float32_sub( float32, float32 STATUS_PARAM );
+float32 float32_mul( float32, float32 STATUS_PARAM );
+float32 float32_div( float32, float32 STATUS_PARAM );
+float32 float32_rem( float32, float32 STATUS_PARAM );
+float32 float32_muladd(float32, float32, float32, int STATUS_PARAM);
+float32 float32_sqrt( float32 STATUS_PARAM );
+float32 float32_exp2( float32 STATUS_PARAM );
+float32 float32_log2( float32 STATUS_PARAM );
+int float32_eq( float32, float32 STATUS_PARAM );
+int float32_le( float32, float32 STATUS_PARAM );
+int float32_lt( float32, float32 STATUS_PARAM );
+int float32_unordered( float32, float32 STATUS_PARAM );
+int float32_eq_quiet( float32, float32 STATUS_PARAM );
+int float32_le_quiet( float32, float32 STATUS_PARAM );
+int float32_lt_quiet( float32, float32 STATUS_PARAM );
+int float32_unordered_quiet( float32, float32 STATUS_PARAM );
+int float32_compare( float32, float32 STATUS_PARAM );
+int float32_compare_quiet( float32, float32 STATUS_PARAM );
+float32 float32_min(float32, float32 STATUS_PARAM);
+float32 float32_max(float32, float32 STATUS_PARAM);
+int float32_is_quiet_nan( float32 );
+int float32_is_signaling_nan( float32 );
+float32 float32_maybe_silence_nan( float32 );
+float32 float32_scalbn( float32, int STATUS_PARAM );
+
+INLINE float32 float32_abs(float32 a)
+{
+ /* Note that abs does *not* handle NaN specially, nor does
+ * it flush denormal inputs to zero.
+ */
+ return make_float32(float32_val(a) & 0x7fffffff);
+}
+
+INLINE float32 float32_chs(float32 a)
+{
+ /* Note that chs does *not* handle NaN specially, nor does
+ * it flush denormal inputs to zero.
+ */
+ return make_float32(float32_val(a) ^ 0x80000000);
+}
+
+INLINE int float32_is_infinity(float32 a)
+{
+ return (float32_val(a) & 0x7fffffff) == 0x7f800000;
+}
+
+INLINE int float32_is_neg(float32 a)
+{
+ return float32_val(a) >> 31;
+}
+
+INLINE int float32_is_zero(float32 a)
+{
+ return (float32_val(a) & 0x7fffffff) == 0;
+}
+
+INLINE int float32_is_any_nan(float32 a)
+{
+ return ((float32_val(a) & ~(1 << 31)) > 0x7f800000UL);
+}
+
+INLINE int float32_is_zero_or_denormal(float32 a)
+{
+ return (float32_val(a) & 0x7f800000) == 0;
+}
+
+INLINE float32 float32_set_sign(float32 a, int sign)
+{
+ return make_float32((float32_val(a) & 0x7fffffff) | (sign << 31));
+}
+
+#define float32_zero make_float32(0)
+#define float32_one make_float32(0x3f800000)
+#define float32_ln2 make_float32(0x3f317218)
+#define float32_pi make_float32(0x40490fdb)
+#define float32_half make_float32(0x3f000000)
+#define float32_infinity make_float32(0x7f800000)
+
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated single-precision NaN.
+*----------------------------------------------------------------------------*/
+extern const float32 float32_default_nan;
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int_fast16_t float64_to_int16_round_to_zero(float64 STATUS_PARAM);
+uint_fast16_t float64_to_uint16_round_to_zero(float64 STATUS_PARAM);
+int32 float64_to_int32( float64 STATUS_PARAM );
+int32 float64_to_int32_round_to_zero( float64 STATUS_PARAM );
+uint32 float64_to_uint32( float64 STATUS_PARAM );
+uint32 float64_to_uint32_round_to_zero( float64 STATUS_PARAM );
+int64 float64_to_int64( float64 STATUS_PARAM );
+int64 float64_to_int64_round_to_zero( float64 STATUS_PARAM );
+uint64 float64_to_uint64 (float64 a STATUS_PARAM);
+uint64 float64_to_uint64_round_to_zero (float64 a STATUS_PARAM);
+float32 float64_to_float32( float64 STATUS_PARAM );
+floatx80 float64_to_floatx80( float64 STATUS_PARAM );
+float128 float64_to_float128( float64 STATUS_PARAM );
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision operations.
+*----------------------------------------------------------------------------*/
+float64 float64_round_to_int( float64 STATUS_PARAM );
+float64 float64_trunc_to_int( float64 STATUS_PARAM );
+float64 float64_add( float64, float64 STATUS_PARAM );
+float64 float64_sub( float64, float64 STATUS_PARAM );
+float64 float64_mul( float64, float64 STATUS_PARAM );
+float64 float64_div( float64, float64 STATUS_PARAM );
+float64 float64_rem( float64, float64 STATUS_PARAM );
+float64 float64_muladd(float64, float64, float64, int STATUS_PARAM);
+float64 float64_sqrt( float64 STATUS_PARAM );
+float64 float64_log2( float64 STATUS_PARAM );
+int float64_eq( float64, float64 STATUS_PARAM );
+int float64_le( float64, float64 STATUS_PARAM );
+int float64_lt( float64, float64 STATUS_PARAM );
+int float64_unordered( float64, float64 STATUS_PARAM );
+int float64_eq_quiet( float64, float64 STATUS_PARAM );
+int float64_le_quiet( float64, float64 STATUS_PARAM );
+int float64_lt_quiet( float64, float64 STATUS_PARAM );
+int float64_unordered_quiet( float64, float64 STATUS_PARAM );
+int float64_compare( float64, float64 STATUS_PARAM );
+int float64_compare_quiet( float64, float64 STATUS_PARAM );
+float64 float64_min(float64, float64 STATUS_PARAM);
+float64 float64_max(float64, float64 STATUS_PARAM);
+int float64_is_quiet_nan( float64 a );
+int float64_is_signaling_nan( float64 );
+float64 float64_maybe_silence_nan( float64 );
+float64 float64_scalbn( float64, int STATUS_PARAM );
+
+INLINE float64 float64_abs(float64 a)
+{
+ /* Note that abs does *not* handle NaN specially, nor does
+ * it flush denormal inputs to zero.
+ */
+ return make_float64(float64_val(a) & 0x7fffffffffffffffLL);
+}
+
+INLINE float64 float64_chs(float64 a)
+{
+ /* Note that chs does *not* handle NaN specially, nor does
+ * it flush denormal inputs to zero.
+ */
+ return make_float64(float64_val(a) ^ 0x8000000000000000LL);
+}
+
+INLINE int float64_is_infinity(float64 a)
+{
+ return (float64_val(a) & 0x7fffffffffffffffLL ) == 0x7ff0000000000000LL;
+}
+
+INLINE int float64_is_neg(float64 a)
+{
+ return float64_val(a) >> 63;
+}
+
+INLINE int float64_is_zero(float64 a)
+{
+ return (float64_val(a) & 0x7fffffffffffffffLL) == 0;
+}
+
+INLINE int float64_is_any_nan(float64 a)
+{
+ return ((float64_val(a) & ~(1ULL << 63)) > 0x7ff0000000000000ULL);
+}
+
+INLINE int float64_is_zero_or_denormal(float64 a)
+{
+ return (float64_val(a) & 0x7ff0000000000000LL) == 0;
+}
+
+INLINE float64 float64_set_sign(float64 a, int sign)
+{
+ return make_float64((float64_val(a) & 0x7fffffffffffffffULL)
+ | ((int64_t)sign << 63));
+}
+
+#define float64_zero make_float64(0)
+#define float64_one make_float64(0x3ff0000000000000LL)
+#define float64_ln2 make_float64(0x3fe62e42fefa39efLL)
+#define float64_pi make_float64(0x400921fb54442d18LL)
+#define float64_half make_float64(0x3fe0000000000000LL)
+#define float64_infinity make_float64(0x7ff0000000000000LL)
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated double-precision NaN.
+*----------------------------------------------------------------------------*/
+extern const float64 float64_default_nan;
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32 floatx80_to_int32( floatx80 STATUS_PARAM );
+int32 floatx80_to_int32_round_to_zero( floatx80 STATUS_PARAM );
+int64 floatx80_to_int64( floatx80 STATUS_PARAM );
+int64 floatx80_to_int64_round_to_zero( floatx80 STATUS_PARAM );
+float32 floatx80_to_float32( floatx80 STATUS_PARAM );
+float64 floatx80_to_float64( floatx80 STATUS_PARAM );
+float128 floatx80_to_float128( floatx80 STATUS_PARAM );
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision operations.
+*----------------------------------------------------------------------------*/
+floatx80 floatx80_round_to_int( floatx80 STATUS_PARAM );
+floatx80 floatx80_add( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_sub( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_mul( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_div( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_rem( floatx80, floatx80 STATUS_PARAM );
+floatx80 floatx80_sqrt( floatx80 STATUS_PARAM );
+int floatx80_eq( floatx80, floatx80 STATUS_PARAM );
+int floatx80_le( floatx80, floatx80 STATUS_PARAM );
+int floatx80_lt( floatx80, floatx80 STATUS_PARAM );
+int floatx80_unordered( floatx80, floatx80 STATUS_PARAM );
+int floatx80_eq_quiet( floatx80, floatx80 STATUS_PARAM );
+int floatx80_le_quiet( floatx80, floatx80 STATUS_PARAM );
+int floatx80_lt_quiet( floatx80, floatx80 STATUS_PARAM );
+int floatx80_unordered_quiet( floatx80, floatx80 STATUS_PARAM );
+int floatx80_compare( floatx80, floatx80 STATUS_PARAM );
+int floatx80_compare_quiet( floatx80, floatx80 STATUS_PARAM );
+int floatx80_is_quiet_nan( floatx80 );
+int floatx80_is_signaling_nan( floatx80 );
+floatx80 floatx80_maybe_silence_nan( floatx80 );
+floatx80 floatx80_scalbn( floatx80, int STATUS_PARAM );
+
+INLINE floatx80 floatx80_abs(floatx80 a)
+{
+ a.high &= 0x7fff;
+ return a;
+}
+
+INLINE floatx80 floatx80_chs(floatx80 a)
+{
+ a.high ^= 0x8000;
+ return a;
+}
+
+INLINE int floatx80_is_infinity(floatx80 a)
+{
+ return (a.high & 0x7fff) == 0x7fff && a.low == 0x8000000000000000LL;
+}
+
+INLINE int floatx80_is_neg(floatx80 a)
+{
+ return a.high >> 15;
+}
+
+INLINE int floatx80_is_zero(floatx80 a)
+{
+ return (a.high & 0x7fff) == 0 && a.low == 0;
+}
+
+INLINE int floatx80_is_zero_or_denormal(floatx80 a)
+{
+ return (a.high & 0x7fff) == 0;
+}
+
+INLINE int floatx80_is_any_nan(floatx80 a)
+{
+ return ((a.high & 0x7fff) == 0x7fff) && (a.low<<1);
+}
+
+#define floatx80_zero make_floatx80(0x0000, 0x0000000000000000LL)
+#define floatx80_one make_floatx80(0x3fff, 0x8000000000000000LL)
+#define floatx80_ln2 make_floatx80(0x3ffe, 0xb17217f7d1cf79acLL)
+#define floatx80_pi make_floatx80(0x4000, 0xc90fdaa22168c235LL)
+#define floatx80_half make_floatx80(0x3ffe, 0x8000000000000000LL)
+#define floatx80_infinity make_floatx80(0x7fff, 0x8000000000000000LL)
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.
+*----------------------------------------------------------------------------*/
+extern const floatx80 floatx80_default_nan;
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE quadruple-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32 float128_to_int32( float128 STATUS_PARAM );
+int32 float128_to_int32_round_to_zero( float128 STATUS_PARAM );
+int64 float128_to_int64( float128 STATUS_PARAM );
+int64 float128_to_int64_round_to_zero( float128 STATUS_PARAM );
+float32 float128_to_float32( float128 STATUS_PARAM );
+float64 float128_to_float64( float128 STATUS_PARAM );
+floatx80 float128_to_floatx80( float128 STATUS_PARAM );
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE quadruple-precision operations.
+*----------------------------------------------------------------------------*/
+float128 float128_round_to_int( float128 STATUS_PARAM );
+float128 float128_add( float128, float128 STATUS_PARAM );
+float128 float128_sub( float128, float128 STATUS_PARAM );
+float128 float128_mul( float128, float128 STATUS_PARAM );
+float128 float128_div( float128, float128 STATUS_PARAM );
+float128 float128_rem( float128, float128 STATUS_PARAM );
+float128 float128_sqrt( float128 STATUS_PARAM );
+int float128_eq( float128, float128 STATUS_PARAM );
+int float128_le( float128, float128 STATUS_PARAM );
+int float128_lt( float128, float128 STATUS_PARAM );
+int float128_unordered( float128, float128 STATUS_PARAM );
+int float128_eq_quiet( float128, float128 STATUS_PARAM );
+int float128_le_quiet( float128, float128 STATUS_PARAM );
+int float128_lt_quiet( float128, float128 STATUS_PARAM );
+int float128_unordered_quiet( float128, float128 STATUS_PARAM );
+int float128_compare( float128, float128 STATUS_PARAM );
+int float128_compare_quiet( float128, float128 STATUS_PARAM );
+int float128_is_quiet_nan( float128 );
+int float128_is_signaling_nan( float128 );
+float128 float128_maybe_silence_nan( float128 );
+float128 float128_scalbn( float128, int STATUS_PARAM );
+
+INLINE float128 float128_abs(float128 a)
+{
+ a.high &= 0x7fffffffffffffffLL;
+ return a;
+}
+
+INLINE float128 float128_chs(float128 a)
+{
+ a.high ^= 0x8000000000000000LL;
+ return a;
+}
+
+INLINE int float128_is_infinity(float128 a)
+{
+ return (a.high & 0x7fffffffffffffffLL) == 0x7fff000000000000LL && a.low == 0;
+}
+
+INLINE int float128_is_neg(float128 a)
+{
+ return a.high >> 63;
+}
+
+INLINE int float128_is_zero(float128 a)
+{
+ return (a.high & 0x7fffffffffffffffLL) == 0 && a.low == 0;
+}
+
+INLINE int float128_is_zero_or_denormal(float128 a)
+{
+ return (a.high & 0x7fff000000000000LL) == 0;
+}
+
+INLINE int float128_is_any_nan(float128 a)
+{
+ return ((a.high >> 48) & 0x7fff) == 0x7fff &&
+ ((a.low != 0) || ((a.high & 0xffffffffffffLL) != 0));
+}
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated quadruple-precision NaN.
+*----------------------------------------------------------------------------*/
+extern const float128 float128_default_nan;
+
+#endif /* !SOFTFLOAT_H */