diff options
author | DongHun Kwak <dh0128.kwak@samsung.com> | 2017-09-13 11:08:07 +0900 |
---|---|---|
committer | DongHun Kwak <dh0128.kwak@samsung.com> | 2017-09-13 11:09:00 +0900 |
commit | b5c87084afaef42b2d058f68091be31988a6a874 (patch) | |
tree | adef9a65870a41181687e11d57fdf98e7629de3c /boost/multiprecision/cpp_bin_float.hpp | |
parent | 34bd32e225e2a8a94104489b31c42e5801cc1f4a (diff) | |
download | boost-b5c87084afaef42b2d058f68091be31988a6a874.tar.gz boost-b5c87084afaef42b2d058f68091be31988a6a874.tar.bz2 boost-b5c87084afaef42b2d058f68091be31988a6a874.zip |
Imported Upstream version 1.64.0upstream/1.64.0
Change-Id: Id9212edd016dd55f21172c427aa7894d1d24148b
Signed-off-by: DongHun Kwak <dh0128.kwak@samsung.com>
Diffstat (limited to 'boost/multiprecision/cpp_bin_float.hpp')
-rw-r--r-- | boost/multiprecision/cpp_bin_float.hpp | 253 |
1 files changed, 200 insertions, 53 deletions
diff --git a/boost/multiprecision/cpp_bin_float.hpp b/boost/multiprecision/cpp_bin_float.hpp index 38f5387528..f3e593a142 100644 --- a/boost/multiprecision/cpp_bin_float.hpp +++ b/boost/multiprecision/cpp_bin_float.hpp @@ -21,6 +21,10 @@ #include <boost/math/special_functions/expm1.hpp> #include <boost/math/special_functions/gamma.hpp> +#ifdef BOOST_HAS_FLOAT128 +#include <quadmath.h> +#endif + namespace boost{ namespace multiprecision{ namespace backends{ enum digit_base_type @@ -163,6 +167,64 @@ public: return assign_float(f); } +#ifdef BOOST_HAS_FLOAT128 + cpp_bin_float& operator=(__float128 f) + { + using default_ops::eval_add; + typedef typename boost::multiprecision::detail::canonical<int, cpp_bin_float>::type bf_int_type; + + if(f == 0) + { + m_data = limb_type(0); + m_sign = (signbitq(f) > 0); + m_exponent = exponent_zero; + return *this; + } + else if(isnanq(f)) + { + m_data = limb_type(0); + m_sign = false; + m_exponent = exponent_nan; + return *this; + } + else if(isinfq(f)) + { + m_data = limb_type(0); + m_sign = (f < 0); + m_exponent = exponent_infinity; + return *this; + } + if(f < 0) + { + *this = -f; + this->negate(); + return *this; + } + + typedef typename mpl::front<unsigned_types>::type ui_type; + m_data = static_cast<ui_type>(0u); + m_sign = false; + m_exponent = 0; + + static const int bits = sizeof(int) * CHAR_BIT - 1; + int e; + f = frexpq(f, &e); + while(f) + { + f = ldexpq(f, bits); + e -= bits; + int ipart = (int)truncq(f); + f -= ipart; + m_exponent += bits; + cpp_bin_float t; + t = static_cast<bf_int_type>(ipart); + eval_add(*this, t); + } + m_exponent += static_cast<Exponent>(e); + return *this; + } +#endif + template <class Float> typename boost::enable_if_c<is_floating_point<Float>::value, cpp_bin_float&>::type assign_float(Float f) { @@ -414,10 +476,10 @@ public: #endif template <unsigned Digits, digit_base_type DigitBase, class Allocator, class Exponent, Exponent MinE, Exponent MaxE, class Int> -inline void copy_and_round(cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE> &res, Int &arg) +inline void copy_and_round(cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE> &res, Int &arg, int bits_to_keep = cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count) { // Precondition: exponent of res must have been set before this function is called - // as we may need to adjust it based on how many cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count in arg are set. + // as we may need to adjust it based on how many bits_to_keep in arg are set. using default_ops::eval_msb; using default_ops::eval_lsb; using default_ops::eval_left_shift; @@ -435,45 +497,70 @@ inline void copy_and_round(cpp_bin_float<Digits, DigitBase, Allocator, Exponent, return; } int msb = eval_msb(arg); - if(static_cast<int>(cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count) > msb + 1) + if(static_cast<int>(bits_to_keep) > msb + 1) { - // Must have had cancellation in subtraction, shift left and copy: + // Must have had cancellation in subtraction, + // or be converting from a narrower type, so shift left: res.bits() = arg; - eval_left_shift(res.bits(), cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count - msb - 1); - res.exponent() -= static_cast<Exponent>(cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count - msb - 1); + eval_left_shift(res.bits(), bits_to_keep - msb - 1); + res.exponent() -= static_cast<Exponent>(bits_to_keep - msb - 1); } - else if(static_cast<int>(cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count) < msb + 1) + else if(static_cast<int>(bits_to_keep) < msb + 1) { - // We have more cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count than we need, so round as required, + // We have more bits_to_keep than we need, so round as required, // first get the rounding bit: - bool roundup = eval_bit_test(arg, msb - cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count); + bool roundup = eval_bit_test(arg, msb - bits_to_keep); // Then check for a tie: - if(roundup && (msb - cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count == eval_lsb(arg))) + if(roundup && (msb - bits_to_keep == (int)eval_lsb(arg))) { // Ties round towards even: - if(!eval_bit_test(arg, msb - cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count + 1)) + if(!eval_bit_test(arg, msb - bits_to_keep + 1)) roundup = false; } - // Shift off the cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count we don't need: - eval_right_shift(arg, msb - cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count + 1); - res.exponent() += static_cast<Exponent>(msb - (int)cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count + 1); + // Shift off the bits_to_keep we don't need: + eval_right_shift(arg, msb - bits_to_keep + 1); + res.exponent() += static_cast<Exponent>(msb - bits_to_keep + 1); if(roundup) { eval_increment(arg); - if(eval_bit_test(arg, cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count)) + if(bits_to_keep) + { + if(eval_bit_test(arg, bits_to_keep)) + { + // This happens very very rairly, all the bits left after + // truncation must be 1's and we're rounding up an order of magnitude: + eval_right_shift(arg, 1u); + ++res.exponent(); + } + } + else { - // This happens very very rairly: - eval_right_shift(arg, 1u); - ++res.exponent(); + // We get here when bits_to_keep is zero but we're rounding up, + // as a result we end up with a single digit that is a 1: + ++bits_to_keep; } } + if(bits_to_keep != cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count) + { + // Normalize result when we're rounding to fewer bits than we can hold, only happens in conversions + // to narrower types: + eval_left_shift(arg, cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count - bits_to_keep); + res.exponent() -= static_cast<Exponent>(cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count - bits_to_keep); + } res.bits() = arg; } else { res.bits() = arg; } - BOOST_ASSERT((eval_msb(res.bits()) == cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count - 1)); + if(!bits_to_keep && !res.bits().limbs()[0]) + { + // We're keeping zero bits and did not round up, so result is zero: + res.exponent() = cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_zero; + return; + } + // Result must be normalized: + BOOST_ASSERT(((int)eval_msb(res.bits()) == cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count - 1)); if(res.exponent() > cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::max_exponent) { @@ -571,6 +658,7 @@ inline void do_eval_subtract(cpp_bin_float<Digits, DigitBase, Allocator, Exponen { using default_ops::eval_subtract; using default_ops::eval_bit_test; + using default_ops::eval_decrement; typename cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::double_rep_type dt; @@ -626,19 +714,41 @@ inline void do_eval_subtract(cpp_bin_float<Digits, DigitBase, Allocator, Exponen res.exponent() = a.exponent() - e_diff; eval_subtract(dt, b.bits()); } + else if(a.exponent() == (int)cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count + b.exponent() + 1) + { + if(eval_lsb(b.bits()) != cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count - 1) + { + eval_left_shift(dt, 1); + eval_decrement(dt); + res.exponent() = a.exponent() - 1; + } + else + res.exponent() = a.exponent(); + } else res.exponent() = a.exponent(); } else { dt = b.bits(); - if(b.exponent() <= a.exponent() + (int)cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count) + if(b.exponent() <= (int)cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count + a.exponent()) { typename cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_type e_diff = a.exponent() - b.exponent(); eval_left_shift(dt, -e_diff); res.exponent() = b.exponent() + e_diff; eval_subtract(dt, a.bits()); } + else if(b.exponent() == (int)cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count + a.exponent() + 1) + { + if(eval_lsb(a.bits()) != cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count - 1) + { + eval_left_shift(dt, 1); + eval_decrement(dt); + res.exponent() = b.exponent() - 1; + } + else + res.exponent() = b.exponent(); + } else res.exponent() = b.exponent(); s = !s; @@ -1232,50 +1342,77 @@ inline void eval_convert_to(boost::ulong_long_type *res, const cpp_bin_float<Dig template <class Float, unsigned Digits, digit_base_type DigitBase, class Allocator, class Exponent, Exponent MinE, Exponent MaxE> inline typename boost::enable_if_c<boost::is_float<Float>::value>::type eval_convert_to(Float *res, const cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE> &original_arg) { - // - // Perform rounding first, then afterwards extract the digits: - // typedef cpp_bin_float<std::numeric_limits<Float>::digits, digit_base_2, void, Exponent, MinE, MaxE> conv_type; typedef typename common_type<typename conv_type::exponent_type, int>::type common_exp_type; - conv_type arg(original_arg); - switch(arg.exponent()) + // + // Special cases first: + // + switch(original_arg.exponent()) { - case conv_type::exponent_zero: + case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_zero: *res = 0; - if(arg.sign()) + if(original_arg.sign()) *res = -*res; return; - case conv_type::exponent_nan: + case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_nan: *res = std::numeric_limits<Float>::quiet_NaN(); return; - case conv_type::exponent_infinity: + case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_infinity: *res = (std::numeric_limits<Float>::infinity)(); - if(arg.sign()) + if(original_arg.sign()) *res = -*res; return; } - common_exp_type e = arg.exponent(); - static const common_exp_type min_exp_limit = std::numeric_limits<Float>::min_exponent - - (common_exp_type)cpp_bin_float<std::numeric_limits<Float>::digits, digit_base_2, void, Exponent, MinE, MaxE>::bit_count - std::numeric_limits<Float>::digits - 2; - e -= cpp_bin_float<std::numeric_limits<Float>::digits, digit_base_2, void, Exponent, MinE, MaxE>::bit_count - 1; - if(e < min_exp_limit) + // + // Check for super large exponent that must be converted to infinity: + // + if(original_arg.exponent() > std::numeric_limits<Float>::max_exponent) { - *res = 0; + *res = std::numeric_limits<Float>::has_infinity ? std::numeric_limits<Float>::infinity() : (std::numeric_limits<Float>::max)(); + if(original_arg.sign()) + *res = -*res; return; } - if(e > std::numeric_limits<Float>::max_exponent) + // + // Figure out how many digits we will have in our result, + // allowing for a possibly denormalized result: + // + common_exp_type digits_to_round_to = std::numeric_limits<Float>::digits; + if(original_arg.exponent() < std::numeric_limits<Float>::min_exponent - 1) { - *res = std::numeric_limits<Float>::has_infinity ? std::numeric_limits<Float>::infinity() : (std::numeric_limits<Float>::max)(); - return; + common_exp_type diff = original_arg.exponent(); + diff -= std::numeric_limits<Float>::min_exponent - 1; + digits_to_round_to += diff; } - - *res = std::ldexp(static_cast<Float>(*arg.bits().limbs()), static_cast<int>(e)); - for(unsigned i = 1; i < arg.bits().size(); ++i) + if(digits_to_round_to < 0) { + // Result must be zero: + *res = 0; + if(original_arg.sign()) + *res = -*res; + return; + } + // + // Perform rounding first, then afterwards extract the digits: + // + cpp_bin_float<std::numeric_limits<Float>::digits, digit_base_2, Allocator, Exponent, MinE, MaxE> arg; + typename cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::rep_type bits(original_arg.bits()); + arg.exponent() = original_arg.exponent(); + copy_and_round(arg, bits, (int)digits_to_round_to); + common_exp_type e = arg.exponent(); + e -= cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count - 1; + static const unsigned limbs_needed = std::numeric_limits<Float>::digits / (sizeof(*arg.bits().limbs()) * CHAR_BIT) + + (std::numeric_limits<Float>::digits % (sizeof(*arg.bits().limbs()) * CHAR_BIT) ? 1 : 0); + unsigned first_limb_needed = arg.bits().size() - limbs_needed; + *res = 0; + e += first_limb_needed * sizeof(*arg.bits().limbs()) * CHAR_BIT; + while(first_limb_needed < arg.bits().size()) + { + *res += std::ldexp(static_cast<Float>(arg.bits().limbs()[first_limb_needed]), static_cast<int>(e)); + ++first_limb_needed; e += sizeof(*arg.bits().limbs()) * CHAR_BIT; - *res += std::ldexp(static_cast<Float>(arg.bits().limbs()[i]), static_cast<int>(e)); } - if(arg.sign()) + if(original_arg.sign()) *res = -*res; } @@ -1401,13 +1538,18 @@ inline void eval_sqrt(cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE using default_ops::eval_increment; switch(arg.exponent()) { - case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_zero: case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_nan: + errno = EDOM; + // fallthrough... + case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_zero: res = arg; return; case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_infinity: if(arg.sign()) + { res = std::numeric_limits<number<cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE> > >::quiet_NaN().backend(); + errno = EDOM; + } else res = arg; return; @@ -1415,6 +1557,7 @@ inline void eval_sqrt(cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE if(arg.sign()) { res = std::numeric_limits<number<cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE> > >::quiet_NaN().backend(); + errno = EDOM; return; } @@ -1443,8 +1586,10 @@ inline void eval_floor(cpp_bin_float<Digits, DigitBase, Allocator, Exponent, Min using default_ops::eval_increment; switch(arg.exponent()) { - case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_zero: case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_nan: + errno = EDOM; + // fallthrough... + case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_zero: case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_infinity: res = arg; return; @@ -1484,9 +1629,11 @@ inline void eval_ceil(cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE using default_ops::eval_increment; switch(arg.exponent()) { + case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_infinity: + errno = EDOM; + // fallthrough... case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_zero: case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_nan: - case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_infinity: res = arg; return; } @@ -1521,6 +1668,12 @@ inline void eval_ceil(cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE } template<unsigned D1, backends::digit_base_type B1, class A1, class E1, E1 M1, E1 M2> +int eval_signbit(const cpp_bin_float<D1, B1, A1, E1, M1, M2>& val) +{ + return val.sign(); +} + +template<unsigned D1, backends::digit_base_type B1, class A1, class E1, E1 M1, E1 M2> inline std::size_t hash_value(const cpp_bin_float<D1, B1, A1, E1, M1, M2>& val) { std::size_t result = hash_value(val.bits()); @@ -1545,12 +1698,6 @@ struct is_explicitly_convertible<FloatT, backends::cpp_bin_float<D2, B2, A2, E2, #endif template<unsigned Digits, boost::multiprecision::backends::digit_base_type DigitBase, class Exponent, Exponent MinE, Exponent MaxE, class Allocator, boost::multiprecision::expression_template_option ExpressionTemplates> -inline int signbit BOOST_PREVENT_MACRO_SUBSTITUTION(const boost::multiprecision::number<boost::multiprecision::backends::cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>, ExpressionTemplates>& arg) -{ - return arg.backend().sign(); -} - -template<unsigned Digits, boost::multiprecision::backends::digit_base_type DigitBase, class Exponent, Exponent MinE, Exponent MaxE, class Allocator, boost::multiprecision::expression_template_option ExpressionTemplates> inline boost::multiprecision::number<boost::multiprecision::backends::cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>, ExpressionTemplates> copysign BOOST_PREVENT_MACRO_SUBSTITUTION( const boost::multiprecision::number<boost::multiprecision::backends::cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>, ExpressionTemplates>& a, |