summaryrefslogtreecommitdiff
path: root/boost/multiprecision/cpp_bin_float.hpp
diff options
context:
space:
mode:
authorDongHun Kwak <dh0128.kwak@samsung.com>2017-09-13 11:08:07 +0900
committerDongHun Kwak <dh0128.kwak@samsung.com>2017-09-13 11:09:00 +0900
commitb5c87084afaef42b2d058f68091be31988a6a874 (patch)
treeadef9a65870a41181687e11d57fdf98e7629de3c /boost/multiprecision/cpp_bin_float.hpp
parent34bd32e225e2a8a94104489b31c42e5801cc1f4a (diff)
downloadboost-b5c87084afaef42b2d058f68091be31988a6a874.tar.gz
boost-b5c87084afaef42b2d058f68091be31988a6a874.tar.bz2
boost-b5c87084afaef42b2d058f68091be31988a6a874.zip
Imported Upstream version 1.64.0upstream/1.64.0
Change-Id: Id9212edd016dd55f21172c427aa7894d1d24148b Signed-off-by: DongHun Kwak <dh0128.kwak@samsung.com>
Diffstat (limited to 'boost/multiprecision/cpp_bin_float.hpp')
-rw-r--r--boost/multiprecision/cpp_bin_float.hpp253
1 files changed, 200 insertions, 53 deletions
diff --git a/boost/multiprecision/cpp_bin_float.hpp b/boost/multiprecision/cpp_bin_float.hpp
index 38f5387528..f3e593a142 100644
--- a/boost/multiprecision/cpp_bin_float.hpp
+++ b/boost/multiprecision/cpp_bin_float.hpp
@@ -21,6 +21,10 @@
#include <boost/math/special_functions/expm1.hpp>
#include <boost/math/special_functions/gamma.hpp>
+#ifdef BOOST_HAS_FLOAT128
+#include <quadmath.h>
+#endif
+
namespace boost{ namespace multiprecision{ namespace backends{
enum digit_base_type
@@ -163,6 +167,64 @@ public:
return assign_float(f);
}
+#ifdef BOOST_HAS_FLOAT128
+ cpp_bin_float& operator=(__float128 f)
+ {
+ using default_ops::eval_add;
+ typedef typename boost::multiprecision::detail::canonical<int, cpp_bin_float>::type bf_int_type;
+
+ if(f == 0)
+ {
+ m_data = limb_type(0);
+ m_sign = (signbitq(f) > 0);
+ m_exponent = exponent_zero;
+ return *this;
+ }
+ else if(isnanq(f))
+ {
+ m_data = limb_type(0);
+ m_sign = false;
+ m_exponent = exponent_nan;
+ return *this;
+ }
+ else if(isinfq(f))
+ {
+ m_data = limb_type(0);
+ m_sign = (f < 0);
+ m_exponent = exponent_infinity;
+ return *this;
+ }
+ if(f < 0)
+ {
+ *this = -f;
+ this->negate();
+ return *this;
+ }
+
+ typedef typename mpl::front<unsigned_types>::type ui_type;
+ m_data = static_cast<ui_type>(0u);
+ m_sign = false;
+ m_exponent = 0;
+
+ static const int bits = sizeof(int) * CHAR_BIT - 1;
+ int e;
+ f = frexpq(f, &e);
+ while(f)
+ {
+ f = ldexpq(f, bits);
+ e -= bits;
+ int ipart = (int)truncq(f);
+ f -= ipart;
+ m_exponent += bits;
+ cpp_bin_float t;
+ t = static_cast<bf_int_type>(ipart);
+ eval_add(*this, t);
+ }
+ m_exponent += static_cast<Exponent>(e);
+ return *this;
+ }
+#endif
+
template <class Float>
typename boost::enable_if_c<is_floating_point<Float>::value, cpp_bin_float&>::type assign_float(Float f)
{
@@ -414,10 +476,10 @@ public:
#endif
template <unsigned Digits, digit_base_type DigitBase, class Allocator, class Exponent, Exponent MinE, Exponent MaxE, class Int>
-inline void copy_and_round(cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE> &res, Int &arg)
+inline void copy_and_round(cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE> &res, Int &arg, int bits_to_keep = cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count)
{
// Precondition: exponent of res must have been set before this function is called
- // as we may need to adjust it based on how many cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count in arg are set.
+ // as we may need to adjust it based on how many bits_to_keep in arg are set.
using default_ops::eval_msb;
using default_ops::eval_lsb;
using default_ops::eval_left_shift;
@@ -435,45 +497,70 @@ inline void copy_and_round(cpp_bin_float<Digits, DigitBase, Allocator, Exponent,
return;
}
int msb = eval_msb(arg);
- if(static_cast<int>(cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count) > msb + 1)
+ if(static_cast<int>(bits_to_keep) > msb + 1)
{
- // Must have had cancellation in subtraction, shift left and copy:
+ // Must have had cancellation in subtraction,
+ // or be converting from a narrower type, so shift left:
res.bits() = arg;
- eval_left_shift(res.bits(), cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count - msb - 1);
- res.exponent() -= static_cast<Exponent>(cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count - msb - 1);
+ eval_left_shift(res.bits(), bits_to_keep - msb - 1);
+ res.exponent() -= static_cast<Exponent>(bits_to_keep - msb - 1);
}
- else if(static_cast<int>(cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count) < msb + 1)
+ else if(static_cast<int>(bits_to_keep) < msb + 1)
{
- // We have more cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count than we need, so round as required,
+ // We have more bits_to_keep than we need, so round as required,
// first get the rounding bit:
- bool roundup = eval_bit_test(arg, msb - cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count);
+ bool roundup = eval_bit_test(arg, msb - bits_to_keep);
// Then check for a tie:
- if(roundup && (msb - cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count == eval_lsb(arg)))
+ if(roundup && (msb - bits_to_keep == (int)eval_lsb(arg)))
{
// Ties round towards even:
- if(!eval_bit_test(arg, msb - cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count + 1))
+ if(!eval_bit_test(arg, msb - bits_to_keep + 1))
roundup = false;
}
- // Shift off the cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count we don't need:
- eval_right_shift(arg, msb - cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count + 1);
- res.exponent() += static_cast<Exponent>(msb - (int)cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count + 1);
+ // Shift off the bits_to_keep we don't need:
+ eval_right_shift(arg, msb - bits_to_keep + 1);
+ res.exponent() += static_cast<Exponent>(msb - bits_to_keep + 1);
if(roundup)
{
eval_increment(arg);
- if(eval_bit_test(arg, cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count))
+ if(bits_to_keep)
+ {
+ if(eval_bit_test(arg, bits_to_keep))
+ {
+ // This happens very very rairly, all the bits left after
+ // truncation must be 1's and we're rounding up an order of magnitude:
+ eval_right_shift(arg, 1u);
+ ++res.exponent();
+ }
+ }
+ else
{
- // This happens very very rairly:
- eval_right_shift(arg, 1u);
- ++res.exponent();
+ // We get here when bits_to_keep is zero but we're rounding up,
+ // as a result we end up with a single digit that is a 1:
+ ++bits_to_keep;
}
}
+ if(bits_to_keep != cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count)
+ {
+ // Normalize result when we're rounding to fewer bits than we can hold, only happens in conversions
+ // to narrower types:
+ eval_left_shift(arg, cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count - bits_to_keep);
+ res.exponent() -= static_cast<Exponent>(cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count - bits_to_keep);
+ }
res.bits() = arg;
}
else
{
res.bits() = arg;
}
- BOOST_ASSERT((eval_msb(res.bits()) == cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count - 1));
+ if(!bits_to_keep && !res.bits().limbs()[0])
+ {
+ // We're keeping zero bits and did not round up, so result is zero:
+ res.exponent() = cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_zero;
+ return;
+ }
+ // Result must be normalized:
+ BOOST_ASSERT(((int)eval_msb(res.bits()) == cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count - 1));
if(res.exponent() > cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::max_exponent)
{
@@ -571,6 +658,7 @@ inline void do_eval_subtract(cpp_bin_float<Digits, DigitBase, Allocator, Exponen
{
using default_ops::eval_subtract;
using default_ops::eval_bit_test;
+ using default_ops::eval_decrement;
typename cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::double_rep_type dt;
@@ -626,19 +714,41 @@ inline void do_eval_subtract(cpp_bin_float<Digits, DigitBase, Allocator, Exponen
res.exponent() = a.exponent() - e_diff;
eval_subtract(dt, b.bits());
}
+ else if(a.exponent() == (int)cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count + b.exponent() + 1)
+ {
+ if(eval_lsb(b.bits()) != cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count - 1)
+ {
+ eval_left_shift(dt, 1);
+ eval_decrement(dt);
+ res.exponent() = a.exponent() - 1;
+ }
+ else
+ res.exponent() = a.exponent();
+ }
else
res.exponent() = a.exponent();
}
else
{
dt = b.bits();
- if(b.exponent() <= a.exponent() + (int)cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count)
+ if(b.exponent() <= (int)cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count + a.exponent())
{
typename cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_type e_diff = a.exponent() - b.exponent();
eval_left_shift(dt, -e_diff);
res.exponent() = b.exponent() + e_diff;
eval_subtract(dt, a.bits());
}
+ else if(b.exponent() == (int)cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count + a.exponent() + 1)
+ {
+ if(eval_lsb(a.bits()) != cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count - 1)
+ {
+ eval_left_shift(dt, 1);
+ eval_decrement(dt);
+ res.exponent() = b.exponent() - 1;
+ }
+ else
+ res.exponent() = b.exponent();
+ }
else
res.exponent() = b.exponent();
s = !s;
@@ -1232,50 +1342,77 @@ inline void eval_convert_to(boost::ulong_long_type *res, const cpp_bin_float<Dig
template <class Float, unsigned Digits, digit_base_type DigitBase, class Allocator, class Exponent, Exponent MinE, Exponent MaxE>
inline typename boost::enable_if_c<boost::is_float<Float>::value>::type eval_convert_to(Float *res, const cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE> &original_arg)
{
- //
- // Perform rounding first, then afterwards extract the digits:
- //
typedef cpp_bin_float<std::numeric_limits<Float>::digits, digit_base_2, void, Exponent, MinE, MaxE> conv_type;
typedef typename common_type<typename conv_type::exponent_type, int>::type common_exp_type;
- conv_type arg(original_arg);
- switch(arg.exponent())
+ //
+ // Special cases first:
+ //
+ switch(original_arg.exponent())
{
- case conv_type::exponent_zero:
+ case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_zero:
*res = 0;
- if(arg.sign())
+ if(original_arg.sign())
*res = -*res;
return;
- case conv_type::exponent_nan:
+ case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_nan:
*res = std::numeric_limits<Float>::quiet_NaN();
return;
- case conv_type::exponent_infinity:
+ case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_infinity:
*res = (std::numeric_limits<Float>::infinity)();
- if(arg.sign())
+ if(original_arg.sign())
*res = -*res;
return;
}
- common_exp_type e = arg.exponent();
- static const common_exp_type min_exp_limit = std::numeric_limits<Float>::min_exponent
- - (common_exp_type)cpp_bin_float<std::numeric_limits<Float>::digits, digit_base_2, void, Exponent, MinE, MaxE>::bit_count - std::numeric_limits<Float>::digits - 2;
- e -= cpp_bin_float<std::numeric_limits<Float>::digits, digit_base_2, void, Exponent, MinE, MaxE>::bit_count - 1;
- if(e < min_exp_limit)
+ //
+ // Check for super large exponent that must be converted to infinity:
+ //
+ if(original_arg.exponent() > std::numeric_limits<Float>::max_exponent)
{
- *res = 0;
+ *res = std::numeric_limits<Float>::has_infinity ? std::numeric_limits<Float>::infinity() : (std::numeric_limits<Float>::max)();
+ if(original_arg.sign())
+ *res = -*res;
return;
}
- if(e > std::numeric_limits<Float>::max_exponent)
+ //
+ // Figure out how many digits we will have in our result,
+ // allowing for a possibly denormalized result:
+ //
+ common_exp_type digits_to_round_to = std::numeric_limits<Float>::digits;
+ if(original_arg.exponent() < std::numeric_limits<Float>::min_exponent - 1)
{
- *res = std::numeric_limits<Float>::has_infinity ? std::numeric_limits<Float>::infinity() : (std::numeric_limits<Float>::max)();
- return;
+ common_exp_type diff = original_arg.exponent();
+ diff -= std::numeric_limits<Float>::min_exponent - 1;
+ digits_to_round_to += diff;
}
-
- *res = std::ldexp(static_cast<Float>(*arg.bits().limbs()), static_cast<int>(e));
- for(unsigned i = 1; i < arg.bits().size(); ++i)
+ if(digits_to_round_to < 0)
{
+ // Result must be zero:
+ *res = 0;
+ if(original_arg.sign())
+ *res = -*res;
+ return;
+ }
+ //
+ // Perform rounding first, then afterwards extract the digits:
+ //
+ cpp_bin_float<std::numeric_limits<Float>::digits, digit_base_2, Allocator, Exponent, MinE, MaxE> arg;
+ typename cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::rep_type bits(original_arg.bits());
+ arg.exponent() = original_arg.exponent();
+ copy_and_round(arg, bits, (int)digits_to_round_to);
+ common_exp_type e = arg.exponent();
+ e -= cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::bit_count - 1;
+ static const unsigned limbs_needed = std::numeric_limits<Float>::digits / (sizeof(*arg.bits().limbs()) * CHAR_BIT)
+ + (std::numeric_limits<Float>::digits % (sizeof(*arg.bits().limbs()) * CHAR_BIT) ? 1 : 0);
+ unsigned first_limb_needed = arg.bits().size() - limbs_needed;
+ *res = 0;
+ e += first_limb_needed * sizeof(*arg.bits().limbs()) * CHAR_BIT;
+ while(first_limb_needed < arg.bits().size())
+ {
+ *res += std::ldexp(static_cast<Float>(arg.bits().limbs()[first_limb_needed]), static_cast<int>(e));
+ ++first_limb_needed;
e += sizeof(*arg.bits().limbs()) * CHAR_BIT;
- *res += std::ldexp(static_cast<Float>(arg.bits().limbs()[i]), static_cast<int>(e));
}
- if(arg.sign())
+ if(original_arg.sign())
*res = -*res;
}
@@ -1401,13 +1538,18 @@ inline void eval_sqrt(cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE
using default_ops::eval_increment;
switch(arg.exponent())
{
- case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_zero:
case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_nan:
+ errno = EDOM;
+ // fallthrough...
+ case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_zero:
res = arg;
return;
case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_infinity:
if(arg.sign())
+ {
res = std::numeric_limits<number<cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE> > >::quiet_NaN().backend();
+ errno = EDOM;
+ }
else
res = arg;
return;
@@ -1415,6 +1557,7 @@ inline void eval_sqrt(cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE
if(arg.sign())
{
res = std::numeric_limits<number<cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE> > >::quiet_NaN().backend();
+ errno = EDOM;
return;
}
@@ -1443,8 +1586,10 @@ inline void eval_floor(cpp_bin_float<Digits, DigitBase, Allocator, Exponent, Min
using default_ops::eval_increment;
switch(arg.exponent())
{
- case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_zero:
case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_nan:
+ errno = EDOM;
+ // fallthrough...
+ case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_zero:
case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_infinity:
res = arg;
return;
@@ -1484,9 +1629,11 @@ inline void eval_ceil(cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE
using default_ops::eval_increment;
switch(arg.exponent())
{
+ case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_infinity:
+ errno = EDOM;
+ // fallthrough...
case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_zero:
case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_nan:
- case cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>::exponent_infinity:
res = arg;
return;
}
@@ -1521,6 +1668,12 @@ inline void eval_ceil(cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE
}
template<unsigned D1, backends::digit_base_type B1, class A1, class E1, E1 M1, E1 M2>
+int eval_signbit(const cpp_bin_float<D1, B1, A1, E1, M1, M2>& val)
+{
+ return val.sign();
+}
+
+template<unsigned D1, backends::digit_base_type B1, class A1, class E1, E1 M1, E1 M2>
inline std::size_t hash_value(const cpp_bin_float<D1, B1, A1, E1, M1, M2>& val)
{
std::size_t result = hash_value(val.bits());
@@ -1545,12 +1698,6 @@ struct is_explicitly_convertible<FloatT, backends::cpp_bin_float<D2, B2, A2, E2,
#endif
template<unsigned Digits, boost::multiprecision::backends::digit_base_type DigitBase, class Exponent, Exponent MinE, Exponent MaxE, class Allocator, boost::multiprecision::expression_template_option ExpressionTemplates>
-inline int signbit BOOST_PREVENT_MACRO_SUBSTITUTION(const boost::multiprecision::number<boost::multiprecision::backends::cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>, ExpressionTemplates>& arg)
-{
- return arg.backend().sign();
-}
-
-template<unsigned Digits, boost::multiprecision::backends::digit_base_type DigitBase, class Exponent, Exponent MinE, Exponent MaxE, class Allocator, boost::multiprecision::expression_template_option ExpressionTemplates>
inline boost::multiprecision::number<boost::multiprecision::backends::cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>, ExpressionTemplates>
copysign BOOST_PREVENT_MACRO_SUBSTITUTION(
const boost::multiprecision::number<boost::multiprecision::backends::cpp_bin_float<Digits, DigitBase, Allocator, Exponent, MinE, MaxE>, ExpressionTemplates>& a,