// (C) Copyright John Maddock 2008. // Use, modification and distribution are subject to the // Boost Software License, Version 1.0. (See accompanying file // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) #ifndef BOOST_MATH_SPECIAL_NEXT_HPP #define BOOST_MATH_SPECIAL_NEXT_HPP #ifdef _MSC_VER #pragma once #endif #include #include #include #include #include #include #if !defined(_CRAYC) && !defined(__CUDACC__) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3))) #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(__SSE2__) #include "xmmintrin.h" #define BOOST_MATH_CHECK_SSE2 #endif #endif namespace boost{ namespace math{ namespace concepts { class real_concept; struct std_real_concept; } namespace detail{ template struct has_hidden_guard_digits; template <> struct has_hidden_guard_digits : public mpl::false_ {}; template <> struct has_hidden_guard_digits : public mpl::false_ {}; template <> struct has_hidden_guard_digits : public mpl::false_ {}; #ifdef BOOST_HAS_FLOAT128 template <> struct has_hidden_guard_digits<__float128> : public mpl::false_ {}; #endif template <> struct has_hidden_guard_digits : public mpl::false_ {}; template <> struct has_hidden_guard_digits : public mpl::false_ {}; template struct has_hidden_guard_digits_10 : public mpl::false_ {}; template struct has_hidden_guard_digits_10 : public mpl::bool_<(std::numeric_limits::digits10 != std::numeric_limits::max_digits10)> {}; template struct has_hidden_guard_digits : public has_hidden_guard_digits_10::is_specialized && (std::numeric_limits::radix == 10) > {}; template inline const T& normalize_value(const T& val, const mpl::false_&) { return val; } template inline T normalize_value(const T& val, const mpl::true_&) { BOOST_STATIC_ASSERT(std::numeric_limits::is_specialized); BOOST_STATIC_ASSERT(std::numeric_limits::radix != 2); boost::intmax_t shift = std::numeric_limits::digits - ilogb(val) - 1; T result = scalbn(val, shift); result = round(result); return scalbn(result, -shift); } template inline T get_smallest_value(mpl::true_ const&) { // // numeric_limits lies about denorms being present - particularly // when this can be turned on or off at runtime, as is the case // when using the SSE2 registers in DAZ or FTZ mode. // static const T m = std::numeric_limits::denorm_min(); #ifdef BOOST_MATH_CHECK_SSE2 return (_mm_getcsr() & (_MM_FLUSH_ZERO_ON | 0x40)) ? tools::min_value() : m;; #else return ((tools::min_value() / 2) == 0) ? tools::min_value() : m; #endif } template inline T get_smallest_value(mpl::false_ const&) { return tools::min_value(); } template inline T get_smallest_value() { #if defined(BOOST_MSVC) && (BOOST_MSVC <= 1310) return get_smallest_value(mpl::bool_::is_specialized && (std::numeric_limits::has_denorm == 1)>()); #else return get_smallest_value(mpl::bool_::is_specialized && (std::numeric_limits::has_denorm == std::denorm_present)>()); #endif } // // Returns the smallest value that won't generate denorms when // we calculate the value of the least-significant-bit: // template T get_min_shift_value(); template struct min_shift_initializer { struct init { init() { do_init(); } static void do_init() { get_min_shift_value(); } void force_instantiate()const{} }; static const init initializer; static void force_instantiate() { initializer.force_instantiate(); } }; template const typename min_shift_initializer::init min_shift_initializer::initializer; template inline T calc_min_shifted(const mpl::true_&) { BOOST_MATH_STD_USING return ldexp(tools::min_value(), tools::digits() + 1); } template inline T calc_min_shifted(const mpl::false_&) { BOOST_STATIC_ASSERT(std::numeric_limits::is_specialized); BOOST_STATIC_ASSERT(std::numeric_limits::radix != 2); return scalbn(tools::min_value(), std::numeric_limits::digits + 1); } template inline T get_min_shift_value() { static const T val = calc_min_shifted(mpl::bool_::is_specialized || std::numeric_limits::radix == 2>()); min_shift_initializer::force_instantiate(); return val; } template T float_next_imp(const T& val, const mpl::true_&, const Policy& pol) { BOOST_MATH_STD_USING int expon; static const char* function = "float_next<%1%>(%1%)"; int fpclass = (boost::math::fpclassify)(val); if((fpclass == (int)FP_NAN) || (fpclass == (int)FP_INFINITE)) { if(val < 0) return -tools::max_value(); return policies::raise_domain_error( function, "Argument must be finite, but got %1%", val, pol); } if(val >= tools::max_value()) return policies::raise_overflow_error(function, 0, pol); if(val == 0) return detail::get_smallest_value(); if((fpclass != (int)FP_SUBNORMAL) && (fpclass != (int)FP_ZERO) && (fabs(val) < detail::get_min_shift_value()) && (val != -tools::min_value())) { // // Special case: if the value of the least significant bit is a denorm, and the result // would not be a denorm, then shift the input, increment, and shift back. // This avoids issues with the Intel SSE2 registers when the FTZ or DAZ flags are set. // return ldexp(float_next(T(ldexp(val, 2 * tools::digits())), pol), -2 * tools::digits()); } if(-0.5f == frexp(val, &expon)) --expon; // reduce exponent when val is a power of two, and negative. T diff = ldexp(T(1), expon - tools::digits()); if(diff == 0) diff = detail::get_smallest_value(); return val + diff; } // float_next_imp // // Special version for some base other than 2: // template T float_next_imp(const T& val, const mpl::false_&, const Policy& pol) { BOOST_STATIC_ASSERT(std::numeric_limits::is_specialized); BOOST_STATIC_ASSERT(std::numeric_limits::radix != 2); BOOST_MATH_STD_USING boost::intmax_t expon; static const char* function = "float_next<%1%>(%1%)"; int fpclass = (boost::math::fpclassify)(val); if((fpclass == (int)FP_NAN) || (fpclass == (int)FP_INFINITE)) { if(val < 0) return -tools::max_value(); return policies::raise_domain_error( function, "Argument must be finite, but got %1%", val, pol); } if(val >= tools::max_value()) return policies::raise_overflow_error(function, 0, pol); if(val == 0) return detail::get_smallest_value(); if((fpclass != (int)FP_SUBNORMAL) && (fpclass != (int)FP_ZERO) && (fabs(val) < detail::get_min_shift_value()) && (val != -tools::min_value())) { // // Special case: if the value of the least significant bit is a denorm, and the result // would not be a denorm, then shift the input, increment, and shift back. // This avoids issues with the Intel SSE2 registers when the FTZ or DAZ flags are set. // return scalbn(float_next(T(scalbn(val, 2 * std::numeric_limits::digits)), pol), -2 * std::numeric_limits::digits); } expon = 1 + ilogb(val); if(-1 == scalbn(val, -expon) * std::numeric_limits::radix) --expon; // reduce exponent when val is a power of base, and negative. T diff = scalbn(T(1), expon - std::numeric_limits::digits); if(diff == 0) diff = detail::get_smallest_value(); return val + diff; } // float_next_imp } // namespace detail template inline typename tools::promote_args::type float_next(const T& val, const Policy& pol) { typedef typename tools::promote_args::type result_type; return detail::float_next_imp(detail::normalize_value(static_cast(val), typename detail::has_hidden_guard_digits::type()), mpl::bool_::is_specialized || (std::numeric_limits::radix == 2)>(), pol); } #if 0 //def BOOST_MSVC // // We used to use ::_nextafter here, but doing so fails when using // the SSE2 registers if the FTZ or DAZ flags are set, so use our own // - albeit slower - code instead as at least that gives the correct answer. // template inline double float_next(const double& val, const Policy& pol) { static const char* function = "float_next<%1%>(%1%)"; if(!(boost::math::isfinite)(val) && (val > 0)) return policies::raise_domain_error( function, "Argument must be finite, but got %1%", val, pol); if(val >= tools::max_value()) return policies::raise_overflow_error(function, 0, pol); return ::_nextafter(val, tools::max_value()); } #endif template inline typename tools::promote_args::type float_next(const T& val) { return float_next(val, policies::policy<>()); } namespace detail{ template T float_prior_imp(const T& val, const mpl::true_&, const Policy& pol) { BOOST_MATH_STD_USING int expon; static const char* function = "float_prior<%1%>(%1%)"; int fpclass = (boost::math::fpclassify)(val); if((fpclass == (int)FP_NAN) || (fpclass == (int)FP_INFINITE)) { if(val > 0) return tools::max_value(); return policies::raise_domain_error( function, "Argument must be finite, but got %1%", val, pol); } if(val <= -tools::max_value()) return -policies::raise_overflow_error(function, 0, pol); if(val == 0) return -detail::get_smallest_value(); if((fpclass != (int)FP_SUBNORMAL) && (fpclass != (int)FP_ZERO) && (fabs(val) < detail::get_min_shift_value()) && (val != tools::min_value())) { // // Special case: if the value of the least significant bit is a denorm, and the result // would not be a denorm, then shift the input, increment, and shift back. // This avoids issues with the Intel SSE2 registers when the FTZ or DAZ flags are set. // return ldexp(float_prior(T(ldexp(val, 2 * tools::digits())), pol), -2 * tools::digits()); } T remain = frexp(val, &expon); if(remain == 0.5f) --expon; // when val is a power of two we must reduce the exponent T diff = ldexp(T(1), expon - tools::digits()); if(diff == 0) diff = detail::get_smallest_value(); return val - diff; } // float_prior_imp // // Special version for bases other than 2: // template T float_prior_imp(const T& val, const mpl::false_&, const Policy& pol) { BOOST_STATIC_ASSERT(std::numeric_limits::is_specialized); BOOST_STATIC_ASSERT(std::numeric_limits::radix != 2); BOOST_MATH_STD_USING boost::intmax_t expon; static const char* function = "float_prior<%1%>(%1%)"; int fpclass = (boost::math::fpclassify)(val); if((fpclass == (int)FP_NAN) || (fpclass == (int)FP_INFINITE)) { if(val > 0) return tools::max_value(); return policies::raise_domain_error( function, "Argument must be finite, but got %1%", val, pol); } if(val <= -tools::max_value()) return -policies::raise_overflow_error(function, 0, pol); if(val == 0) return -detail::get_smallest_value(); if((fpclass != (int)FP_SUBNORMAL) && (fpclass != (int)FP_ZERO) && (fabs(val) < detail::get_min_shift_value()) && (val != tools::min_value())) { // // Special case: if the value of the least significant bit is a denorm, and the result // would not be a denorm, then shift the input, increment, and shift back. // This avoids issues with the Intel SSE2 registers when the FTZ or DAZ flags are set. // return scalbn(float_prior(T(scalbn(val, 2 * std::numeric_limits::digits)), pol), -2 * std::numeric_limits::digits); } expon = 1 + ilogb(val); T remain = scalbn(val, -expon); if(remain * std::numeric_limits::radix == 1) --expon; // when val is a power of two we must reduce the exponent T diff = scalbn(T(1), expon - std::numeric_limits::digits); if(diff == 0) diff = detail::get_smallest_value(); return val - diff; } // float_prior_imp } // namespace detail template inline typename tools::promote_args::type float_prior(const T& val, const Policy& pol) { typedef typename tools::promote_args::type result_type; return detail::float_prior_imp(detail::normalize_value(static_cast(val), typename detail::has_hidden_guard_digits::type()), mpl::bool_::is_specialized || (std::numeric_limits::radix == 2)>(), pol); } #if 0 //def BOOST_MSVC // // We used to use ::_nextafter here, but doing so fails when using // the SSE2 registers if the FTZ or DAZ flags are set, so use our own // - albeit slower - code instead as at least that gives the correct answer. // template inline double float_prior(const double& val, const Policy& pol) { static const char* function = "float_prior<%1%>(%1%)"; if(!(boost::math::isfinite)(val) && (val < 0)) return policies::raise_domain_error( function, "Argument must be finite, but got %1%", val, pol); if(val <= -tools::max_value()) return -policies::raise_overflow_error(function, 0, pol); return ::_nextafter(val, -tools::max_value()); } #endif template inline typename tools::promote_args::type float_prior(const T& val) { return float_prior(val, policies::policy<>()); } template inline typename tools::promote_args::type nextafter(const T& val, const U& direction, const Policy& pol) { typedef typename tools::promote_args::type result_type; return val < direction ? boost::math::float_next(val, pol) : val == direction ? val : boost::math::float_prior(val, pol); } template inline typename tools::promote_args::type nextafter(const T& val, const U& direction) { return nextafter(val, direction, policies::policy<>()); } namespace detail{ template T float_distance_imp(const T& a, const T& b, const mpl::true_&, const Policy& pol) { BOOST_MATH_STD_USING // // Error handling: // static const char* function = "float_distance<%1%>(%1%, %1%)"; if(!(boost::math::isfinite)(a)) return policies::raise_domain_error( function, "Argument a must be finite, but got %1%", a, pol); if(!(boost::math::isfinite)(b)) return policies::raise_domain_error( function, "Argument b must be finite, but got %1%", b, pol); // // Special cases: // if(a > b) return -float_distance(b, a, pol); if(a == b) return T(0); if(a == 0) return 1 + fabs(float_distance(static_cast((b < 0) ? T(-detail::get_smallest_value()) : detail::get_smallest_value()), b, pol)); if(b == 0) return 1 + fabs(float_distance(static_cast((a < 0) ? T(-detail::get_smallest_value()) : detail::get_smallest_value()), a, pol)); if(boost::math::sign(a) != boost::math::sign(b)) return 2 + fabs(float_distance(static_cast((b < 0) ? T(-detail::get_smallest_value()) : detail::get_smallest_value()), b, pol)) + fabs(float_distance(static_cast((a < 0) ? T(-detail::get_smallest_value()) : detail::get_smallest_value()), a, pol)); // // By the time we get here, both a and b must have the same sign, we want // b > a and both postive for the following logic: // if(a < 0) return float_distance(static_cast(-b), static_cast(-a), pol); BOOST_ASSERT(a >= 0); BOOST_ASSERT(b >= a); int expon; // // Note that if a is a denorm then the usual formula fails // because we actually have fewer than tools::digits() // significant bits in the representation: // frexp(((boost::math::fpclassify)(a) == (int)FP_SUBNORMAL) ? tools::min_value() : a, &expon); T upper = ldexp(T(1), expon); T result = T(0); // // If b is greater than upper, then we *must* split the calculation // as the size of the ULP changes with each order of magnitude change: // if(b > upper) { int expon2; frexp(b, &expon2); T upper2 = ldexp(T(0.5), expon2); result = float_distance(upper2, b); result += (expon2 - expon - 1) * ldexp(T(1), tools::digits() - 1); } // // Use compensated double-double addition to avoid rounding // errors in the subtraction: // expon = tools::digits() - expon; T mb, x, y, z; if(((boost::math::fpclassify)(a) == (int)FP_SUBNORMAL) || (b - a < tools::min_value())) { // // Special case - either one end of the range is a denormal, or else the difference is. // The regular code will fail if we're using the SSE2 registers on Intel and either // the FTZ or DAZ flags are set. // T a2 = ldexp(a, tools::digits()); T b2 = ldexp(b, tools::digits()); mb = -(std::min)(T(ldexp(upper, tools::digits())), b2); x = a2 + mb; z = x - a2; y = (a2 - (x - z)) + (mb - z); expon -= tools::digits(); } else { mb = -(std::min)(upper, b); x = a + mb; z = x - a; y = (a - (x - z)) + (mb - z); } if(x < 0) { x = -x; y = -y; } result += ldexp(x, expon) + ldexp(y, expon); // // Result must be an integer: // BOOST_ASSERT(result == floor(result)); return result; } // float_distance_imp // // Special versions for bases other than 2: // template T float_distance_imp(const T& a, const T& b, const mpl::false_&, const Policy& pol) { BOOST_STATIC_ASSERT(std::numeric_limits::is_specialized); BOOST_STATIC_ASSERT(std::numeric_limits::radix != 2); BOOST_MATH_STD_USING // // Error handling: // static const char* function = "float_distance<%1%>(%1%, %1%)"; if(!(boost::math::isfinite)(a)) return policies::raise_domain_error( function, "Argument a must be finite, but got %1%", a, pol); if(!(boost::math::isfinite)(b)) return policies::raise_domain_error( function, "Argument b must be finite, but got %1%", b, pol); // // Special cases: // if(a > b) return -float_distance(b, a, pol); if(a == b) return T(0); if(a == 0) return 1 + fabs(float_distance(static_cast((b < 0) ? T(-detail::get_smallest_value()) : detail::get_smallest_value()), b, pol)); if(b == 0) return 1 + fabs(float_distance(static_cast((a < 0) ? T(-detail::get_smallest_value()) : detail::get_smallest_value()), a, pol)); if(boost::math::sign(a) != boost::math::sign(b)) return 2 + fabs(float_distance(static_cast((b < 0) ? T(-detail::get_smallest_value()) : detail::get_smallest_value()), b, pol)) + fabs(float_distance(static_cast((a < 0) ? T(-detail::get_smallest_value()) : detail::get_smallest_value()), a, pol)); // // By the time we get here, both a and b must have the same sign, we want // b > a and both postive for the following logic: // if(a < 0) return float_distance(static_cast(-b), static_cast(-a), pol); BOOST_ASSERT(a >= 0); BOOST_ASSERT(b >= a); boost::intmax_t expon; // // Note that if a is a denorm then the usual formula fails // because we actually have fewer than tools::digits() // significant bits in the representation: // expon = 1 + ilogb(((boost::math::fpclassify)(a) == (int)FP_SUBNORMAL) ? tools::min_value() : a); T upper = scalbn(T(1), expon); T result = T(0); // // If b is greater than upper, then we *must* split the calculation // as the size of the ULP changes with each order of magnitude change: // if(b > upper) { boost::intmax_t expon2 = 1 + ilogb(b); T upper2 = scalbn(T(1), expon2 - 1); result = float_distance(upper2, b); result += (expon2 - expon - 1) * scalbn(T(1), std::numeric_limits::digits - 1); } // // Use compensated double-double addition to avoid rounding // errors in the subtraction: // expon = std::numeric_limits::digits - expon; T mb, x, y, z; if(((boost::math::fpclassify)(a) == (int)FP_SUBNORMAL) || (b - a < tools::min_value())) { // // Special case - either one end of the range is a denormal, or else the difference is. // The regular code will fail if we're using the SSE2 registers on Intel and either // the FTZ or DAZ flags are set. // T a2 = scalbn(a, std::numeric_limits::digits); T b2 = scalbn(b, std::numeric_limits::digits); mb = -(std::min)(T(scalbn(upper, std::numeric_limits::digits)), b2); x = a2 + mb; z = x - a2; y = (a2 - (x - z)) + (mb - z); expon -= std::numeric_limits::digits; } else { mb = -(std::min)(upper, b); x = a + mb; z = x - a; y = (a - (x - z)) + (mb - z); } if(x < 0) { x = -x; y = -y; } result += scalbn(x, expon) + scalbn(y, expon); // // Result must be an integer: // BOOST_ASSERT(result == floor(result)); return result; } // float_distance_imp } // namespace detail template inline typename tools::promote_args::type float_distance(const T& a, const U& b, const Policy& pol) { typedef typename tools::promote_args::type result_type; return detail::float_distance_imp(detail::normalize_value(static_cast(a), typename detail::has_hidden_guard_digits::type()), detail::normalize_value(static_cast(b), typename detail::has_hidden_guard_digits::type()), mpl::bool_::is_specialized || (std::numeric_limits::radix == 2)>(), pol); } template typename tools::promote_args::type float_distance(const T& a, const U& b) { return boost::math::float_distance(a, b, policies::policy<>()); } namespace detail{ template T float_advance_imp(T val, int distance, const mpl::true_&, const Policy& pol) { BOOST_MATH_STD_USING // // Error handling: // static const char* function = "float_advance<%1%>(%1%, int)"; int fpclass = (boost::math::fpclassify)(val); if((fpclass == (int)FP_NAN) || (fpclass == (int)FP_INFINITE)) return policies::raise_domain_error( function, "Argument val must be finite, but got %1%", val, pol); if(val < 0) return -float_advance(-val, -distance, pol); if(distance == 0) return val; if(distance == 1) return float_next(val, pol); if(distance == -1) return float_prior(val, pol); if(fabs(val) < detail::get_min_shift_value()) { // // Special case: if the value of the least significant bit is a denorm, // implement in terms of float_next/float_prior. // This avoids issues with the Intel SSE2 registers when the FTZ or DAZ flags are set. // if(distance > 0) { do{ val = float_next(val, pol); } while(--distance); } else { do{ val = float_prior(val, pol); } while(++distance); } return val; } int expon; frexp(val, &expon); T limit = ldexp((distance < 0 ? T(0.5f) : T(1)), expon); if(val <= tools::min_value()) { limit = sign(T(distance)) * tools::min_value(); } T limit_distance = float_distance(val, limit); while(fabs(limit_distance) < abs(distance)) { distance -= itrunc(limit_distance); val = limit; if(distance < 0) { limit /= 2; expon--; } else { limit *= 2; expon++; } limit_distance = float_distance(val, limit); if(distance && (limit_distance == 0)) { return policies::raise_evaluation_error(function, "Internal logic failed while trying to increment floating point value %1%: most likely your FPU is in non-IEEE conforming mode.", val, pol); } } if((0.5f == frexp(val, &expon)) && (distance < 0)) --expon; T diff = 0; if(val != 0) diff = distance * ldexp(T(1), expon - tools::digits()); if(diff == 0) diff = distance * detail::get_smallest_value(); return val += diff; } // float_advance_imp // // Special version for bases other than 2: // template T float_advance_imp(T val, int distance, const mpl::false_&, const Policy& pol) { BOOST_STATIC_ASSERT(std::numeric_limits::is_specialized); BOOST_STATIC_ASSERT(std::numeric_limits::radix != 2); BOOST_MATH_STD_USING // // Error handling: // static const char* function = "float_advance<%1%>(%1%, int)"; int fpclass = (boost::math::fpclassify)(val); if((fpclass == (int)FP_NAN) || (fpclass == (int)FP_INFINITE)) return policies::raise_domain_error( function, "Argument val must be finite, but got %1%", val, pol); if(val < 0) return -float_advance(-val, -distance, pol); if(distance == 0) return val; if(distance == 1) return float_next(val, pol); if(distance == -1) return float_prior(val, pol); if(fabs(val) < detail::get_min_shift_value()) { // // Special case: if the value of the least significant bit is a denorm, // implement in terms of float_next/float_prior. // This avoids issues with the Intel SSE2 registers when the FTZ or DAZ flags are set. // if(distance > 0) { do{ val = float_next(val, pol); } while(--distance); } else { do{ val = float_prior(val, pol); } while(++distance); } return val; } boost::intmax_t expon = 1 + ilogb(val); T limit = scalbn(T(1), distance < 0 ? expon - 1 : expon); if(val <= tools::min_value()) { limit = sign(T(distance)) * tools::min_value(); } T limit_distance = float_distance(val, limit); while(fabs(limit_distance) < abs(distance)) { distance -= itrunc(limit_distance); val = limit; if(distance < 0) { limit /= std::numeric_limits::radix; expon--; } else { limit *= std::numeric_limits::radix; expon++; } limit_distance = float_distance(val, limit); if(distance && (limit_distance == 0)) { return policies::raise_evaluation_error(function, "Internal logic failed while trying to increment floating point value %1%: most likely your FPU is in non-IEEE conforming mode.", val, pol); } } /*expon = 1 + ilogb(val); if((1 == scalbn(val, 1 + expon)) && (distance < 0)) --expon;*/ T diff = 0; if(val != 0) diff = distance * scalbn(T(1), expon - std::numeric_limits::digits); if(diff == 0) diff = distance * detail::get_smallest_value(); return val += diff; } // float_advance_imp } // namespace detail template inline typename tools::promote_args::type float_advance(T val, int distance, const Policy& pol) { typedef typename tools::promote_args::type result_type; return detail::float_advance_imp(detail::normalize_value(static_cast(val), typename detail::has_hidden_guard_digits::type()), distance, mpl::bool_::is_specialized || (std::numeric_limits::radix == 2)>(), pol); } template inline typename tools::promote_args::type float_advance(const T& val, int distance) { return boost::math::float_advance(val, distance, policies::policy<>()); } }} // boost math namespaces #endif // BOOST_MATH_SPECIAL_NEXT_HPP