Diffstat (limited to 'boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp')
-rw-r--r--  boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp | 63
1 file changed, 44 insertions(+), 19 deletions(-)
diff --git a/boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp b/boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp
index 61ab47691e..0f0b191e67 100644
--- a/boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp
+++ b/boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp
@@ -26,21 +26,28 @@ namespace detail {
class spinlock_ttas_adaptive_futex {
private:
- std::atomic< std::int32_t > value_{ 0 };
- std::atomic< std::int32_t > tests_{ 0 };
+ template< typename FBSplk >
+ friend class spinlock_rtm;
+
+ std::atomic< std::int32_t > value_{ 0 };
+ std::atomic< std::int32_t > retries_{ 0 };
public:
- spinlock_ttas_adaptive_futex() noexcept = default;
+ spinlock_ttas_adaptive_futex() = default;
spinlock_ttas_adaptive_futex( spinlock_ttas_adaptive_futex const&) = delete;
spinlock_ttas_adaptive_futex & operator=( spinlock_ttas_adaptive_futex const&) = delete;
void lock() noexcept {
- std::int32_t collisions = 0, tests = 0, expected = 0;
- const std::int32_t prev_tests = tests_.load( std::memory_order_relaxed);
- const std::int32_t max_tests = (std::min)( static_cast< std::int32_t >( BOOST_FIBERS_SPIN_MAX_TESTS), 2 * prev_tests + 10);
+ static thread_local std::minstd_rand generator{ std::random_device{}() };
+ std::int32_t collisions = 0, retries = 0, expected = 0;
+ const std::int32_t prev_retries = retries_.load( std::memory_order_relaxed);
+ const std::int32_t max_relax_retries = (std::min)(
+ static_cast< std::int32_t >( BOOST_FIBERS_SPIN_BEFORE_SLEEP0), 2 * prev_retries + 10);
+ const std::int32_t max_sleep_retries = (std::min)(
+ static_cast< std::int32_t >( BOOST_FIBERS_SPIN_BEFORE_YIELD), 2 * prev_retries + 10);
// after the maximum number of spins or collisions, suspend via futex
- while ( max_tests > tests && BOOST_FIBERS_SPIN_MAX_COLLISIONS > collisions) {
+ while ( retries++ < BOOST_FIBERS_RETRY_THRESHOLD) {
// avoid using multiple pause instructions for a delay of a specific cycle count
// the delay of cpu_relax() (pause on Intel) depends on the processor family
// the cycle count cannot be guaranteed from one system to the next
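
The hunk above sizes both spin windows from the retry count of the previous successful
acquisition: each window grows as 2 * prev_retries + 10, clamped by a compile-time cap.
A minimal sketch of that computation, using illustrative stand-in values for the
BOOST_FIBERS_* configuration macros (the real values are build configuration):

    #include <algorithm>
    #include <cstdint>

    // stand-ins for BOOST_FIBERS_SPIN_BEFORE_SLEEP0 / BOOST_FIBERS_SPIN_BEFORE_YIELD;
    // these concrete numbers are assumptions for illustration only
    constexpr std::int32_t spin_before_sleep0 = 200;
    constexpr std::int32_t spin_before_yield  = 400;

    // retries below this bound busy-wait with cpu_relax()
    std::int32_t relax_window( std::int32_t prev_retries) {
        return (std::min)( spin_before_sleep0, 2 * prev_retries + 10);
    }

    // retries below this bound fall back to sleep_for( 0us); above it, yield()
    std::int32_t sleep_window( std::int32_t prev_retries) {
        return (std::min)( spin_before_yield, 2 * prev_retries + 10);
    }

An uncontended lock (prev_retries == 0) escalates after at most 10 busy spins, while a
recently contended one earns a longer cpu_relax() phase.
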
@@ -52,26 +59,39 @@ public:
// if 'value_' was released by another fiber
// cached 'value_' is invalidated -> cache miss
if ( 0 != ( expected = value_.load( std::memory_order_relaxed) ) ) {
- ++tests;
#if !defined(BOOST_FIBERS_SPIN_SINGLE_CORE)
- // give CPU a hint that this thread is in a "spin-wait" loop
- // delays the next instruction's execution for a finite period of time (depends on processor family)
- // the CPU is not under demand, parts of the pipeline are no longer being used
- // -> reduces the power consumed by the CPU
- // -> prevent pipeline stalls
- cpu_relax();
+ if ( max_relax_retries > retries) {
+ // give CPU a hint that this thread is in a "spin-wait" loop
+ // delays the next instruction's execution for a finite period of time (depends on processor family)
+ // the CPU is not under demand, parts of the pipeline are no longer being used
+ // -> reduces the power consumed by the CPU
+                        // -> prevents pipeline stalls
+ cpu_relax();
+ } else if ( max_sleep_retries > retries) {
+                        // std::this_thread::sleep_for( 0us) has a fairly long instruction path length,
+                        // combined with an expensive ring-3 to ring-0 transition costing about 1000 cycles;
+                        // it lets this_thread give up the remaining part of its time slice
+                        // if and only if a thread of equal or greater priority is ready to run
+ static constexpr std::chrono::microseconds us0{ 0 };
+ std::this_thread::sleep_for( us0);
+ } else {
+ // std::this_thread::yield() allows this_thread to give up the remaining part of its time slice,
+ // but only to another thread on the same processor
+                        // instead of constantly checking, a thread checks again only when no other useful work is pending
+ std::this_thread::yield();
+ }
#else
// std::this_thread::yield() allows this_thread to give up the remaining part of its time slice,
// but only to another thread on the same processor
// instead of constantly checking, a thread checks again only when no other useful work is pending
std::this_thread::yield();
#endif
- } else if ( ! value_.compare_exchange_strong( expected, 1, std::memory_order_acquire, std::memory_order_release) ) {
+ } else if ( ! value_.compare_exchange_strong( expected, 1, std::memory_order_acquire) ) {
// spinlock now contended
// utilize 'Binary Exponential Backoff' algorithm
// linear_congruential_engine is a random number engine based on a linear congruential generator (LCG)
- static thread_local std::minstd_rand generator;
- static std::uniform_int_distribution< std::int32_t > distribution{ 0, static_cast< std::int32_t >( 1) << collisions };
+ std::uniform_int_distribution< std::int32_t > distribution{
+ 0, static_cast< std::int32_t >( 1) << (std::min)(collisions, static_cast< std::int32_t >( BOOST_FIBERS_CONTENTION_WINDOW_THRESHOLD)) };
const std::int32_t z = distribution( generator);
++collisions;
for ( std::int32_t i = 0; i < z; ++i) {
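
The backoff branch above draws a random spin count from a window that doubles with each
collision, now capped so the left shift cannot overflow. A self-contained sketch of that
capped binary exponential backoff; the cap of 8 is an illustrative assumption, not the
real BOOST_FIBERS_CONTENTION_WINDOW_THRESHOLD:

    #include <algorithm>
    #include <cstdint>
    #include <random>

    // how many cpu_relax() iterations to burn before the next lock attempt;
    // the window [0, 2^collisions] doubles per collision, up to 2^cap
    std::int32_t backoff_spins( std::minstd_rand & generator, std::int32_t collisions) {
        constexpr std::int32_t cap = 8; // assumed ceiling for the window exponent
        std::uniform_int_distribution< std::int32_t > distribution{
            0, std::int32_t{ 1 } << (std::min)( collisions, cap) };
        return distribution( generator);
    }

Constructing the distribution per call is cheap and, unlike the removed static
distribution, it sees the current collision count; the engine stays thread_local and is
now seeded from std::random_device, so threads no longer share one default-seeded
sequence.
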
@@ -81,7 +101,7 @@ public:
}
} else {
// success, lock acquired
- tests_.store( prev_tests + (tests - prev_tests) / 8, std::memory_order_relaxed);
+ retries_.store( prev_retries + (retries - prev_retries) / 8, std::memory_order_relaxed);
return;
}
}
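
The store on success is an integer exponential moving average with smoothing factor 1/8:
the remembered retry count moves an eighth of the way toward the latest sample, so a
single noisy acquisition cannot swing the adaptive windows. A quick constexpr check of
the arithmetic:

    #include <cstdint>

    constexpr std::int32_t ema8( std::int32_t prev, std::int32_t sample) {
        // new = prev + (sample - prev) / 8, i.e. alpha = 1/8
        return prev + (sample - prev) / 8;
    }

    static_assert( ema8( 40, 120) == 50, "40 + (120 - 40) / 8 == 50");
    static_assert( ema8( 40, 0) == 35, "decays slowly toward quieter samples");
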
@@ -95,7 +115,12 @@ public:
expected = value_.exchange( 2, std::memory_order_acquire);
}
// success, lock acquired
- tests_.store( prev_tests + (tests - prev_tests) / 8, std::memory_order_relaxed);
+ retries_.store( prev_retries + (retries - prev_retries) / 8, std::memory_order_relaxed);
+ }
+
+ bool try_lock() noexcept {
+ std::int32_t expected = 0;
+ return value_.compare_exchange_strong( expected, 1, std::memory_order_acquire);
}
void unlock() noexcept {
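
The added try_lock() attempts only the uncontended 0 -> 1 transition and never touches
the futex. Judging from the exchange( 2, ...) in the wait path above, the lock word
encodes 0 = unlocked, 1 = locked, 2 = locked with possible waiters. A hypothetical
caller, not part of the patch:

    #include <boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp>

    boost::fibers::detail::spinlock_ttas_adaptive_futex splk;

    void worker() {
        if ( splk.try_lock() ) {   // single CAS 0 -> 1; fails while the word is 1 or 2
            // ... critical section ...
            splk.unlock();         // presumably wakes a futex waiter when the word was 2
        } else {
            // contended: do other useful work instead of spinning or blocking
        }
    }
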