Diffstat (limited to 'boost/fiber/detail/spinlock_ttas_adaptive.hpp'):
 boost/fiber/detail/spinlock_ttas_adaptive.hpp | 46 +++++++++++++++++++++++++++++-----------------
 1 file changed, 29 insertions(+), 17 deletions(-)
diff --git a/boost/fiber/detail/spinlock_ttas_adaptive.hpp b/boost/fiber/detail/spinlock_ttas_adaptive.hpp
index da044b6298..d1f8b73cf3 100644
--- a/boost/fiber/detail/spinlock_ttas_adaptive.hpp
+++ b/boost/fiber/detail/spinlock_ttas_adaptive.hpp
@@ -15,6 +15,7 @@
#include <boost/fiber/detail/config.hpp>
#include <boost/fiber/detail/cpu_relax.hpp>
+#include <boost/fiber/detail/spinlock_status.hpp>
// based on information from:
// https://software.intel.com/en-us/articles/benefitting-power-and-performance-sleep-loops
@@ -26,26 +27,28 @@ namespace detail {
class spinlock_ttas_adaptive {
private:
- enum class spinlock_status {
- locked = 0,
- unlocked
- };
+ template< typename FBSplk >
+ friend class spinlock_rtm;
- std::atomic< spinlock_status > state_{ spinlock_status::unlocked };
- std::atomic< std::size_t > tests_{ 0 };
+ std::atomic< spinlock_status > state_{ spinlock_status::unlocked };
+ std::atomic< std::size_t > retries_{ 0 };
public:
- spinlock_ttas_adaptive() noexcept = default;
+ spinlock_ttas_adaptive() = default;
spinlock_ttas_adaptive( spinlock_ttas_adaptive const&) = delete;
spinlock_ttas_adaptive & operator=( spinlock_ttas_adaptive const&) = delete;
void lock() noexcept {
+ static thread_local std::minstd_rand generator{ std::random_device{}() };
std::size_t collisions = 0;
for (;;) {
- std::size_t tests = 0;
- const std::size_t prev_tests = tests_.load( std::memory_order_relaxed);
- const std::size_t max_tests = (std::min)( static_cast< std::size_t >( BOOST_FIBERS_SPIN_MAX_TESTS), 2 * prev_tests + 10);
+ std::size_t retries = 0;
+ const std::size_t prev_retries = retries_.load( std::memory_order_relaxed);
+ const std::size_t max_relax_retries = (std::min)(
+ static_cast< std::size_t >( BOOST_FIBERS_SPIN_BEFORE_SLEEP0), 2 * prev_retries + 10);
+ const std::size_t max_sleep_retries = (std::min)(
+ static_cast< std::size_t >( BOOST_FIBERS_SPIN_BEFORE_YIELD), 2 * prev_retries + 10);
// avoid using multiple pause instructions for a delay of a specific cycle count
// the delay of cpu_relax() (pause on Intel) depends on the processor family
// the cycle count cannot be guaranteed from one system to the next
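The comments above describe the test-test-and-set (TTAS) pattern this class is built on: spin on a cheap relaxed load (the "test") and attempt the cache-line-invalidating exchange only once the lock looks free. A minimal standalone sketch of that basic pattern, for illustration only (not part of this patch):

#include <atomic>

inline void ttas_lock( std::atomic< bool > & locked) {
    for (;;) {
        // test: read-only spin keeps the cache line in shared state
        while ( locked.load( std::memory_order_relaxed) ) {
        }
        // test-and-set: a single RMW attempt now that the lock looked free
        if ( ! locked.exchange( true, std::memory_order_acquire) ) {
            return; // acquired
        }
    }
}

The adaptive variant in this file layers the retry budget and randomized backoff below on top of this basic loop.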
@@ -58,22 +61,27 @@ public:
// cached 'state_' is invalidated -> cache miss
while ( spinlock_status::locked == state_.load( std::memory_order_relaxed) ) {
#if !defined(BOOST_FIBERS_SPIN_SINGLE_CORE)
- if ( max_tests > tests) {
- ++tests;
+ if ( max_relax_retries > retries) {
+ ++retries;
// gives the CPU a hint that this thread is in a "spin-wait" loop;
// delays the next instruction's execution for a finite period of time (depends on processor family)
// while the CPU is not under demand, parts of the pipeline are no longer being used
// -> reduces the power consumed by the CPU
// -> prevents pipeline stalls
cpu_relax();
- } else {
- ++tests;
+ } else if ( max_sleep_retries > retries) {
+ ++retries;
// std::this_thread::sleep_for( 0us) has a fairly long instruction path length,
// combined with an expensive ring3-to-ring0 transition costing about 1000 cycles;
// it lets this_thread give up the remaining part of its time slice,
// but only if a thread of equal or greater priority is ready to run
static constexpr std::chrono::microseconds us0{ 0 };
std::this_thread::sleep_for( us0);
+ } else {
+ // std::this_thread::yield() allows this_thread to give up the remaining part of its time slice,
+ // but only to another thread on the same processor
+ // instead of checking constantly, this_thread re-checks the lock only when no other useful work is pending
+ std::this_thread::yield();
}
#else
std::this_thread::yield();
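The hunk above spins through three escalating wait tiers: cpu_relax() while the adaptive spin budget lasts, then sleep_for(0us), then yield(). A standalone sketch of the same tiering, for illustration only (the threshold values and helper name are made-up stand-ins for BOOST_FIBERS_SPIN_BEFORE_SLEEP0 and BOOST_FIBERS_SPIN_BEFORE_YIELD):

#include <boost/fiber/detail/cpu_relax.hpp>
#include <chrono>
#include <cstddef>
#include <thread>

inline void spin_wait( std::size_t retries) {
    constexpr std::size_t before_sleep0 = 200; // assumed stand-in threshold
    constexpr std::size_t before_yield = 400;  // assumed stand-in threshold
    if ( retries < before_sleep0) {
        // cheapest tier: pause instruction, keeps the thread on the CPU
        cpu_relax();
    } else if ( retries < before_yield) {
        // middle tier: ring transition; cedes the slice only to threads of equal or greater priority
        std::this_thread::sleep_for( std::chrono::microseconds{ 0 });
    } else {
        // last tier: give up the remaining time slice entirely
        std::this_thread::yield();
    }
}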
@@ -85,8 +93,8 @@ public:
// spinlock is now contended
// utilize the 'Binary Exponential Backoff' algorithm
// linear_congruential_engine is a random-number engine based on the linear congruential generator (LCG)
- static thread_local std::minstd_rand generator;
- static std::uniform_int_distribution< std::size_t > distribution{ 0, static_cast< std::size_t >( 1) << collisions };
+ std::uniform_int_distribution< std::size_t > distribution{
+ 0, static_cast< std::size_t >( 1) << (std::min)(collisions, static_cast< std::size_t >( BOOST_FIBERS_CONTENTION_WINDOW_THRESHOLD)) };
const std::size_t z = distribution( generator);
++collisions;
for ( std::size_t i = 0; i < z; ++i) {
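The backoff window above doubles with every collision (binary exponential backoff) and, new in this patch, is clamped so the random delay cannot grow without bound. A standalone sketch of the draw, for illustration only (the cap of 16 is a made-up stand-in for BOOST_FIBERS_CONTENTION_WINDOW_THRESHOLD):

#include <algorithm>
#include <cstddef>
#include <random>

inline std::size_t backoff_delay( std::size_t collisions) {
    static thread_local std::minstd_rand generator{ std::random_device{}() };
    // window grows as 2^collisions, clamped at 2^16
    const std::size_t capped = (std::min)( collisions, static_cast< std::size_t >( 16));
    std::uniform_int_distribution< std::size_t > distribution{
        0, static_cast< std::size_t >( 1) << capped };
    return distribution( generator); // number of cpu_relax() spins to wait
}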
@@ -95,13 +103,17 @@ public:
cpu_relax();
}
} else {
- tests_.store( prev_tests + (tests - prev_tests) / 8, std::memory_order_relaxed);
+ retries_.store( prev_retries + (retries - prev_retries) / 8, std::memory_order_relaxed);
// success, thread has acquired the lock
break;
}
}
}
+ bool try_lock() noexcept {
+ return spinlock_status::unlocked == state_.exchange( spinlock_status::locked, std::memory_order_acquire);
+ }
+
void unlock() noexcept {
state_.store( spinlock_status::unlocked, std::memory_order_release);
}
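The adaptive part is the retries_ store in lock() above: it keeps an exponentially weighted moving average with weight 1/8, new = prev + (current - prev) / 8, so a single unusually long acquisition nudges the next call's spin budget only slightly (e.g. prev = 80, current = 160 yields 80 + 80/8 = 90). A sketch of that update, for illustration only:

#include <cstddef>

inline std::size_t update_spin_budget( std::size_t prev, std::size_t current) {
    // weight-1/8 moving average; written for current >= prev, since the
    // unsigned subtraction would wrap if the new sample were smaller
    return prev + (current - prev) / 8;
}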