1 files changed, 29 insertions, 17 deletions
diff --git a/boost/fiber/detail/spinlock_ttas_adaptive.hpp b/boost/fiber/detail/spinlock_ttas_adaptive.hpp
index da044b6298..d1f8b73cf3 100644
--- a/boost/fiber/detail/spinlock_ttas_adaptive.hpp
+++ b/boost/fiber/detail/spinlock_ttas_adaptive.hpp
@@ -15,6 +15,7 @@
 
 #include <boost/fiber/detail/config.hpp>
 #include <boost/fiber/detail/cpu_relax.hpp>
+#include <boost/fiber/detail/spinlock_status.hpp>
 
 // based on informations from:
 // https://software.intel.com/en-us/articles/benefitting-power-and-performance-sleep-loops
@@ -26,26 +27,28 @@ namespace detail {
 
 class spinlock_ttas_adaptive {
 private:
-    enum class spinlock_status {
-        locked = 0,
-        unlocked
-    };
+    template< typename FBSplk >
+    friend class spinlock_rtm;
 
-    std::atomic< spinlock_status >  state_{ spinlock_status::unlocked };
-    std::atomic< std::size_t >      tests_{ 0 };
+    std::atomic< spinlock_status >              state_{ spinlock_status::unlocked };
+    std::atomic< std::size_t >                  retries_{ 0 };
 
 public:
-    spinlock_ttas_adaptive() noexcept = default;
+    spinlock_ttas_adaptive() = default;
 
     spinlock_ttas_adaptive( spinlock_ttas_adaptive const&) = delete;
     spinlock_ttas_adaptive & operator=( spinlock_ttas_adaptive const&) = delete;
 
     void lock() noexcept {
+        static thread_local std::minstd_rand generator{ std::random_device{}() };
         std::size_t collisions = 0 ;
         for (;;) {
-            std::size_t tests = 0;
-            const std::size_t prev_tests = tests_.load( std::memory_order_relaxed);
-            const std::size_t max_tests = (std::min)( static_cast< std::size_t >( BOOST_FIBERS_SPIN_MAX_TESTS), 2 * prev_tests + 10);
+            std::size_t retries = 0;
+            const std::size_t prev_retries = retries_.load( std::memory_order_relaxed);
+            const std::size_t max_relax_retries = (std::min)(
+                    static_cast< std::size_t >( BOOST_FIBERS_SPIN_BEFORE_SLEEP0), 2 * prev_retries + 10);
+            const std::size_t max_sleep_retries = (std::min)(
+                    static_cast< std::size_t >( BOOST_FIBERS_SPIN_BEFORE_YIELD), 2 * prev_retries + 10);
             // avoid using multiple pause instructions for a delay of a specific cycle count
             // the delay of cpu_relax() (pause on Intel) depends on the processor family
             // the cycle count can not guaranteed from one system to the next
@@ -58,22 +61,27 @@ public:
             // cached 'state_' is invalidated -> cache miss
             while ( spinlock_status::locked == state_.load( std::memory_order_relaxed) ) {
 #if !defined(BOOST_FIBERS_SPIN_SINGLE_CORE)
-                if ( max_tests > tests) {
-                    ++tests;
+                if ( max_relax_retries > retries) {
+                    ++retries;
                     // give CPU a hint that this thread is in a "spin-wait" loop
                     // delays the next instruction's execution for a finite period of time (depends on processor family)
                     // the CPU is not under demand, parts of the pipeline are no longer being used
                     // -> reduces the power consumed by the CPU
                     // -> prevent pipeline stalls
                     cpu_relax();
-                } else {
-                    ++tests;
+                } else if ( max_sleep_retries > retries) {
+                    ++retries;
                     // std::this_thread::sleep_for( 0us) has a fairly long instruction path length,
                     // combined with an expensive ring3 to ring 0 transition costing about 1000 cycles
                     // std::this_thread::sleep_for( 0us) lets give up this_thread the remaining part of its time slice
                     // if and only if a thread of equal or greater priority is ready to run
                     static constexpr std::chrono::microseconds us0{ 0 };
                     std::this_thread::sleep_for( us0);
+                } else {
+                    // std::this_thread::yield() allows this_thread to give up the remaining part of its time slice,
+                    // but only to another thread on the same processor
+                    // instead of checking constantly, the thread re-checks the lock only when no other useful work is pending
+                    std::this_thread::yield();
                 }
 #else
                 std::this_thread::yield();
@@ -85,8 +93,8 @@ public:
                 // spinlock now contended
                 // utilize 'Binary Exponential Backoff' algorithm
                 // linear_congruential_engine is a random number engine based on Linear congruential generator (LCG)
-                static thread_local std::minstd_rand generator;
-                static std::uniform_int_distribution< std::size_t > distribution{ 0, static_cast< std::size_t >( 1) << collisions };
+                std::uniform_int_distribution< std::size_t > distribution{
+                    0, static_cast< std::size_t >( 1) << (std::min)(collisions, static_cast< std::size_t >( BOOST_FIBERS_CONTENTION_WINDOW_THRESHOLD)) };
                 const std::size_t z = distribution( generator);
                 ++collisions;
                 for ( std::size_t i = 0; i < z; ++i) {
@@ -95,13 +103,17 @@ public:
                     cpu_relax();
                 }
             } else {
-                tests_.store( prev_tests + (tests - prev_tests) / 8, std::memory_order_relaxed);
+                retries_.store( prev_retries + (retries - prev_retries) / 8, std::memory_order_relaxed);
                 // success, thread has acquired the lock
                 break;
             }
         }
     }
 
+    bool try_lock() noexcept { // single attempt to take the lock: no spinning, sleeping or backoff
+        return spinlock_status::unlocked == state_.exchange( spinlock_status::locked, std::memory_order_acquire); // exchange always writes 'locked'; true only if the previous state was 'unlocked' (acquire pairs with the release store in unlock())
+    }
+
     void unlock() noexcept {
         state_.store( spinlock_status::unlocked, std::memory_order_release);
     }