From 4fadd968fa12130524c8380f33fcfe25d4de79e5 Mon Sep 17 00:00:00 2001
From: DongHun Kwak <dh0128.kwak@samsung.com>
Date: Wed, 13 Sep 2017 11:24:46 +0900
Subject: Imported Upstream version 1.65.0

Change-Id: Icf8400b375482cb11bcf77440a6934ba360d6ba4
Signed-off-by: DongHun Kwak <dh0128.kwak@samsung.com>
---
 boost/fiber/detail/spinlock_ttas_futex.hpp | 51 ++++++++++++++++++++++--------
 1 file changed, 37 insertions(+), 14 deletions(-)

(limited to 'boost/fiber/detail/spinlock_ttas_futex.hpp')
diff --git a/boost/fiber/detail/spinlock_ttas_futex.hpp b/boost/fiber/detail/spinlock_ttas_futex.hpp
index a427b73ba5..fd30c4120e 100644
--- a/boost/fiber/detail/spinlock_ttas_futex.hpp
+++ b/boost/fiber/detail/spinlock_ttas_futex.hpp
@@ -8,6 +8,7 @@
 #define BOOST_FIBERS_spinlock_ttas_futex_FUTEX_H
 
 #include <atomic>
+#include <cmath>
 #include <random>
 #include <thread>
 
@@ -25,18 +26,22 @@ namespace detail {
 
 class spinlock_ttas_futex {
 private:
-    std::atomic< std::int32_t > value_{ 0 };
+    template< typename FBSplk >
+    friend class spinlock_rtm;
+
+    std::atomic< std::int32_t >                 value_{ 0 };
 
 public:
-    spinlock_ttas_futex() noexcept = default;
+    spinlock_ttas_futex() = default;
 
     spinlock_ttas_futex( spinlock_ttas_futex const&) = delete;
     spinlock_ttas_futex & operator=( spinlock_ttas_futex const&) = delete;
 
     void lock() noexcept {
-        std::int32_t collisions = 0, tests = 0, expected = 0;
+        static thread_local std::minstd_rand generator{ std::random_device{}() };
+        std::int32_t collisions = 0, retries = 0, expected = 0;
         // after max. spins or collisions suspend via futex
-        while ( BOOST_FIBERS_SPIN_MAX_TESTS > tests && BOOST_FIBERS_SPIN_MAX_COLLISIONS > collisions) {
+        while ( retries++ < BOOST_FIBERS_RETRY_THRESHOLD) {
             // avoid using multiple pause instructions for a delay of a specific cycle count
             // the delay of cpu_relax() (pause on Intel) depends on the processor family
             // the cycle count can not guaranteed from one system to the next
@@ -48,26 +53,39 @@ public:
             // if 'value_' was released by other fiber
             // cached 'value_' is invalidated -> cache miss
             if ( 0 != ( expected = value_.load( std::memory_order_relaxed) ) ) {
-                ++tests;
 #if !defined(BOOST_FIBERS_SPIN_SINGLE_CORE)
-                // give CPU a hint that this thread is in a "spin-wait" loop
-                // delays the next instruction's execution for a finite period of time (depends on processor family)
-                // the CPU is not under demand, parts of the pipeline are no longer being used
-                // -> reduces the power consumed by the CPU
-                // -> prevent pipeline stalls
-                cpu_relax();
+                if ( BOOST_FIBERS_SPIN_BEFORE_SLEEP0 > retries) {
+                    // give CPU a hint that this thread is in a "spin-wait" loop
+                    // delays the next instruction's execution for a finite period of time (depends on processor family)
+                    // the CPU is not under demand, parts of the pipeline are no longer being used
+                    // -> reduces the power consumed by the CPU
+                    // -> prevent pipeline stalls
+                    cpu_relax();
+                } else if ( BOOST_FIBERS_SPIN_BEFORE_YIELD > retries) {
+                    // std::this_thread::sleep_for( 0us) has a fairly long instruction path length,
+                    // combined with an expensive ring3 to ring 0 transition costing about 1000 cycles
+                    // std::this_thread::sleep_for( 0us) lets this_thread give up the remaining part of its time slice
+                    // if and only if a thread of equal or greater priority is ready to run
+                    static constexpr std::chrono::microseconds us0{ 0 };
+                    std::this_thread::sleep_for( us0);
+                } else {
+                    // std::this_thread::yield() allows this_thread to give up the remaining part of its time slice,
+                    // but only to another thread on the same processor
+                    // instead of constant checking, a thread only checks if no other useful work is pending
+                    std::this_thread::yield();
+                }
 #else
                 // std::this_thread::yield() allows this_thread to give up the remaining part of its time slice,
                 // but only to another thread on the same processor
                 // instead of constant checking, a thread only checks if no other useful work is pending
                 std::this_thread::yield();
 #endif
-            } else if ( ! value_.compare_exchange_strong( expected, 1, std::memory_order_acquire, std::memory_order_release) ) {
+            } else if ( ! value_.compare_exchange_strong( expected, 1, std::memory_order_acquire) ) {
                 // spinlock now contended
                 // utilize 'Binary Exponential Backoff' algorithm
                 // linear_congruential_engine is a random number engine based on Linear congruential generator (LCG)
-                static thread_local std::minstd_rand generator;
-                static std::uniform_int_distribution< std::int32_t > distribution{ 0, static_cast< std::int32_t >( 1) << collisions };
+                std::uniform_int_distribution< std::int32_t > distribution{
+                    0, static_cast< std::int32_t >( 1) << (std::min)(collisions, static_cast< std::int32_t >( BOOST_FIBERS_CONTENTION_WINDOW_THRESHOLD)) };
                 const std::int32_t z = distribution( generator);
                 ++collisions;
                 for ( std::int32_t i = 0; i < z; ++i) {
@@ -91,6 +109,11 @@ public:
         }
     }
 
+    bool try_lock() noexcept {  // one non-blocking acquisition attempt: a single CAS, no spinning, no backoff
+        std::int32_t expected = 0;  // the exchange below succeeds only while value_ currently holds 0
+        return value_.compare_exchange_strong( expected, 1, std::memory_order_acquire);  // true iff value_ was swapped 0 -> 1
+    }
+
     void unlock() noexcept {
         if ( 1 != value_.fetch_sub( 1, std::memory_order_acquire) ) {
             value_.store( 0, std::memory_order_release);
-- 
cgit v1.2.3