summaryrefslogtreecommitdiff
path: root/boost/fiber/detail
diff options
context:
space:
mode:
Diffstat (limited to 'boost/fiber/detail')
-rw-r--r--boost/fiber/detail/config.hpp21
-rw-r--r--boost/fiber/detail/context_spinlock_queue.hpp2
-rw-r--r--boost/fiber/detail/context_spmc_queue.hpp10
-rw-r--r--boost/fiber/detail/convert.hpp16
-rw-r--r--boost/fiber/detail/cpu_relax.hpp2
-rw-r--r--boost/fiber/detail/data.hpp17
-rw-r--r--boost/fiber/detail/futex.hpp12
-rw-r--r--boost/fiber/detail/rtm.hpp94
-rw-r--r--boost/fiber/detail/spinlock.hpp29
-rw-r--r--boost/fiber/detail/spinlock_rtm.hpp126
-rw-r--r--boost/fiber/detail/spinlock_status.hpp21
-rw-r--r--boost/fiber/detail/spinlock_ttas.hpp43
-rw-r--r--boost/fiber/detail/spinlock_ttas_adaptive.hpp46
-rw-r--r--boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp63
-rw-r--r--boost/fiber/detail/spinlock_ttas_futex.hpp51
-rw-r--r--boost/fiber/detail/wrap.hpp131
16 files changed, 420 insertions, 264 deletions
diff --git a/boost/fiber/detail/config.hpp b/boost/fiber/detail/config.hpp
index 7c7119e1fb..21dea693ac 100644
--- a/boost/fiber/detail/config.hpp
+++ b/boost/fiber/detail/config.hpp
@@ -47,19 +47,20 @@
# error "futex not supported on this platform"
#endif
-#if !defined(BOOST_FIBERS_SPIN_MAX_COLLISIONS)
-# define BOOST_FIBERS_SPIN_MAX_COLLISIONS 16
+#if !defined(BOOST_FIBERS_CONTENTION_WINDOW_THRESHOLD)
+# define BOOST_FIBERS_CONTENTION_WINDOW_THRESHOLD 16
#endif
-#if !defined(BOOST_FIBERS_SPIN_MAX_TESTS)
-# define BOOST_FIBERS_SPIN_MAX_TESTS 500
+#if !defined(BOOST_FIBERS_RETRY_THRESHOLD)
+# define BOOST_FIBERS_RETRY_THRESHOLD 64
#endif
-// modern architectures have cachelines with 64byte length
-// ARM Cortex-A15 32/64byte, Cortex-A9 16/32/64bytes
-// MIPS 74K: 32byte, 4KEc: 16byte
-// ist shoudl be safe to use 64byte for all
-static constexpr std::size_t cache_alignment{ 64 };
-static constexpr std::size_t cacheline_length{ 64 };
+#if !defined(BOOST_FIBERS_SPIN_BEFORE_SLEEP0)
+# define BOOST_FIBERS_SPIN_BEFORE_SLEEP0 32
+#endif
+
+#if !defined(BOOST_FIBERS_SPIN_BEFORE_YIELD)
+# define BOOST_FIBERS_SPIN_BEFORE_YIELD 64
+#endif
#endif // BOOST_FIBERS_DETAIL_CONFIG_H
diff --git a/boost/fiber/detail/context_spinlock_queue.hpp b/boost/fiber/detail/context_spinlock_queue.hpp
index e0ebdabda6..f58fbd2296 100644
--- a/boost/fiber/detail/context_spinlock_queue.hpp
+++ b/boost/fiber/detail/context_spinlock_queue.hpp
@@ -30,7 +30,7 @@ class context_spinlock_queue {
private:
typedef context * slot_type;
- alignas(cache_alignment) mutable spinlock splk_{};
+ mutable spinlock splk_{};
std::size_t pidx_{ 0 };
std::size_t cidx_{ 0 };
std::size_t capacity_;
diff --git a/boost/fiber/detail/context_spmc_queue.hpp b/boost/fiber/detail/context_spmc_queue.hpp
index 27256233cf..89f93044f9 100644
--- a/boost/fiber/detail/context_spmc_queue.hpp
+++ b/boost/fiber/detail/context_spmc_queue.hpp
@@ -44,9 +44,7 @@ private:
class array {
private:
typedef std::atomic< context * > atomic_type;
- typedef std::aligned_storage<
- sizeof( atomic_type), cache_alignment
- >::type storage_type;
+ typedef atomic_type storage_type;
std::size_t capacity_;
storage_type * storage_;
@@ -92,9 +90,9 @@ private:
}
};
- alignas(cache_alignment) std::atomic< std::size_t > top_{ 0 };
- alignas(cache_alignment) std::atomic< std::size_t > bottom_{ 0 };
- alignas(cache_alignment) std::atomic< array * > array_;
+ std::atomic< std::size_t > top_{ 0 };
+ std::atomic< std::size_t > bottom_{ 0 };
+ std::atomic< array * > array_;
std::vector< array * > old_arrays_{};
char padding_[cacheline_length];
diff --git a/boost/fiber/detail/convert.hpp b/boost/fiber/detail/convert.hpp
index ac190d8528..ba3bbbd0aa 100644
--- a/boost/fiber/detail/convert.hpp
+++ b/boost/fiber/detail/convert.hpp
@@ -34,22 +34,6 @@ std::chrono::steady_clock::time_point convert(
return std::chrono::steady_clock::now() + ( timeout_time - Clock::now() );
}
-// suggested by Howard Hinnant
-template< typename T >
-inline
-T * convert( T * p) noexcept {
- return p;
-}
-
-template< typename Pointer >
-inline
-typename std::pointer_traits< Pointer >::element_type *
-convert( Pointer p) noexcept {
- return nullptr != p
- ? to_raw_pointer( p.operator->() )
- : nullptr;
-}
-
}}}
#ifdef BOOST_HAS_ABI_HEADERS
diff --git a/boost/fiber/detail/cpu_relax.hpp b/boost/fiber/detail/cpu_relax.hpp
index 541b46dfd0..8a20aae059 100644
--- a/boost/fiber/detail/cpu_relax.hpp
+++ b/boost/fiber/detail/cpu_relax.hpp
@@ -16,7 +16,7 @@
#include <boost/fiber/detail/config.hpp>
#if BOOST_COMP_MSVC || BOOST_COMP_MSVC_EMULATED
-# include <Windows.h>
+# include <windows.h>
#endif
#ifdef BOOST_HAS_ABI_HEADERS
diff --git a/boost/fiber/detail/data.hpp b/boost/fiber/detail/data.hpp
index e2b119ec3e..c363817a09 100644
--- a/boost/fiber/detail/data.hpp
+++ b/boost/fiber/detail/data.hpp
@@ -23,22 +23,6 @@ class context;
namespace detail {
-#if (BOOST_EXECUTION_CONTEXT==1)
-struct data_t {
- spinlock_lock * lk{ nullptr };
- context * ctx{ nullptr };
-
- data_t() = default;
-
- explicit data_t( spinlock_lock * lk_) noexcept :
- lk{ lk_ } {
- }
-
- explicit data_t( context * ctx_) noexcept :
- ctx{ ctx_ } {
- }
-};
-#else
struct data_t {
spinlock_lock * lk{ nullptr };
context * ctx{ nullptr };
@@ -60,7 +44,6 @@ struct data_t {
from{ from_ } {
}
};
-#endif
}}}
diff --git a/boost/fiber/detail/futex.hpp b/boost/fiber/detail/futex.hpp
index d383dc4077..e64bd5990d 100644
--- a/boost/fiber/detail/futex.hpp
+++ b/boost/fiber/detail/futex.hpp
@@ -18,7 +18,7 @@ extern "C" {
#include <sys/syscall.h>
}
#elif BOOST_OS_WINDOWS
-#include <Windows.h>
+#include <windows.h>
#endif
namespace boost {
@@ -26,28 +26,28 @@ namespace fibers {
namespace detail {
#if BOOST_OS_LINUX
-inline
+BOOST_FORCEINLINE
int sys_futex( void * addr, std::int32_t op, std::int32_t x) {
return ::syscall( SYS_futex, addr, op, x, nullptr, nullptr, 0);
}
-inline
+BOOST_FORCEINLINE
int futex_wake( std::atomic< std::int32_t > * addr) {
return 0 <= sys_futex( static_cast< void * >( addr), FUTEX_WAKE_PRIVATE, 1) ? 0 : -1;
}
-inline
+BOOST_FORCEINLINE
int futex_wait( std::atomic< std::int32_t > * addr, std::int32_t x) {
return 0 <= sys_futex( static_cast< void * >( addr), FUTEX_WAIT_PRIVATE, x) ? 0 : -1;
}
#elif BOOST_OS_WINDOWS
-inline
+BOOST_FORCEINLINE
int futex_wake( std::atomic< std::int32_t > * addr) {
::WakeByAddressSingle( static_cast< void * >( addr) );
return 0;
}
-inline
+BOOST_FORCEINLINE
int futex_wait( std::atomic< std::int32_t > * addr, std::int32_t x) {
::WaitOnAddress( static_cast< volatile void * >( addr), & x, sizeof( x), INFINITE);
return 0;
diff --git a/boost/fiber/detail/rtm.hpp b/boost/fiber/detail/rtm.hpp
new file mode 100644
index 0000000000..5188b0d216
--- /dev/null
+++ b/boost/fiber/detail/rtm.hpp
@@ -0,0 +1,94 @@
+
+// Copyright Oliver Kowalke 2017.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+//
+
+#ifndef BOOST_FIBER_DETAIL_RTM_H
+#define BOOST_FIBER_DETAIL_RTM_H
+
+#include <cstdint>
+
+#include <boost/assert.hpp>
+#include <boost/config.hpp>
+
+#include <boost/fiber/detail/config.hpp>
+
+#ifdef BOOST_HAS_ABI_HEADERS
+# include BOOST_ABI_PREFIX
+#endif
+
+namespace boost {
+namespace fibers {
+namespace detail {
+
+struct rtm_status {
+ enum {
+ none = 0,
+ explicit_abort = 1 << 0,
+ may_retry = 1 << 1,
+ memory_conflict = 1 << 2,
+ buffer_overflow = 1 << 3,
+ debug_hit = 1 << 4,
+ nested_abort = 1 << 5
+ };
+
+ static constexpr std::uint32_t success = ~std::uint32_t{ 0 };
+};
+
+static BOOST_FORCEINLINE
+std::uint32_t rtm_begin() noexcept {
+ std::uint32_t result = rtm_status::success;
+ __asm__ __volatile__
+ (
+ ".byte 0xc7,0xf8 ; .long 0"
+ : "+a" (result)
+ :
+ : "memory"
+ );
+ return result;
+}
+
+static BOOST_FORCEINLINE
+void rtm_end() noexcept {
+ __asm__ __volatile__
+ (
+ ".byte 0x0f,0x01,0xd5"
+ :
+ :
+ : "memory"
+ );
+}
+
+static BOOST_FORCEINLINE
+void rtm_abort_lock_not_free() noexcept {
+ __asm__ __volatile__
+ (
+ ".byte 0xc6,0xf8,0xff"
+ :
+ :
+ : "memory"
+ );
+}
+
+static BOOST_FORCEINLINE
+bool rtm_test() noexcept {
+ bool result;
+ __asm__ __volatile__
+ (
+ ".byte 0x0f,0x01,0xd6; setz %0"
+ : "=q" (result)
+ :
+ : "memory"
+ );
+ return result;
+}
+
+}}}
+
+#ifdef BOOST_HAS_ABI_HEADERS
+# include BOOST_ABI_SUFFIX
+#endif
+
+#endif // BOOST_FIBER_DETAIL_RTM_H
diff --git a/boost/fiber/detail/spinlock.hpp b/boost/fiber/detail/spinlock.hpp
index 89a6d51a6f..59d2a5cd2b 100644
--- a/boost/fiber/detail/spinlock.hpp
+++ b/boost/fiber/detail/spinlock.hpp
@@ -13,11 +13,14 @@
#if !defined(BOOST_FIBERS_NO_ATOMICS)
# include <mutex>
-# include <boost/fiber/detail/spinlock_ttas.hpp>
# include <boost/fiber/detail/spinlock_ttas_adaptive.hpp>
+# include <boost/fiber/detail/spinlock_ttas.hpp>
# if defined(BOOST_FIBERS_HAS_FUTEX)
-# include <boost/fiber/detail/spinlock_ttas_futex.hpp>
# include <boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp>
+# include <boost/fiber/detail/spinlock_ttas_futex.hpp>
+# endif
+# if defined(BOOST_USE_TSX)
+# include <boost/fiber/detail/spinlock_rtm.hpp>
# endif
#endif
@@ -29,7 +32,7 @@ namespace boost {
namespace fibers {
namespace detail {
-#if defined(BOOST_FIBERS_NO_ATOMICS)
+#if defined(BOOST_FIBERS_NO_ATOMICS)
struct spinlock {
constexpr spinlock() noexcept {}
void lock() noexcept {}
@@ -42,16 +45,32 @@ struct spinlock_lock {
void unlock() noexcept {}
};
#else
-# if defined(BOOST_FIBERS_SPINLOCK_STD_MUTEX)
+# if defined(BOOST_FIBERS_SPINLOCK_STD_MUTEX)
using spinlock = std::mutex;
# elif defined(BOOST_FIBERS_SPINLOCK_TTAS_FUTEX)
+# if defined(BOOST_USE_TSX)
+using spinlock = spinlock_rtm< spinlock_ttas_futex >;
+# else
using spinlock = spinlock_ttas_futex;
+# endif
# elif defined(BOOST_FIBERS_SPINLOCK_TTAS_ADAPTIVE_FUTEX)
+# if defined(BOOST_USE_TSX)
+using spinlock = spinlock_rtm< spinlock_ttas_adaptive_futex >;
+# else
using spinlock = spinlock_ttas_adaptive_futex;
-# elif defined(BOOST_FIBERS_SPINLOCK_TTAS_ADAPTIVE)
+# endif
+# elif defined(BOOST_FIBERS_SPINLOCK_TTAS_ADAPTIVE)
+# if defined(BOOST_USE_TSX)
+using spinlock = spinlock_rtm< spinlock_ttas_adaptive >;
+# else
using spinlock = spinlock_ttas_adaptive;
+# endif
# else
+# if defined(BOOST_USE_TSX)
+using spinlock = spinlock_rtm< spinlock_ttas >;
+# else
using spinlock = spinlock_ttas;
+# endif
# endif
using spinlock_lock = std::unique_lock< spinlock >;
#endif
diff --git a/boost/fiber/detail/spinlock_rtm.hpp b/boost/fiber/detail/spinlock_rtm.hpp
new file mode 100644
index 0000000000..5cc4a5e9af
--- /dev/null
+++ b/boost/fiber/detail/spinlock_rtm.hpp
@@ -0,0 +1,126 @@
+
+// Copyright Oliver Kowalke 2017.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_FIBERS_SPINLOCK_RTM_H
+#define BOOST_FIBERS_SPINLOCK_RTM_H
+
+#include <atomic>
+#include <chrono>
+#include <cmath>
+#include <random>
+#include <thread>
+
+#include <boost/fiber/detail/config.hpp>
+#include <boost/fiber/detail/cpu_relax.hpp>
+#include <boost/fiber/detail/rtm.hpp>
+#include <boost/fiber/detail/spinlock_status.hpp>
+
+namespace boost {
+namespace fibers {
+namespace detail {
+
+template< typename FBSplk >
+class spinlock_rtm {
+private:
+ FBSplk splk_{};
+
+public:
+ spinlock_rtm() = default;
+
+ spinlock_rtm( spinlock_rtm const&) = delete;
+ spinlock_rtm & operator=( spinlock_rtm const&) = delete;
+
+ void lock() noexcept {
+ static thread_local std::minstd_rand generator{ std::random_device{}() };
+ std::size_t collisions = 0 ;
+ for ( std::size_t retries = 0; retries < BOOST_FIBERS_RETRY_THRESHOLD; ++retries) {
+ std::uint32_t status;
+ if ( rtm_status::success == ( status = rtm_begin() ) ) {
+ // add lock to read-set
+ if ( spinlock_status::unlocked == splk_.state_.load( std::memory_order_relaxed) ) {
+ // lock is free, enter critical section
+ return;
+ }
+ // lock was acquired by another thread
+ // explicit abort of transaction with abort argument 'lock not free'
+ rtm_abort_lock_not_free();
+ }
+ // transaction aborted
+ if ( rtm_status::none != (status & rtm_status::may_retry) ||
+ rtm_status::none != (status & rtm_status::memory_conflict) ) {
+ // another logical processor conflicted with a memory address that was
+                // part of the read-/write-set
+ if ( BOOST_FIBERS_CONTENTION_WINDOW_THRESHOLD > collisions) {
+ std::uniform_int_distribution< std::size_t > distribution{
+ 0, static_cast< std::size_t >( 1) << (std::min)(collisions, static_cast< std::size_t >( BOOST_FIBERS_CONTENTION_WINDOW_THRESHOLD)) };
+ const std::size_t z = distribution( generator);
+ ++collisions;
+ for ( std::size_t i = 0; i < z; ++i) {
+ cpu_relax();
+ }
+ } else {
+ std::this_thread::yield();
+ }
+ } else if ( rtm_status::none != (status & rtm_status::explicit_abort) &&
+ rtm_status::none == (status & rtm_status::nested_abort) ) {
+ // another logical processor has acquired the lock and
+ // abort was not caused by a nested transaction
+ // wait till lock becomes free again
+ std::size_t count = 0;
+ while ( spinlock_status::locked == splk_.state_.load( std::memory_order_relaxed) ) {
+ if ( BOOST_FIBERS_SPIN_BEFORE_SLEEP0 > count) {
+ ++count;
+ cpu_relax();
+ } else if ( BOOST_FIBERS_SPIN_BEFORE_YIELD > count) {
+ ++count;
+ static constexpr std::chrono::microseconds us0{ 0 };
+ std::this_thread::sleep_for( us0);
+#if 0
+ using namespace std::chrono_literals;
+ std::this_thread::sleep_for( 0ms);
+#endif
+ } else {
+ std::this_thread::yield();
+ }
+ }
+ } else {
+            // transaction aborted due to:
+ // - internal buffer to track transactional state overflowed
+ // - debug exception or breakpoint exception was hit
+ // - abort during execution of nested transactions (max nesting limit exceeded)
+ // -> use fallback path
+ break;
+ }
+ }
+ splk_.lock();
+ }
+
+ bool try_lock() noexcept {
+ if ( rtm_status::success != rtm_begin() ) {
+ return false;
+ }
+
+ // add lock to read-set
+ if ( spinlock_status::unlocked != splk_.state_.load( std::memory_order_relaxed) ) {
+ // lock was acquired by another thread
+ // explicit abort of transaction with abort argument 'lock not free'
+ rtm_abort_lock_not_free();
+ }
+ return true;
+ }
+
+ void unlock() noexcept {
+ if ( spinlock_status::unlocked == splk_.state_.load( std::memory_order_acquire) ) {
+ rtm_end();
+ } else {
+ splk_.unlock();
+ }
+ }
+};
+
+}}}
+
+#endif // BOOST_FIBERS_SPINLOCK_RTM_H
diff --git a/boost/fiber/detail/spinlock_status.hpp b/boost/fiber/detail/spinlock_status.hpp
new file mode 100644
index 0000000000..74f09e4acc
--- /dev/null
+++ b/boost/fiber/detail/spinlock_status.hpp
@@ -0,0 +1,21 @@
+
+// Copyright Oliver Kowalke 2017.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_FIBERS_SPINLOCK_STATUS_H
+#define BOOST_FIBERS_SPINLOCK_STATUS_H
+
+namespace boost {
+namespace fibers {
+namespace detail {
+
+enum class spinlock_status {
+ locked = 0,
+ unlocked
+};
+
+}}}
+
+#endif // BOOST_FIBERS_SPINLOCK_STATUS_H
diff --git a/boost/fiber/detail/spinlock_ttas.hpp b/boost/fiber/detail/spinlock_ttas.hpp
index 380773ad6d..f3302ed17e 100644
--- a/boost/fiber/detail/spinlock_ttas.hpp
+++ b/boost/fiber/detail/spinlock_ttas.hpp
@@ -9,41 +9,37 @@
#include <atomic>
#include <chrono>
+#include <cmath>
#include <random>
#include <thread>
#include <boost/fiber/detail/config.hpp>
#include <boost/fiber/detail/cpu_relax.hpp>
+#include <boost/fiber/detail/spinlock_status.hpp>
// based on information from:
// https://software.intel.com/en-us/articles/benefitting-power-and-performance-sleep-loops
// https://software.intel.com/en-us/articles/long-duration-spin-wait-loops-on-hyper-threading-technology-enabled-intel-processors
-#if BOOST_COMP_CLANG
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wunused-private-field"
-#endif
-
namespace boost {
namespace fibers {
namespace detail {
class spinlock_ttas {
private:
- enum class spinlock_status {
- locked = 0,
- unlocked
- };
+ template< typename FBSplk >
+ friend class spinlock_rtm;
- std::atomic< spinlock_status > state_{ spinlock_status::unlocked };
+ std::atomic< spinlock_status > state_{ spinlock_status::unlocked };
public:
- spinlock_ttas() noexcept = default;
+ spinlock_ttas() = default;
spinlock_ttas( spinlock_ttas const&) = delete;
spinlock_ttas & operator=( spinlock_ttas const&) = delete;
void lock() noexcept {
+ static thread_local std::minstd_rand generator{ std::random_device{}() };
std::size_t collisions = 0 ;
for (;;) {
// avoid using multiple pause instructions for a delay of a specific cycle count
@@ -51,7 +47,7 @@ public:
// the cycle count cannot be guaranteed from one system to the next
// -> check the shared variable 'state_' in between each cpu_relax() to prevent
// unnecessarily long delays on some systems
- std::size_t tests = 0;
+ std::size_t retries = 0;
// test shared variable 'status_'
// first access to 'state_' -> cache miss
// successive access to 'state_' -> cache hit
@@ -59,21 +55,26 @@ public:
// cached 'state_' is invalidated -> cache miss
while ( spinlock_status::locked == state_.load( std::memory_order_relaxed) ) {
#if !defined(BOOST_FIBERS_SPIN_SINGLE_CORE)
- if ( BOOST_FIBERS_SPIN_MAX_TESTS > tests) {
- ++tests;
+ if ( BOOST_FIBERS_SPIN_BEFORE_SLEEP0 > retries) {
+ ++retries;
// give CPU a hint that this thread is in a "spin-wait" loop
// delays the next instruction's execution for a finite period of time (depends on processor family)
// the CPU is not under demand, parts of the pipeline are no longer being used
// -> reduces the power consumed by the CPU
// -> prevent pipeline stalls
cpu_relax();
- } else {
+ } else if ( BOOST_FIBERS_SPIN_BEFORE_YIELD > retries) {
// std::this_thread::sleep_for( 0us) has a fairly long instruction path length,
// combined with an expensive ring3 to ring 0 transition costing about 1000 cycles
// std::this_thread::sleep_for( 0us) lets give up this_thread the remaining part of its time slice
// if and only if a thread of equal or greater priority is ready to run
static constexpr std::chrono::microseconds us0{ 0 };
std::this_thread::sleep_for( us0);
+ } else {
+ // std::this_thread::yield() allows this_thread to give up the remaining part of its time slice,
+ // but only to another thread on the same processor
+ // instead of constant checking, a thread only checks if no other useful work is pending
+ std::this_thread::yield();
}
#else
std::this_thread::yield();
@@ -85,8 +86,8 @@ public:
// spinlock now contended
// utilize 'Binary Exponential Backoff' algorithm
// linear_congruential_engine is a random number engine based on Linear congruential generator (LCG)
- static thread_local std::minstd_rand generator;
- static std::uniform_int_distribution< std::size_t > distribution{ 0, static_cast< std::size_t >( 1) << collisions };
+ std::uniform_int_distribution< std::size_t > distribution{
+ 0, static_cast< std::size_t >( 1) << (std::min)(collisions, static_cast< std::size_t >( BOOST_FIBERS_CONTENTION_WINDOW_THRESHOLD)) };
const std::size_t z = distribution( generator);
++collisions;
for ( std::size_t i = 0; i < z; ++i) {
@@ -101,6 +102,10 @@ public:
}
}
+ bool try_lock() noexcept {
+ return spinlock_status::unlocked == state_.exchange( spinlock_status::locked, std::memory_order_acquire);
+ }
+
void unlock() noexcept {
state_.store( spinlock_status::unlocked, std::memory_order_release);
}
@@ -108,8 +113,4 @@ public:
}}}
-#if BOOST_COMP_CLANG
-#pragma clang diagnostic pop
-#endif
-
#endif // BOOST_FIBERS_SPINLOCK_TTAS_H
diff --git a/boost/fiber/detail/spinlock_ttas_adaptive.hpp b/boost/fiber/detail/spinlock_ttas_adaptive.hpp
index da044b6298..d1f8b73cf3 100644
--- a/boost/fiber/detail/spinlock_ttas_adaptive.hpp
+++ b/boost/fiber/detail/spinlock_ttas_adaptive.hpp
@@ -15,6 +15,7 @@
#include <boost/fiber/detail/config.hpp>
#include <boost/fiber/detail/cpu_relax.hpp>
+#include <boost/fiber/detail/spinlock_status.hpp>
// based on information from:
// https://software.intel.com/en-us/articles/benefitting-power-and-performance-sleep-loops
@@ -26,26 +27,28 @@ namespace detail {
class spinlock_ttas_adaptive {
private:
- enum class spinlock_status {
- locked = 0,
- unlocked
- };
+ template< typename FBSplk >
+ friend class spinlock_rtm;
- std::atomic< spinlock_status > state_{ spinlock_status::unlocked };
- std::atomic< std::size_t > tests_{ 0 };
+ std::atomic< spinlock_status > state_{ spinlock_status::unlocked };
+ std::atomic< std::size_t > retries_{ 0 };
public:
- spinlock_ttas_adaptive() noexcept = default;
+ spinlock_ttas_adaptive() = default;
spinlock_ttas_adaptive( spinlock_ttas_adaptive const&) = delete;
spinlock_ttas_adaptive & operator=( spinlock_ttas_adaptive const&) = delete;
void lock() noexcept {
+ static thread_local std::minstd_rand generator{ std::random_device{}() };
std::size_t collisions = 0 ;
for (;;) {
- std::size_t tests = 0;
- const std::size_t prev_tests = tests_.load( std::memory_order_relaxed);
- const std::size_t max_tests = (std::min)( static_cast< std::size_t >( BOOST_FIBERS_SPIN_MAX_TESTS), 2 * prev_tests + 10);
+ std::size_t retries = 0;
+ const std::size_t prev_retries = retries_.load( std::memory_order_relaxed);
+ const std::size_t max_relax_retries = (std::min)(
+ static_cast< std::size_t >( BOOST_FIBERS_SPIN_BEFORE_SLEEP0), 2 * prev_retries + 10);
+ const std::size_t max_sleep_retries = (std::min)(
+ static_cast< std::size_t >( BOOST_FIBERS_SPIN_BEFORE_YIELD), 2 * prev_retries + 10);
// avoid using multiple pause instructions for a delay of a specific cycle count
// the delay of cpu_relax() (pause on Intel) depends on the processor family
// the cycle count can not guaranteed from one system to the next
@@ -58,22 +61,27 @@ public:
// cached 'state_' is invalidated -> cache miss
while ( spinlock_status::locked == state_.load( std::memory_order_relaxed) ) {
#if !defined(BOOST_FIBERS_SPIN_SINGLE_CORE)
- if ( max_tests > tests) {
- ++tests;
+ if ( max_relax_retries > retries) {
+ ++retries;
// give CPU a hint that this thread is in a "spin-wait" loop
// delays the next instruction's execution for a finite period of time (depends on processor family)
// the CPU is not under demand, parts of the pipeline are no longer being used
// -> reduces the power consumed by the CPU
// -> prevent pipeline stalls
cpu_relax();
- } else {
- ++tests;
+ } else if ( max_sleep_retries > retries) {
+ ++retries;
// std::this_thread::sleep_for( 0us) has a fairly long instruction path length,
// combined with an expensive ring3 to ring 0 transition costing about 1000 cycles
// std::this_thread::sleep_for( 0us) lets give up this_thread the remaining part of its time slice
// if and only if a thread of equal or greater priority is ready to run
static constexpr std::chrono::microseconds us0{ 0 };
std::this_thread::sleep_for( us0);
+ } else {
+ // std::this_thread::yield() allows this_thread to give up the remaining part of its time slice,
+ // but only to another thread on the same processor
+ // instead of constant checking, a thread only checks if no other useful work is pending
+ std::this_thread::yield();
}
#else
std::this_thread::yield();
@@ -85,8 +93,8 @@ public:
// spinlock now contended
// utilize 'Binary Exponential Backoff' algorithm
// linear_congruential_engine is a random number engine based on Linear congruential generator (LCG)
- static thread_local std::minstd_rand generator;
- static std::uniform_int_distribution< std::size_t > distribution{ 0, static_cast< std::size_t >( 1) << collisions };
+ std::uniform_int_distribution< std::size_t > distribution{
+ 0, static_cast< std::size_t >( 1) << (std::min)(collisions, static_cast< std::size_t >( BOOST_FIBERS_CONTENTION_WINDOW_THRESHOLD)) };
const std::size_t z = distribution( generator);
++collisions;
for ( std::size_t i = 0; i < z; ++i) {
@@ -95,13 +103,17 @@ public:
cpu_relax();
}
} else {
- tests_.store( prev_tests + (tests - prev_tests) / 8, std::memory_order_relaxed);
+ retries_.store( prev_retries + (retries - prev_retries) / 8, std::memory_order_relaxed);
// success, thread has acquired the lock
break;
}
}
}
+ bool try_lock() noexcept {
+ return spinlock_status::unlocked == state_.exchange( spinlock_status::locked, std::memory_order_acquire);
+ }
+
void unlock() noexcept {
state_.store( spinlock_status::unlocked, std::memory_order_release);
}
diff --git a/boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp b/boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp
index 61ab47691e..0f0b191e67 100644
--- a/boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp
+++ b/boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp
@@ -26,21 +26,28 @@ namespace detail {
class spinlock_ttas_adaptive_futex {
private:
- std::atomic< std::int32_t > value_{ 0 };
- std::atomic< std::int32_t > tests_{ 0 };
+ template< typename FBSplk >
+ friend class spinlock_rtm;
+
+ std::atomic< std::int32_t > value_{ 0 };
+ std::atomic< std::int32_t > retries_{ 0 };
public:
- spinlock_ttas_adaptive_futex() noexcept = default;
+ spinlock_ttas_adaptive_futex() = default;
spinlock_ttas_adaptive_futex( spinlock_ttas_adaptive_futex const&) = delete;
spinlock_ttas_adaptive_futex & operator=( spinlock_ttas_adaptive_futex const&) = delete;
void lock() noexcept {
- std::int32_t collisions = 0, tests = 0, expected = 0;
- const std::int32_t prev_tests = tests_.load( std::memory_order_relaxed);
- const std::int32_t max_tests = (std::min)( static_cast< std::int32_t >( BOOST_FIBERS_SPIN_MAX_TESTS), 2 * prev_tests + 10);
+ static thread_local std::minstd_rand generator{ std::random_device{}() };
+ std::int32_t collisions = 0, retries = 0, expected = 0;
+ const std::int32_t prev_retries = retries_.load( std::memory_order_relaxed);
+ const std::int32_t max_relax_retries = (std::min)(
+ static_cast< std::int32_t >( BOOST_FIBERS_SPIN_BEFORE_SLEEP0), 2 * prev_retries + 10);
+ const std::int32_t max_sleep_retries = (std::min)(
+ static_cast< std::int32_t >( BOOST_FIBERS_SPIN_BEFORE_YIELD), 2 * prev_retries + 10);
// after max. spins or collisions suspend via futex
- while ( max_tests > tests && BOOST_FIBERS_SPIN_MAX_COLLISIONS > collisions) {
+ while ( retries++ < BOOST_FIBERS_RETRY_THRESHOLD) {
// avoid using multiple pause instructions for a delay of a specific cycle count
// the delay of cpu_relax() (pause on Intel) depends on the processor family
// the cycle count cannot be guaranteed from one system to the next
@@ -52,26 +59,39 @@ public:
// if 'value_' was released by other fiber
// cached 'value_' is invalidated -> cache miss
if ( 0 != ( expected = value_.load( std::memory_order_relaxed) ) ) {
- ++tests;
#if !defined(BOOST_FIBERS_SPIN_SINGLE_CORE)
- // give CPU a hint that this thread is in a "spin-wait" loop
- // delays the next instruction's execution for a finite period of time (depends on processor family)
- // the CPU is not under demand, parts of the pipeline are no longer being used
- // -> reduces the power consumed by the CPU
- // -> prevent pipeline stalls
- cpu_relax();
+ if ( max_relax_retries > retries) {
+ // give CPU a hint that this thread is in a "spin-wait" loop
+ // delays the next instruction's execution for a finite period of time (depends on processor family)
+ // the CPU is not under demand, parts of the pipeline are no longer being used
+ // -> reduces the power consumed by the CPU
+ // -> prevent pipeline stalls
+ cpu_relax();
+ } else if ( max_sleep_retries > retries) {
+ // std::this_thread::sleep_for( 0us) has a fairly long instruction path length,
+ // combined with an expensive ring3 to ring 0 transition costing about 1000 cycles
+ // std::this_thread::sleep_for( 0us) lets give up this_thread the remaining part of its time slice
+ // if and only if a thread of equal or greater priority is ready to run
+ static constexpr std::chrono::microseconds us0{ 0 };
+ std::this_thread::sleep_for( us0);
+ } else {
+ // std::this_thread::yield() allows this_thread to give up the remaining part of its time slice,
+ // but only to another thread on the same processor
+ // instead of constant checking, a thread only checks if no other useful work is pending
+ std::this_thread::yield();
+ }
#else
// std::this_thread::yield() allows this_thread to give up the remaining part of its time slice,
// but only to another thread on the same processor
// instead of constant checking, a thread only checks if no other useful work is pending
std::this_thread::yield();
#endif
- } else if ( ! value_.compare_exchange_strong( expected, 1, std::memory_order_acquire, std::memory_order_release) ) {
+ } else if ( ! value_.compare_exchange_strong( expected, 1, std::memory_order_acquire) ) {
// spinlock now contended
// utilize 'Binary Exponential Backoff' algorithm
// linear_congruential_engine is a random number engine based on Linear congruential generator (LCG)
- static thread_local std::minstd_rand generator;
- static std::uniform_int_distribution< std::int32_t > distribution{ 0, static_cast< std::int32_t >( 1) << collisions };
+ std::uniform_int_distribution< std::int32_t > distribution{
+ 0, static_cast< std::int32_t >( 1) << (std::min)(collisions, static_cast< std::int32_t >( BOOST_FIBERS_CONTENTION_WINDOW_THRESHOLD)) };
const std::int32_t z = distribution( generator);
++collisions;
for ( std::int32_t i = 0; i < z; ++i) {
@@ -81,7 +101,7 @@ public:
}
} else {
// success, lock acquired
- tests_.store( prev_tests + (tests - prev_tests) / 8, std::memory_order_relaxed);
+ retries_.store( prev_retries + (retries - prev_retries) / 8, std::memory_order_relaxed);
return;
}
}
@@ -95,7 +115,12 @@ public:
expected = value_.exchange( 2, std::memory_order_acquire);
}
// success, lock acquired
- tests_.store( prev_tests + (tests - prev_tests) / 8, std::memory_order_relaxed);
+ retries_.store( prev_retries + (retries - prev_retries) / 8, std::memory_order_relaxed);
+ }
+
+ bool try_lock() noexcept {
+ std::int32_t expected = 0;
+ return value_.compare_exchange_strong( expected, 1, std::memory_order_acquire);
}
void unlock() noexcept {
diff --git a/boost/fiber/detail/spinlock_ttas_futex.hpp b/boost/fiber/detail/spinlock_ttas_futex.hpp
index a427b73ba5..fd30c4120e 100644
--- a/boost/fiber/detail/spinlock_ttas_futex.hpp
+++ b/boost/fiber/detail/spinlock_ttas_futex.hpp
@@ -8,6 +8,7 @@
#define BOOST_FIBERS_spinlock_ttas_futex_FUTEX_H
#include <atomic>
+#include <cmath>
#include <random>
#include <thread>
@@ -25,18 +26,22 @@ namespace detail {
class spinlock_ttas_futex {
private:
- std::atomic< std::int32_t > value_{ 0 };
+ template< typename FBSplk >
+ friend class spinlock_rtm;
+
+ std::atomic< std::int32_t > value_{ 0 };
public:
- spinlock_ttas_futex() noexcept = default;
+ spinlock_ttas_futex() = default;
spinlock_ttas_futex( spinlock_ttas_futex const&) = delete;
spinlock_ttas_futex & operator=( spinlock_ttas_futex const&) = delete;
void lock() noexcept {
- std::int32_t collisions = 0, tests = 0, expected = 0;
+ static thread_local std::minstd_rand generator{ std::random_device{}() };
+ std::int32_t collisions = 0, retries = 0, expected = 0;
// after max. spins or collisions suspend via futex
- while ( BOOST_FIBERS_SPIN_MAX_TESTS > tests && BOOST_FIBERS_SPIN_MAX_COLLISIONS > collisions) {
+ while ( retries++ < BOOST_FIBERS_RETRY_THRESHOLD) {
// avoid using multiple pause instructions for a delay of a specific cycle count
// the delay of cpu_relax() (pause on Intel) depends on the processor family
// the cycle count cannot be guaranteed from one system to the next
@@ -48,26 +53,39 @@ public:
// if 'value_' was released by other fiber
// cached 'value_' is invalidated -> cache miss
if ( 0 != ( expected = value_.load( std::memory_order_relaxed) ) ) {
- ++tests;
#if !defined(BOOST_FIBERS_SPIN_SINGLE_CORE)
- // give CPU a hint that this thread is in a "spin-wait" loop
- // delays the next instruction's execution for a finite period of time (depends on processor family)
- // the CPU is not under demand, parts of the pipeline are no longer being used
- // -> reduces the power consumed by the CPU
- // -> prevent pipeline stalls
- cpu_relax();
+ if ( BOOST_FIBERS_SPIN_BEFORE_SLEEP0 > retries) {
+ // give CPU a hint that this thread is in a "spin-wait" loop
+ // delays the next instruction's execution for a finite period of time (depends on processor family)
+ // the CPU is not under demand, parts of the pipeline are no longer being used
+ // -> reduces the power consumed by the CPU
+ // -> prevent pipeline stalls
+ cpu_relax();
+ } else if ( BOOST_FIBERS_SPIN_BEFORE_YIELD > retries) {
+ // std::this_thread::sleep_for( 0us) has a fairly long instruction path length,
+                    // combined with an expensive ring 3 to ring 0 transition costing about 1000 cycles
+                    // std::this_thread::sleep_for( 0us) lets this_thread give up the remaining part of its time slice
+ // if and only if a thread of equal or greater priority is ready to run
+ static constexpr std::chrono::microseconds us0{ 0 };
+ std::this_thread::sleep_for( us0);
+ } else {
+ // std::this_thread::yield() allows this_thread to give up the remaining part of its time slice,
+ // but only to another thread on the same processor
+ // instead of constant checking, a thread only checks if no other useful work is pending
+ std::this_thread::yield();
+ }
#else
// std::this_thread::yield() allows this_thread to give up the remaining part of its time slice,
// but only to another thread on the same processor
// instead of constant checking, a thread only checks if no other useful work is pending
std::this_thread::yield();
#endif
- } else if ( ! value_.compare_exchange_strong( expected, 1, std::memory_order_acquire, std::memory_order_release) ) {
+ } else if ( ! value_.compare_exchange_strong( expected, 1, std::memory_order_acquire) ) {
// spinlock now contended
// utilize 'Binary Exponential Backoff' algorithm
// linear_congruential_engine is a random number engine based on Linear congruential generator (LCG)
- static thread_local std::minstd_rand generator;
- static std::uniform_int_distribution< std::int32_t > distribution{ 0, static_cast< std::int32_t >( 1) << collisions };
+ std::uniform_int_distribution< std::int32_t > distribution{
+ 0, static_cast< std::int32_t >( 1) << (std::min)(collisions, static_cast< std::int32_t >( BOOST_FIBERS_CONTENTION_WINDOW_THRESHOLD)) };
const std::int32_t z = distribution( generator);
++collisions;
for ( std::int32_t i = 0; i < z; ++i) {
@@ -91,6 +109,11 @@ public:
}
}
+ bool try_lock() noexcept {
+ std::int32_t expected = 0;
+ return value_.compare_exchange_strong( expected, 1, std::memory_order_acquire);
+ }
+
void unlock() noexcept {
if ( 1 != value_.fetch_sub( 1, std::memory_order_acquire) ) {
value_.store( 0, std::memory_order_release);
diff --git a/boost/fiber/detail/wrap.hpp b/boost/fiber/detail/wrap.hpp
deleted file mode 100644
index 558de6bd94..0000000000
--- a/boost/fiber/detail/wrap.hpp
+++ /dev/null
@@ -1,131 +0,0 @@
-
-// Copyright Oliver Kowalke 2014.
-// Distributed under the Boost Software License, Version 1.0.
-// (See accompanying file LICENSE_1_0.txt or copy at
-// http://www.boost.org/LICENSE_1_0.txt)
-
-#ifndef BOOST_FIBER_DETAIL_WRAP_H
-#define BOOST_FIBER_DETAIL_WRAP_H
-
-#include <type_traits>
-
-#include <boost/config.hpp>
-#if defined(BOOST_NO_CXX17_STD_INVOKE)
-#include <boost/context/detail/invoke.hpp>
-#endif
-#if (BOOST_EXECUTION_CONTEXT==1)
-# include <boost/context/execution_context.hpp>
-#else
-# include <boost/context/continuation.hpp>
-#endif
-
-#include <boost/fiber/detail/config.hpp>
-#include <boost/fiber/detail/data.hpp>
-
-#ifdef BOOST_HAS_ABI_HEADERS
-# include BOOST_ABI_PREFIX
-#endif
-
-namespace boost {
-namespace fibers {
-namespace detail {
-
-#if (BOOST_EXECUTION_CONTEXT==1)
-template< typename Fn1, typename Fn2, typename Tpl >
-class wrapper {
-private:
- typename std::decay< Fn1 >::type fn1_;
- typename std::decay< Fn2 >::type fn2_;
- typename std::decay< Tpl >::type tpl_;
- boost::context::execution_context ctx_;
-
-public:
- wrapper( Fn1 && fn1, Fn2 && fn2, Tpl && tpl,
- boost::context::execution_context const& ctx) :
- fn1_{ std::move( fn1) },
- fn2_{ std::move( fn2) },
- tpl_{ std::move( tpl) },
- ctx_{ ctx } {
- }
-
- wrapper( wrapper const&) = delete;
- wrapper & operator=( wrapper const&) = delete;
-
- wrapper( wrapper && other) = default;
- wrapper & operator=( wrapper && other) = default;
-
- void operator()( void * vp) {
-#if defined(BOOST_NO_CXX17_STD_INVOKE)
- boost::context::detail::invoke( std::move( fn1_), fn2_, tpl_, ctx_, vp);
-#else
- std::invoke( std::move( fn1_), fn2_, tpl_, ctx_, vp);
-#endif
- }
-};
-
-template< typename Fn1, typename Fn2, typename Tpl >
-wrapper< Fn1, Fn2, Tpl >
-wrap( Fn1 && fn1, Fn2 && fn2, Tpl && tpl,
- boost::context::execution_context const& ctx) {
- return wrapper< Fn1, Fn2, Tpl >{
- std::forward< Fn1 >( fn1),
- std::forward< Fn2 >( fn2),
- std::forward< Tpl >( tpl),
- ctx };
-}
-#else
-template< typename Fn1, typename Fn2, typename Tpl >
-class wrapper {
-private:
- typename std::decay< Fn1 >::type fn1_;
- typename std::decay< Fn2 >::type fn2_;
- typename std::decay< Tpl >::type tpl_;
-
-public:
- wrapper( Fn1 && fn1, Fn2 && fn2, Tpl && tpl) :
- fn1_{ std::move( fn1) },
- fn2_{ std::move( fn2) },
- tpl_{ std::move( tpl) } {
- }
-
- wrapper( wrapper const&) = delete;
- wrapper & operator=( wrapper const&) = delete;
-
- wrapper( wrapper && other) = default;
- wrapper & operator=( wrapper && other) = default;
-
- boost::context::continuation
- operator()( boost::context::continuation && c) {
-#if defined(BOOST_NO_CXX17_STD_INVOKE)
- return boost::context::detail::invoke(
- std::move( fn1_),
- fn2_,
- tpl_,
- std::forward< boost::context::continuation >( c) );
-#else
- return std::invoke(
- std::move( fn1_),
- fn2_,
- tpl_,
- std::forward< boost::context::continuation >( c) );
-#endif
- }
-};
-
-template< typename Fn1, typename Fn2, typename Tpl >
-wrapper< Fn1, Fn2, Tpl >
-wrap( Fn1 && fn1, Fn2 && fn2, Tpl && tpl) {
- return wrapper< Fn1, Fn2, Tpl >{
- std::forward< Fn1 >( fn1),
- std::forward< Fn2 >( fn2),
- std::forward< Tpl >( tpl) };
-}
-#endif
-
-}}}
-
-#ifdef BOOST_HAS_ABI_HEADERS
-#include BOOST_ABI_SUFFIX
-#endif
-
-#endif // BOOST_FIBER_DETAIL_WRAP_H