Diffstat (limited to 'boost/fiber/detail')
 boost/fiber/detail/config.hpp                       |  27 +
 boost/fiber/detail/context_mpsc_queue.hpp           |  98 +
 boost/fiber/detail/context_spmc_queue.hpp           | 158 +
 boost/fiber/detail/cpu_relax.hpp                    |  56 +
 boost/fiber/detail/data.hpp                         |   2 +-
 boost/fiber/detail/futex.hpp                        |  61 +
 boost/fiber/detail/spinlock.hpp                     |  49 +-
 boost/fiber/detail/spinlock_ttas.hpp                | 112 +
 boost/fiber/detail/spinlock_ttas_adaptive.hpp       | 117 +
 boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp | 111 +
 boost/fiber/detail/spinlock_ttas_futex.hpp          | 104 +
 boost/fiber/detail/wrap.hpp                         |   7 +-
 12 files changed, 886 insertions(+), 16 deletions(-)
diff --git a/boost/fiber/detail/config.hpp b/boost/fiber/detail/config.hpp
index 92a4c5fc3c..f65d48910d 100644
--- a/boost/fiber/detail/config.hpp
+++ b/boost/fiber/detail/config.hpp
@@ -7,7 +7,10 @@
#ifndef BOOST_FIBERS_DETAIL_CONFIG_H
#define BOOST_FIBERS_DETAIL_CONFIG_H
+#include <cstddef>
+
#include <boost/config.hpp>
+#include <boost/predef.h>
#include <boost/detail/workaround.hpp>
#ifdef BOOST_FIBERS_DECL
@@ -35,4 +38,28 @@
# include <boost/config/auto_link.hpp>
#endif
+#if BOOST_OS_LINUX || BOOST_OS_WINDOWS
+# define BOOST_FIBERS_HAS_FUTEX
+#endif
+
+#if (!defined(BOOST_FIBERS_HAS_FUTEX) && \
+ (defined(BOOST_FIBERS_SPINLOCK_TTAS_FUTEX) || defined(BOOST_FIBERS_SPINLOCK_TTAS_ADAPTIVE_FUTEX)))
+# error "futex not supported on this platform"
+#endif
+
+#if !defined(BOOST_FIBERS_SPIN_MAX_COLLISIONS)
+# define BOOST_FIBERS_SPIN_MAX_COLLISIONS 16
+#endif
+
+#if !defined(BOOST_FIBERS_SPIN_MAX_TESTS)
+# define BOOST_FIBERS_SPIN_MAX_TESTS 100
+#endif
+
+// modern architectures have cache lines of 64 bytes
+// ARM Cortex-A15 32/64 bytes, Cortex-A9 16/32/64 bytes
+// MIPS 74K: 32 bytes, 4KEc: 16 bytes
+// it should be safe to use 64 bytes for all
+static constexpr std::size_t cache_alignment{ 64 };
+static constexpr std::size_t cacheline_length{ 64 };
+
#endif // BOOST_FIBERS_DETAIL_CONFIG_H
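
The two constants above are consumed by the queue and spinlock headers below to keep hot atomics on dedicated cache lines. A minimal stand-alone sketch of the padding pattern they enable (the 'counters' struct and its members are hypothetical, for illustration only):

#include <atomic>
#include <cstddef>

static constexpr std::size_t cache_alignment{ 64 };
static constexpr std::size_t cacheline_length{ 64 };

// two counters written by different threads; if they shared one cache
// line, every write would invalidate the other thread's cached copy
struct counters {
    alignas(cache_alignment) std::atomic< std::size_t > produced{ 0 };
    // padding keeps 'consumed' off the cache line of 'produced'
    char pad[cacheline_length];
    alignas(cache_alignment) std::atomic< std::size_t > consumed{ 0 };
};

int main() {
    counters c;
    c.produced.fetch_add( 1, std::memory_order_relaxed);
    c.consumed.fetch_add( 1, std::memory_order_relaxed);
    return 0;
}
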
diff --git a/boost/fiber/detail/context_mpsc_queue.hpp b/boost/fiber/detail/context_mpsc_queue.hpp
new file mode 100644
index 0000000000..f7e664659c
--- /dev/null
+++ b/boost/fiber/detail/context_mpsc_queue.hpp
@@ -0,0 +1,98 @@
+
+// Copyright Dmitry Vyukov 2010-2011.
+// Copyright Oliver Kowalke 2016.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+//
+// based on Dmitry Vyukov's intrusive MPSC queue
+// http://www.1024cores.net/home/lock-free-algorithms/queues/intrusive-mpsc-node-based-queue
+// https://groups.google.com/forum/#!topic/lock-free/aFHvZhu1G-0
+
+#ifndef BOOST_FIBERS_DETAIL_CONTEXT_MPSC_QUEUE_H
+#define BOOST_FIBERS_DETAIL_CONTEXT_MPSC_QUEUE_H
+
+#include <atomic>
+#include <memory>
+#include <type_traits>
+
+#include <boost/assert.hpp>
+#include <boost/config.hpp>
+
+#include <boost/fiber/context.hpp>
+#include <boost/fiber/detail/config.hpp>
+
+#ifdef BOOST_HAS_ABI_HEADERS
+# include BOOST_ABI_PREFIX
+#endif
+
+namespace boost {
+namespace fibers {
+namespace detail {
+
+// an MPSC queue
+// multiple threads push ready fibers (belonging to the local scheduler)
+// the (thread-)local scheduler pops fibers
+class context_mpsc_queue {
+private:
+ // context has no default constructor - use aligned_storage instead
+ alignas(cache_alignment) std::aligned_storage< sizeof( context), alignof( context) >::type storage_{};
+ context * dummy_;
+ alignas(cache_alignment) std::atomic< context * > head_;
+ alignas(cache_alignment) context * tail_;
+ char pad_[cacheline_length];
+
+public:
+ context_mpsc_queue() :
+ dummy_{ reinterpret_cast< context * >( std::addressof( storage_) ) },
+ head_{ dummy_ },
+ tail_{ dummy_ } {
+ dummy_->remote_nxt_.store( nullptr, std::memory_order_release);
+ }
+
+ context_mpsc_queue( context_mpsc_queue const&) = delete;
+ context_mpsc_queue & operator=( context_mpsc_queue const&) = delete;
+
+ void push( context * ctx) noexcept {
+ BOOST_ASSERT( nullptr != ctx);
+ ctx->remote_nxt_.store( nullptr, std::memory_order_release);
+ context * prev = head_.exchange( ctx, std::memory_order_acq_rel);
+ prev->remote_nxt_.store( ctx, std::memory_order_release);
+ }
+
+ context * pop() noexcept {
+ context * tail = tail_;
+ context * next = tail->remote_nxt_.load( std::memory_order_acquire);
+ if ( dummy_ == tail) {
+ if ( nullptr == next) {
+ return nullptr;
+ }
+ tail_ = next;
+ tail = next;
+ next = next->remote_nxt_.load( std::memory_order_acquire);
+ }
+ if ( nullptr != next) {
+ tail_ = next;
+ return tail;
+ }
+ context * head = head_.load( std::memory_order_acquire);
+ if ( tail != head) {
+ return nullptr;
+ }
+ push( dummy_);
+ next = tail->remote_nxt_.load( std::memory_order_acquire);
+ if ( nullptr != next) {
+ tail_ = next;
+ return tail;
+ }
+ return nullptr;
+ }
+};
+
+}}}
+
+#ifdef BOOST_HAS_ABI_HEADERS
+# include BOOST_ABI_SUFFIX
+#endif
+
+#endif // BOOST_FIBERS_DETAIL_CONTEXT_MPSC_QUEUE_H
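
For readers unfamiliar with Vyukov's algorithm, the following stand-alone sketch reproduces the same push()/pop() logic with a hypothetical plain node type in place of context (remote_nxt_ becomes next); it is illustrative, not part of the library:

#include <atomic>
#include <cassert>

// hypothetical node type; the class above uses context::remote_nxt_ instead
struct node {
    std::atomic< node * > next{ nullptr };
    int value{ 0 };
};

class mpsc_queue {
    node dummy_;                             // stub node: list is never empty
    std::atomic< node * > head_{ & dummy_ }; // producers exchange here
    node * tail_{ & dummy_ };                // consumer-only end
public:
    // multiple producers: wait-free, one exchange plus two stores
    void push( node * n) {
        n->next.store( nullptr, std::memory_order_release);
        node * prev = head_.exchange( n, std::memory_order_acq_rel);
        prev->next.store( n, std::memory_order_release); // link may lag briefly
    }
    // single consumer; nullptr if empty or a producer is mid-push
    node * pop() {
        node * tail = tail_;
        node * next = tail->next.load( std::memory_order_acquire);
        if ( tail == & dummy_) {             // skip the stub node
            if ( nullptr == next) return nullptr;
            tail_ = next; tail = next;
            next = next->next.load( std::memory_order_acquire);
        }
        if ( nullptr != next) { tail_ = next; return tail; }
        if ( tail != head_.load( std::memory_order_acquire) ) return nullptr;
        push( & dummy_);                     // re-insert stub so tail can advance
        next = tail->next.load( std::memory_order_acquire);
        if ( nullptr != next) { tail_ = next; return tail; }
        return nullptr;
    }
};

int main() {
    mpsc_queue q;
    node a; a.value = 1;
    q.push( & a);
    node * n = q.pop();
    assert( n == & a && 1 == n->value);
}
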
diff --git a/boost/fiber/detail/context_spmc_queue.hpp b/boost/fiber/detail/context_spmc_queue.hpp
new file mode 100644
index 0000000000..6449e3658f
--- /dev/null
+++ b/boost/fiber/detail/context_spmc_queue.hpp
@@ -0,0 +1,158 @@
+
+// Copyright Oliver Kowalke 2013.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_FIBERS_DETAIL_CONTEXT_SPMC_QUEUE_H
+#define BOOST_FIBERS_DETAIL_CONTEXT_SPMC_QUEUE_H
+
+#include <atomic>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <type_traits>
+#include <utility>
+#include <vector>
+
+#include <boost/assert.hpp>
+#include <boost/config.hpp>
+
+#include <boost/fiber/detail/config.hpp>
+#include <boost/fiber/context.hpp>
+
+// David Chase and Yossi Lev. Dynamic circular work-stealing deque.
+// In SPAA ’05: Proceedings of the seventeenth annual ACM symposium
+// on Parallelism in algorithms and architectures, pages 21–28,
+// New York, NY, USA, 2005. ACM.
+//
+// Nhat Minh Lê, Antoniu Pop, Albert Cohen, and Francesco Zappa Nardelli. 2013.
+// Correct and efficient work-stealing for weak memory models.
+// In Proceedings of the 18th ACM SIGPLAN symposium on Principles and practice
+// of parallel programming (PPoPP '13). ACM, New York, NY, USA, 69-80.
+
+namespace boost {
+namespace fibers {
+namespace detail {
+
+class context_spmc_queue {
+private:
+ class array {
+ private:
+ typedef std::atomic< context * > atomic_type;
+ typedef std::aligned_storage<
+ sizeof( atomic_type), cache_alignment
+ >::type storage_type;
+
+ std::size_t size_;
+ storage_type * storage_;
+
+ public:
+ array( std::size_t size) :
+ size_{ size },
+ storage_{ new storage_type[size_] } {
+ for ( std::size_t i = 0; i < size_; ++i) {
+ ::new ( static_cast< void * >( std::addressof( storage_[i]) ) ) atomic_type{ nullptr };
+ }
+ }
+
+ ~array() {
+ for ( std::size_t i = 0; i < size_; ++i) {
+ reinterpret_cast< atomic_type * >( std::addressof( storage_[i]) )->~atomic_type();
+ }
+ delete [] storage_;
+ }
+
+ std::size_t size() const noexcept {
+ return size_;
+ }
+
+ void push( std::size_t bottom, context * ctx) noexcept {
+ reinterpret_cast< atomic_type * >(
+ std::addressof( storage_[bottom % size_]) )
+ ->store( ctx, std::memory_order_relaxed);
+ }
+
+ context * pop( std::size_t top) noexcept {
+ return reinterpret_cast< atomic_type * >(
+ std::addressof( storage_[top % size_]) )
+ ->load( std::memory_order_relaxed);
+ }
+
+ array * resize( std::size_t bottom, std::size_t top) {
+ std::unique_ptr< array > tmp{ new array{ 2 * size_ } };
+ for ( std::size_t i = top; i != bottom; ++i) {
+ tmp->push( i, pop( i) );
+ }
+ return tmp.release();
+ }
+ };
+
+ alignas(cache_alignment) std::atomic< std::size_t > top_{ 0 };
+ alignas(cache_alignment) std::atomic< std::size_t > bottom_{ 0 };
+ alignas(cache_alignment) std::atomic< array * > array_;
+ std::vector< array * > old_arrays_{};
+ char padding_[cacheline_length];
+
+public:
+ context_spmc_queue() :
+ array_{ new array{ 1024 } } {
+ old_arrays_.reserve( 32);
+ }
+
+ ~context_spmc_queue() {
+ for ( array * a : old_arrays_) {
+ delete a;
+ }
+ delete array_.load();
+ }
+
+ context_spmc_queue( context_spmc_queue const&) = delete;
+ context_spmc_queue & operator=( context_spmc_queue const&) = delete;
+
+ bool empty() const noexcept {
+ std::size_t bottom{ bottom_.load( std::memory_order_relaxed) };
+ std::size_t top{ top_.load( std::memory_order_relaxed) };
+ return bottom <= top;
+ }
+
+ void push( context * ctx) {
+ std::size_t bottom{ bottom_.load( std::memory_order_relaxed) };
+ std::size_t top{ top_.load( std::memory_order_acquire) };
+ array * a{ array_.load( std::memory_order_relaxed) };
+ if ( (a->size() - 1) < (bottom - top) ) {
+ // queue is full
+ // resize
+ array * tmp{ a->resize( bottom, top) };
+ old_arrays_.push_back( a);
+ std::swap( a, tmp);
+ array_.store( a, std::memory_order_relaxed);
+ }
+ a->push( bottom, ctx);
+ std::atomic_thread_fence( std::memory_order_release);
+ bottom_.store( bottom + 1, std::memory_order_relaxed);
+ }
+
+ context * pop() {
+ std::size_t top{ top_.load( std::memory_order_acquire) };
+ std::atomic_thread_fence( std::memory_order_seq_cst);
+ std::size_t bottom{ bottom_.load( std::memory_order_acquire) };
+ context * ctx{ nullptr };
+ if ( top < bottom) {
+ // queue is not empty
+ array * a{ array_.load( std::memory_order_consume) };
+ ctx = a->pop( top);
+ if ( ctx->is_context( type::pinned_context) ||
+ ! top_.compare_exchange_strong( top, top + 1,
+ std::memory_order_seq_cst,
+ std::memory_order_relaxed) ) {
+ // lose the race
+ return nullptr;
+ }
+ }
+ return ctx;
+ }
+};
+
+}}}
+
+#endif // BOOST_FIBERS_DETAIL_CONTEXT_SPMC_QUEUE_H
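
Note that pop() here is the steal operation of the Chase-Lev deque: it takes from the top (the opposite end from push) and resolves races between thieves with a CAS on top_. A condensed stand-alone sketch with int payloads and a fixed-capacity ring, under the assumption that no more than 'capacity' items are ever in flight so the resize path above can be omitted (memory orders follow the class above):

#include <atomic>
#include <cassert>
#include <cstddef>

class ws_queue {
    static constexpr std::size_t capacity = 1024;
    alignas(64) std::atomic< std::size_t > top_{ 0 };    // steal end
    alignas(64) std::atomic< std::size_t > bottom_{ 0 }; // owner end
    std::atomic< int > slots_[capacity];                 // indexed mod capacity
public:
    // owner thread only: publish the slot, then advance bottom
    void push( int v) {
        std::size_t bottom = bottom_.load( std::memory_order_relaxed);
        slots_[bottom % capacity].store( v, std::memory_order_relaxed);
        std::atomic_thread_fence( std::memory_order_release);
        bottom_.store( bottom + 1, std::memory_order_relaxed);
    }
    // any thread; races between thieves are resolved by the CAS on top_
    bool steal( int & out) {
        std::size_t top = top_.load( std::memory_order_acquire);
        std::atomic_thread_fence( std::memory_order_seq_cst);
        std::size_t bottom = bottom_.load( std::memory_order_acquire);
        if ( top >= bottom) return false;                // empty
        out = slots_[top % capacity].load( std::memory_order_relaxed);
        // another thief may have claimed the same slot first
        return top_.compare_exchange_strong( top, top + 1,
                                             std::memory_order_seq_cst,
                                             std::memory_order_relaxed);
    }
};

int main() {
    ws_queue q;
    q.push( 7);
    int v = 0;
    assert( q.steal( v) && 7 == v);
}
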
diff --git a/boost/fiber/detail/cpu_relax.hpp b/boost/fiber/detail/cpu_relax.hpp
new file mode 100644
index 0000000000..d00020a23b
--- /dev/null
+++ b/boost/fiber/detail/cpu_relax.hpp
@@ -0,0 +1,56 @@
+
+// Copyright Oliver Kowalke 2016.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_FIBERS_DETAIL_CPU_RELAX_H
+#define BOOST_FIBERS_DETAIL_CPU_RELAX_H
+
+#include <thread>
+
+#include <boost/config.hpp>
+#include <boost/predef.h>
+
+#include <boost/fiber/detail/config.hpp>
+
+#if BOOST_COMP_MSVC
+# include <Windows.h>
+#endif
+
+#ifdef BOOST_HAS_ABI_HEADERS
+# include BOOST_ABI_PREFIX
+#endif
+
+namespace boost {
+namespace fibers {
+namespace detail {
+
+#if BOOST_ARCH_ARM
+# if BOOST_COMP_MSVC
+# define cpu_relax() YieldProcessor();
+# else
+# define cpu_relax() asm volatile ("yield" ::: "memory");
+# endif
+#elif BOOST_ARCH_MIPS
+# define cpu_relax() asm volatile ("pause" ::: "memory");
+#elif BOOST_ARCH_PPC
+# define cpu_relax() asm volatile ("or 27,27,27" ::: "memory");
+#elif BOOST_ARCH_X86
+# if BOOST_COMP_MSVC
+# define cpu_relax() YieldProcessor();
+# else
+# define cpu_relax() asm volatile ("pause" ::: "memory");
+# endif
+#else
+# warning "architecture does not support yield/pause mnemonic"
+# define cpu_relax() std::this_thread::yield();
+#endif
+
+}}}
+
+#ifdef BOOST_HAS_ABI_HEADERS
+# include BOOST_ABI_SUFFIX
+#endif
+
+#endif // BOOST_FIBERS_DETAIL_CPU_RELAX_H
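
A typical consumer of this macro is a bounded spin-wait loop. A minimal sketch, assuming GCC or Clang on x86 where cpu_relax() expands to the pause instruction as above:

#include <atomic>
#include <thread>

#define cpu_relax() asm volatile ("pause" ::: "memory")  // x86, GCC/Clang

std::atomic< bool > ready{ false };

void spin_until_ready() {
    // re-test 'ready' between each pause so the wait ends promptly
    // once another thread flips the flag
    while ( ! ready.load( std::memory_order_acquire) ) {
        cpu_relax();
    }
}

int main() {
    std::thread t{ spin_until_ready };
    ready.store( true, std::memory_order_release);
    t.join();
}
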
diff --git a/boost/fiber/detail/data.hpp b/boost/fiber/detail/data.hpp
index a3e8a7e347..24e833a9e8 100644
--- a/boost/fiber/detail/data.hpp
+++ b/boost/fiber/detail/data.hpp
@@ -28,7 +28,7 @@ struct data_t {
spinlock_lock * lk{ nullptr };
context * ctx{ nullptr };
- constexpr data_t() noexcept = default;
+ data_t() noexcept = default;
explicit data_t( spinlock_lock * lk_) noexcept :
lk{ lk_ } {
diff --git a/boost/fiber/detail/futex.hpp b/boost/fiber/detail/futex.hpp
new file mode 100644
index 0000000000..4c966867c5
--- /dev/null
+++ b/boost/fiber/detail/futex.hpp
@@ -0,0 +1,61 @@
+
+// Copyright Oliver Kowalke 2016.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_FIBERS_DETAIL_FUTEX_H
+#define BOOST_FIBERS_DETAIL_FUTEX_H
+
+#include <atomic>
+#include <cstdint>
+
+#include <boost/config.hpp>
+#include <boost/predef.h>
+
+#include <boost/fiber/detail/config.hpp>
+
+#if BOOST_OS_LINUX
+extern "C" {
+#include <linux/futex.h>
+#include <sys/syscall.h>
+}
+#elif BOOST_OS_WINDOWS
+#include <Windows.h>
+#endif
+
+namespace boost {
+namespace fibers {
+namespace detail {
+
+#if BOOST_OS_LINUX
+inline
+int sys_futex( void * addr, std::int32_t op, std::int32_t x) {
+ return ::syscall( SYS_futex, addr, op, x, nullptr, nullptr, 0);
+}
+
+inline
+int futex_wake( std::atomic< std::int32_t > * addr) {
+ return 0 <= sys_futex( static_cast< void * >( addr), FUTEX_WAKE_PRIVATE, 1) ? 0 : -1;
+}
+
+inline
+int futex_wait( std::atomic< std::int32_t > * addr, std::int32_t x) {
+ return 0 <= sys_futex( static_cast< void * >( addr), FUTEX_WAIT_PRIVATE, x) ? 0 : -1;
+}
+#elif BOOST_OS_WINDOWS
+inline
+int futex_wake( std::atomic< std::int32_t > * addr) {
+ ::WakeByAddressSingle( static_cast< void * >( addr) );
+ return 0;
+}
+
+inline
+int futex_wait( std::atomic< std::int32_t > * addr, std::int32_t x) {
+ ::WaitOnAddress( static_cast< volatile void * >( addr), & x, sizeof( x), -1);
+ return 0;
+}
+#else
+# warning "no futex support on this platform"
+#endif
+
+}}}
+
+#endif // BOOST_FIBERS_DETAIL_FUTEX_H
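
A minimal Linux-only demonstration of the wait/wake pair: one thread blocks in the kernel until another flips the word and wakes it. The wrapper mirrors the sys_futex() shape above; the program itself is illustrative:

// Linux only; build with -pthread
#include <atomic>
#include <cstdint>
#include <thread>
extern "C" {
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
}

static std::atomic< std::int32_t > word{ 0 };

static long sys_futex( void * addr, std::int32_t op, std::int32_t x) {
    return ::syscall( SYS_futex, addr, op, x, nullptr, nullptr, 0);
}

int main() {
    std::thread waiter{ [] {
        // sleeps in the kernel only while word is still 0; if the value
        // changed before the call, FUTEX_WAIT returns immediately and
        // the predicate is simply re-checked
        while ( 0 == word.load( std::memory_order_acquire) ) {
            sys_futex( & word, FUTEX_WAIT_PRIVATE, 0);
        }
    } };
    word.store( 1, std::memory_order_release);
    sys_futex( & word, FUTEX_WAKE_PRIVATE, 1);
    waiter.join();
}
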
diff --git a/boost/fiber/detail/spinlock.hpp b/boost/fiber/detail/spinlock.hpp
index e2af3a86c7..89a6d51a6f 100644
--- a/boost/fiber/detail/spinlock.hpp
+++ b/boost/fiber/detail/spinlock.hpp
@@ -3,40 +3,63 @@
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
-//
-// based on boost::interprocess::sync::interprocess_spin::mutex
#ifndef BOOST_FIBERS_SPINLOCK_H
#define BOOST_FIBERS_SPINLOCK_H
-#include <mutex>
+#include <boost/config.hpp>
#include <boost/fiber/detail/config.hpp>
+#if !defined(BOOST_FIBERS_NO_ATOMICS)
+# include <mutex>
+# include <boost/fiber/detail/spinlock_ttas.hpp>
+# include <boost/fiber/detail/spinlock_ttas_adaptive.hpp>
+# if defined(BOOST_FIBERS_HAS_FUTEX)
+# include <boost/fiber/detail/spinlock_ttas_futex.hpp>
+# include <boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp>
+# endif
+#endif
+
+#ifdef BOOST_HAS_ABI_HEADERS
+# include BOOST_ABI_PREFIX
+#endif
+
namespace boost {
namespace fibers {
namespace detail {
-struct non_spinlock {
- constexpr non_spinlock() noexcept {}
+#if defined(BOOST_FIBERS_NO_ATOMICS)
+struct spinlock {
+ constexpr spinlock() noexcept {}
void lock() noexcept {}
void unlock() noexcept {}
};
-struct non_lock {
- constexpr non_lock( non_spinlock) noexcept {}
+struct spinlock_lock {
+ constexpr spinlock_lock( spinlock &) noexcept {}
void lock() noexcept {}
void unlock() noexcept {}
};
-
-#if ! defined(BOOST_FIBERS_NO_ATOMICS)
-typedef std::mutex spinlock;
-using spinlock_lock = std::unique_lock< spinlock >;
#else
-typedef non_spinlock spinlock;
-using spinlock_lock = non_lock;
+# if defined(BOOST_FIBERS_SPINLOCK_STD_MUTEX)
+using spinlock = std::mutex;
+# elif defined(BOOST_FIBERS_SPINLOCK_TTAS_FUTEX)
+using spinlock = spinlock_ttas_futex;
+# elif defined(BOOST_FIBERS_SPINLOCK_TTAS_ADAPTIVE_FUTEX)
+using spinlock = spinlock_ttas_adaptive_futex;
+# elif defined(BOOST_FIBERS_SPINLOCK_TTAS_ADAPTIVE)
+using spinlock = spinlock_ttas_adaptive;
+# else
+using spinlock = spinlock_ttas;
+# endif
+using spinlock_lock = std::unique_lock< spinlock >;
#endif
}}}
+#ifdef BOOST_HAS_ABI_HEADERS
+# include BOOST_ABI_SUFFIX
+#endif
+
#endif // BOOST_FIBERS_SPINLOCK_H
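
The rest of the library now codes against the spinlock/spinlock_lock aliases; the concrete implementation is chosen at build time via the macros above. A hypothetical configuration sketch (these are internal detail types, shown only to illustrate the selection mechanism):

// choose the implementation before any Boost.Fiber header is included,
// e.g. on the command line: -DBOOST_FIBERS_SPINLOCK_TTAS_ADAPTIVE
// (the default is spinlock_ttas; BOOST_FIBERS_SPINLOCK_STD_MUTEX selects std::mutex)
#include <boost/fiber/detail/spinlock.hpp>

static boost::fibers::detail::spinlock mtx;
static int shared_counter = 0;

void bump() {
    // spinlock_lock is std::unique_lock< spinlock >, so RAII applies as usual
    boost::fibers::detail::spinlock_lock lk{ mtx };
    ++shared_counter;
}
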
diff --git a/boost/fiber/detail/spinlock_ttas.hpp b/boost/fiber/detail/spinlock_ttas.hpp
new file mode 100644
index 0000000000..d64630d84d
--- /dev/null
+++ b/boost/fiber/detail/spinlock_ttas.hpp
@@ -0,0 +1,112 @@
+
+// Copyright Oliver Kowalke 2016.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_FIBERS_SPINLOCK_TTAS_H
+#define BOOST_FIBERS_SPINLOCK_TTAS_H
+
+#include <atomic>
+#include <cstddef>
+#include <chrono>
+#include <random>
+#include <thread>
+
+#include <boost/fiber/detail/config.hpp>
+#include <boost/fiber/detail/cpu_relax.hpp>
+
+// based on information from:
+// https://software.intel.com/en-us/articles/benefitting-power-and-performance-sleep-loops
+// https://software.intel.com/en-us/articles/long-duration-spin-wait-loops-on-hyper-threading-technology-enabled-intel-processors
+
+namespace boost {
+namespace fibers {
+namespace detail {
+
+class spinlock_ttas {
+private:
+ enum class spinlock_status {
+ locked = 0,
+ unlocked
+ };
+
+ // align shared variable 'state_' on a cache-line boundary to prevent false sharing
+ alignas(cache_alignment) std::atomic< spinlock_status > state_{ spinlock_status::unlocked };
+ // padding to keep other data off the cache line of shared variable 'state_'
+ char pad[cacheline_length];
+
+public:
+ spinlock_ttas() noexcept = default;
+
+ spinlock_ttas( spinlock_ttas const&) = delete;
+ spinlock_ttas & operator=( spinlock_ttas const&) = delete;
+
+ void lock() noexcept {
+ std::size_t collisions = 0;
+ for (;;) {
+ // avoid using multiple pause instructions for a delay of a specific cycle count
+ // the delay of cpu_relax() (pause on Intel) depends on the processor family
+ // the cycle count cannot be guaranteed from one system to the next
+ // -> check the shared variable 'state_' in between each cpu_relax() to prevent
+ // unnecessarily long delays on some systems
+ std::size_t tests = 0;
+ // test shared variable 'state_'
+ // first access to 'state_' -> cache miss
+ // successive accesses to 'state_' -> cache hit
+ // if 'state_' was released by another fiber,
+ // the cached 'state_' is invalidated -> cache miss
+ while ( spinlock_status::locked == state_.load( std::memory_order_relaxed) ) {
+#if !defined(BOOST_FIBERS_SPIN_SINGLE_CORE)
+ if ( BOOST_FIBERS_SPIN_MAX_TESTS > tests) {
+ ++tests;
+ // give CPU a hint that this thread is in a "spin-wait" loop
+ // delays the next instruction's execution for a finite period of time (depends on processor family)
+ // the CPU is not under demand, parts of the pipeline are no longer being used
+ // -> reduces the power consumed by the CPU
+ cpu_relax();
+ } else if ( BOOST_FIBERS_SPIN_MAX_TESTS + 20 > tests) {
+ ++tests;
+ // std::this_thread::sleep_for( 0us) has a fairly long instruction path length,
+ // combined with an expensive ring 3 to ring 0 transition costing about 1000 cycles
+ // std::this_thread::sleep_for( 0us) lets this_thread give up the remaining part of its time slice
+ // if and only if a thread of equal or greater priority is ready to run
+ static constexpr std::chrono::microseconds us0{ 0 };
+ std::this_thread::sleep_for( us0);
+ } else {
+ // std::this_thread::yield() allows this_thread to give up the remaining part of its time slice,
+ // but only to another thread on the same processor
+ // instead of constant checking, a thread only checks if no other useful work is pending
+ std::this_thread::yield();
+ }
+#else
+ std::this_thread::yield();
+#endif
+ }
+ // test-and-set shared variable 'state_'
+ // every time 'state_' is signaled over the bus, even if the test fails
+ if ( spinlock_status::locked == state_.exchange( spinlock_status::locked, std::memory_order_acquire) ) {
+ // spinlock now contended
+ // utilize 'Binary Exponential Backoff' algorithm
+ // linear_congruential_engine is a random number engine based on Linear congruential generator (LCG)
+ static thread_local std::minstd_rand generator;
+ const std::size_t z =
+ std::uniform_int_distribution< std::size_t >{ 0, static_cast< std::size_t >( 1) << collisions }( generator);
+ ++collisions;
+ for ( std::size_t i = 0; i < z; ++i) {
+ cpu_relax();
+ }
+ } else {
+ // success, thread has acquired the lock
+ break;
+ }
+ }
+ }
+
+ void unlock() noexcept {
+ state_.store( spinlock_status::unlocked, std::memory_order_release);
+ }
+};
+
+}}}
+
+#endif // BOOST_FIBERS_SPINLOCK_TTAS_H
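
The core idea of test-and-test-and-set, stripped of the backoff and sleep tiers above, fits in a few lines. A didactic stand-alone sketch, not a replacement for the class above:

#include <atomic>
#include <thread>

class ttas_lock {
    std::atomic< bool > locked_{ false };
public:
    void lock() {
        for (;;) {
            // read-only spin: hits the local cache, generates no bus traffic
            while ( locked_.load( std::memory_order_relaxed) ) {
                std::this_thread::yield();
            }
            // only now attempt the (bus-invalidating) atomic exchange
            if ( ! locked_.exchange( true, std::memory_order_acquire) ) {
                return;
            }
        }
    }
    void unlock() {
        locked_.store( false, std::memory_order_release);
    }
};

int main() {
    ttas_lock lk;
    int counter = 0;
    auto work = [&] {
        for ( int i = 0; i < 100000; ++i) {
            lk.lock(); ++counter; lk.unlock();
        }
    };
    std::thread t1{ work }, t2{ work };
    t1.join(); t2.join();
    return 200000 == counter ? 0 : 1;
}
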
diff --git a/boost/fiber/detail/spinlock_ttas_adaptive.hpp b/boost/fiber/detail/spinlock_ttas_adaptive.hpp
new file mode 100644
index 0000000000..c6a9a57d79
--- /dev/null
+++ b/boost/fiber/detail/spinlock_ttas_adaptive.hpp
@@ -0,0 +1,117 @@
+
+// Copyright Oliver Kowalke 2016.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_FIBERS_SPINLOCK_TTAS_ADAPTIVE_H
+#define BOOST_FIBERS_SPINLOCK_TTAS_ADAPTIVE_H
+
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <cmath>
+#include <cstddef>
+#include <cstdint>
+#include <random>
+#include <thread>
+
+#include <boost/fiber/detail/config.hpp>
+#include <boost/fiber/detail/cpu_relax.hpp>
+
+// based on information from:
+// https://software.intel.com/en-us/articles/benefitting-power-and-performance-sleep-loops
+// https://software.intel.com/en-us/articles/long-duration-spin-wait-loops-on-hyper-threading-technology-enabled-intel-processors
+
+namespace boost {
+namespace fibers {
+namespace detail {
+
+class spinlock_ttas_adaptive {
+private:
+ enum class spinlock_status {
+ locked = 0,
+ unlocked
+ };
+
+ // align shared variable 'state_' on a cache-line boundary to prevent false sharing
+ alignas(cache_alignment) std::atomic< spinlock_status > state_{ spinlock_status::unlocked };
+ std::atomic< std::size_t > tests_{ 0 };
+ // padding to keep other data off the cache line of shared variable 'state_'
+ char pad[cacheline_length];
+
+public:
+ spinlock_ttas_adaptive() noexcept = default;
+
+ spinlock_ttas_adaptive( spinlock_ttas_adaptive const&) = delete;
+ spinlock_ttas_adaptive & operator=( spinlock_ttas_adaptive const&) = delete;
+
+ void lock() noexcept {
+ std::size_t collisions = 0;
+ for (;;) {
+ std::size_t tests = 0;
+ const std::size_t prev_tests = tests_.load( std::memory_order_relaxed);
+ const std::size_t max_tests = (std::min)( static_cast< std::size_t >( BOOST_FIBERS_SPIN_MAX_TESTS), 2 * prev_tests + 10);
+ // avoid using multiple pause instructions for a delay of a specific cycle count
+ // the delay of cpu_relax() (pause on Intel) depends on the processor family
+ // the cycle count cannot be guaranteed from one system to the next
+ // -> check the shared variable 'state_' in between each cpu_relax() to prevent
+ // unnecessarily long delays on some systems
+ // test shared variable 'state_'
+ // first access to 'state_' -> cache miss
+ // successive accesses to 'state_' -> cache hit
+ // if 'state_' was released by another fiber,
+ // the cached 'state_' is invalidated -> cache miss
+ while ( spinlock_status::locked == state_.load( std::memory_order_relaxed) ) {
+#if !defined(BOOST_FIBERS_SPIN_SINGLE_CORE)
+ if ( max_tests > tests) {
+ ++tests;
+ // give CPU a hint that this thread is in a "spin-wait" loop
+ // delays the next instruction's execution for a finite period of time (depends on processor family)
+ // the CPU is not under demand, parts of the pipeline are no longer being used
+ // -> reduces the power consumed by the CPU
+ cpu_relax();
+ } else if ( BOOST_FIBERS_SPIN_MAX_TESTS + 20 > tests) {
+ ++tests;
+ // std::this_thread::sleep_for( 0us) has a fairly long instruction path length,
+ // combined with an expensive ring 3 to ring 0 transition costing about 1000 cycles
+ // std::this_thread::sleep_for( 0us) lets this_thread give up the remaining part of its time slice
+ // if and only if a thread of equal or greater priority is ready to run
+ static constexpr std::chrono::microseconds us0{ 0 };
+ std::this_thread::sleep_for( us0);
+ } else {
+ // std::this_thread::yield() allows this_thread to give up the remaining part of its time slice,
+ // but only to another thread on the same processor
+ // instead of constant checking, a thread only checks if no other useful work is pending
+ std::this_thread::yield();
+ }
+#else
+ std::this_thread::yield();
+#endif
+ }
+ // test-and-set shared variable 'state_'
+ // every time 'state_' is signaled over the bus, even if the test fails
+ if ( spinlock_status::locked == state_.exchange( spinlock_status::locked, std::memory_order_acquire) ) {
+ // spinlock now contended
+ // utilize 'Binary Exponential Backoff' algorithm
+ // linear_congruential_engine is a random number engine based on Linear congruential generator (LCG)
+ static thread_local std::minstd_rand generator;
+ const std::size_t z =
+ std::uniform_int_distribution< std::size_t >{ 0, static_cast< std::size_t >( 1) << collisions }( generator);
+ ++collisions;
+ for ( std::size_t i = 0; i < z; ++i) {
+ cpu_relax();
+ }
+ } else {
+ // exponential moving average of the spin count; a signed intermediate
+ // keeps the correction term well-defined when tests < prev_tests
+ tests_.store( prev_tests + ( static_cast< std::intptr_t >( tests) - static_cast< std::intptr_t >( prev_tests) ) / 8, std::memory_order_relaxed);
+ // success, thread has acquired the lock
+ break;
+ }
+ }
+ }
+
+ void unlock() noexcept {
+ state_.store( spinlock_status::unlocked, std::memory_order_release);
+ }
+};
+
+}}}
+
+#endif // BOOST_FIBERS_SPINLOCK_TTAS_ADAPTIVE_H
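
The adaptive part is the final tests_ update: an exponential moving average (weight 1/8) of how long recent acquisitions had to spin, which then bounds the next spin budget through max_tests. A small numeric sketch with illustrative values; signed arithmetic keeps the correction term well-defined when contention drops:

#include <algorithm>
#include <cstdio>

int main() {
    int avg = 0;                                     // tests_ in the class above
    const int observed[] = { 40, 40, 40, 4, 4, 4 };  // spins actually needed
    for ( int tests : observed) {
        int max_tests = std::min( 100, 2 * avg + 10); // next spin budget
        avg = avg + ( tests - avg) / 8;               // EMA update, weight 1/8
        std::printf( "budget=%3d observed=%3d new avg=%3d\n",
                     max_tests, tests, avg);
    }
}
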
diff --git a/boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp b/boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp
new file mode 100644
index 0000000000..fbd6a0e4d2
--- /dev/null
+++ b/boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp
@@ -0,0 +1,111 @@
+
+// Copyright Oliver Kowalke 2016.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_FIBERS_SPINLOCK_TTAS_ADAPTIVE_FUTEX_H
+#define BOOST_FIBERS_SPINLOCK_TTAS_ADAPTIVE_FUTEX_H
+
+#include <algorithm>
+#include <atomic>
+#include <cmath>
+#include <cstdint>
+#include <random>
+#include <thread>
+
+#include <boost/fiber/detail/config.hpp>
+#include <boost/fiber/detail/cpu_relax.hpp>
+#include <boost/fiber/detail/futex.hpp>
+
+// based on information from:
+// https://software.intel.com/en-us/articles/benefitting-power-and-performance-sleep-loops
+// https://software.intel.com/en-us/articles/long-duration-spin-wait-loops-on-hyper-threading-technology-enabled-intel-processors
+
+namespace boost {
+namespace fibers {
+namespace detail {
+
+class spinlock_ttas_adaptive_futex {
+private:
+ // align shared variable 'value_' on a cache-line boundary to prevent false sharing
+ alignas(cache_alignment) std::atomic< std::int32_t > value_{ 0 };
+ std::atomic< std::int32_t > tests_{ 0 };
+ // padding to keep other data off the cache line of shared variable 'value_'
+ char pad_[cacheline_length];
+
+public:
+ spinlock_ttas_adaptive_futex() noexcept = default;
+
+ spinlock_ttas_adaptive_futex( spinlock_ttas_adaptive_futex const&) = delete;
+ spinlock_ttas_adaptive_futex & operator=( spinlock_ttas_adaptive_futex const&) = delete;
+
+ void lock() noexcept {
+ std::int32_t collisions = 0, tests = 0, expected = 0;
+ const std::int32_t prev_tests = tests_.load( std::memory_order_relaxed);
+ const std::int32_t max_tests = (std::min)( static_cast< std::int32_t >( BOOST_FIBERS_SPIN_MAX_TESTS), 2 * prev_tests + 10);
+ // after max. spins or collisions suspend via futex
+ while ( max_tests > tests && BOOST_FIBERS_SPIN_MAX_COLLISIONS > collisions) {
+ // avoid using multiple pause instructions for a delay of a specific cycle count
+ // the delay of cpu_relax() (pause on Intel) depends on the processor family
+ // the cycle count cannot be guaranteed from one system to the next
+ // -> check the shared variable 'value_' in between each cpu_relax() to prevent
+ // unnecessarily long delays on some systems
+ // test shared variable 'value_'
+ // first access to 'value_' -> cache miss
+ // successive accesses to 'value_' -> cache hit
+ // if 'value_' was released by another fiber,
+ // the cached 'value_' is invalidated -> cache miss
+ if ( 0 != ( expected = value_.load( std::memory_order_relaxed) ) ) {
+ ++tests;
+#if !defined(BOOST_FIBERS_SPIN_SINGLE_CORE)
+ // give CPU a hint that this thread is in a "spin-wait" loop
+ // delays the next instruction's execution for a finite period of time (depends on processor family)
+ // the CPU is not under demand, parts of the pipeline are no longer being used
+ // -> reduces the power consumed by the CPU
+ cpu_relax();
+#else
+ // std::this_thread::yield() allows this_thread to give up the remaining part of its time slice,
+ // but only to another thread on the same processor
+ // instead of constant checking, a thread only checks if no other useful work is pending
+ std::this_thread::yield();
+#endif
+ } else if ( ! value_.compare_exchange_strong( expected, 1, std::memory_order_acquire, std::memory_order_relaxed) ) {
+ // spinlock now contended
+ // utilize 'Binary Exponential Backoff' algorithm
+ // linear_congruential_engine is a random number engine based on Linear congruential generator (LCG)
+ static thread_local std::minstd_rand generator;
+ const std::int32_t z = std::uniform_int_distribution< std::int32_t >{
+ 0, static_cast< std::int32_t >( 1) << collisions }( generator);
+ ++collisions;
+ for ( std::int32_t i = 0; i < z; ++i) {
+ cpu_relax();
+ }
+ } else {
+ // success, lock acquired
+ tests_.store( prev_tests + (tests - prev_tests) / 8, std::memory_order_relaxed);
+ return;
+ }
+ }
+ // failure, lock not acquired
+ // pause via futex
+ if ( 2 != expected) {
+ expected = value_.exchange( 2, std::memory_order_acquire);
+ }
+ while ( 0 != expected) {
+ futex_wait( & value_, 2);
+ expected = value_.exchange( 2, std::memory_order_acquire);
+ }
+ // success, lock acquired
+ tests_.store( prev_tests + (tests - prev_tests) / 8, std::memory_order_relaxed);
+ }
+
+ void unlock() noexcept {
+ // release semantics so writes in the critical section are visible to the next owner
+ if ( 1 != value_.fetch_sub( 1, std::memory_order_acq_rel) ) {
+ value_.store( 0, std::memory_order_release);
+ futex_wake( & value_);
+ }
+ }
+};
+
+}}}
+
+#endif // BOOST_FIBERS_SPINLOCK_TTAS_ADAPTIVE_FUTEX_H
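
The futex variants drive a three-state word: 0 = unlocked, 1 = locked/uncontended, 2 = locked/contended; only state 2 pays a wake syscall on unlock. A single-threaded sketch of the transitions, with the futex calls stubbed out as comments:

#include <atomic>
#include <cassert>
#include <cstdint>

std::atomic< std::int32_t > value{ 0 }; // 0 unlocked, 1 locked, 2 contended

int main() {
    // uncontended lock: CAS 0 -> 1, no syscall
    std::int32_t expected = 0;
    assert( value.compare_exchange_strong( expected, 1) );

    // a second locker fails the CAS and marks contention: exchange -> 2
    expected = value.exchange( 2);  // it would call futex_wait( & value, 2) now
    assert( 1 == expected);

    // unlock: fetch_sub returns 2, so a waiter may exist -> wake needed
    if ( 1 != value.fetch_sub( 1) ) {
        value.store( 0);            // it would call futex_wake( & value) now
    }
    assert( 0 == value.load() );
}
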
diff --git a/boost/fiber/detail/spinlock_ttas_futex.hpp b/boost/fiber/detail/spinlock_ttas_futex.hpp
new file mode 100644
index 0000000000..b11e63b587
--- /dev/null
+++ b/boost/fiber/detail/spinlock_ttas_futex.hpp
@@ -0,0 +1,104 @@
+
+// Copyright Oliver Kowalke 2016.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+
+#ifndef BOOST_FIBERS_SPINLOCK_TTAS_FUTEX_H
+#define BOOST_FIBERS_SPINLOCK_TTAS_FUTEX_H
+
+#include <atomic>
+#include <cstdint>
+#include <random>
+#include <thread>
+
+#include <boost/fiber/detail/config.hpp>
+#include <boost/fiber/detail/cpu_relax.hpp>
+#include <boost/fiber/detail/futex.hpp>
+
+// based on information from:
+// https://software.intel.com/en-us/articles/benefitting-power-and-performance-sleep-loops
+// https://software.intel.com/en-us/articles/long-duration-spin-wait-loops-on-hyper-threading-technology-enabled-intel-processors
+
+namespace boost {
+namespace fibers {
+namespace detail {
+
+class spinlock_ttas_futex {
+private:
+ // align shared variable 'value_' on a cache-line boundary to prevent false sharing
+ alignas(cache_alignment) std::atomic< std::int32_t > value_{ 0 };
+ // padding to keep other data off the cache line of shared variable 'value_'
+ char pad_[cacheline_length];
+
+public:
+ spinlock_ttas_futex() noexcept = default;
+
+ spinlock_ttas_futex( spinlock_ttas_futex const&) = delete;
+ spinlock_ttas_futex & operator=( spinlock_ttas_futex const&) = delete;
+
+ void lock() noexcept {
+ std::int32_t collisions = 0, tests = 0, expected = 0;
+ // after max. spins or collisions suspend via futex
+ while ( BOOST_FIBERS_SPIN_MAX_TESTS > tests && BOOST_FIBERS_SPIN_MAX_COLLISIONS > collisions) {
+ // avoid using multiple pause instructions for a delay of a specific cycle count
+ // the delay of cpu_relax() (pause on Intel) depends on the processor family
+ // the cycle count cannot be guaranteed from one system to the next
+ // -> check the shared variable 'value_' in between each cpu_relax() to prevent
+ // unnecessarily long delays on some systems
+ // test shared variable 'value_'
+ // first access to 'value_' -> cache miss
+ // successive accesses to 'value_' -> cache hit
+ // if 'value_' was released by another fiber,
+ // the cached 'value_' is invalidated -> cache miss
+ if ( 0 != ( expected = value_.load( std::memory_order_relaxed) ) ) {
+ ++tests;
+#if !defined(BOOST_FIBERS_SPIN_SINGLE_CORE)
+ // give CPU a hint that this thread is in a "spin-wait" loop
+ // delays the next instruction's execution for a finite period of time (depends on processor family)
+ // the CPU is not under demand, parts of the pipeline are no longer being used
+ // -> reduces the power consumed by the CPU
+ cpu_relax();
+#else
+ // std::this_thread::yield() allows this_thread to give up the remaining part of its time slice,
+ // but only to another thread on the same processor
+ // instead of constant checking, a thread only checks if no other useful work is pending
+ std::this_thread::yield();
+#endif
+ } else if ( ! value_.compare_exchange_strong( expected, 1, std::memory_order_acquire, std::memory_order_relaxed) ) {
+ // spinlock now contended
+ // utilize 'Binary Exponential Backoff' algorithm
+ // linear_congruential_engine is a random number engine based on Linear congruential generator (LCG)
+ static thread_local std::minstd_rand generator;
+ const std::int32_t z = std::uniform_int_distribution< std::int32_t >{
+ 0, static_cast< std::int32_t >( 1) << collisions }( generator);
+ ++collisions;
+ for ( std::int32_t i = 0; i < z; ++i) {
+ cpu_relax();
+ }
+ } else {
+ // success, lock acquired
+ return;
+ }
+ }
+ // failure, lock not acquired
+ // pause via futex
+ if ( 2 != expected) {
+ expected = value_.exchange( 2, std::memory_order_acquire);
+ }
+ while ( 0 != expected) {
+ futex_wait( & value_, 2);
+ expected = value_.exchange( 2, std::memory_order_acquire);
+ }
+ }
+
+ void unlock() noexcept {
+ // release semantics so writes in the critical section are visible to the next owner
+ if ( 1 != value_.fetch_sub( 1, std::memory_order_acq_rel) ) {
+ value_.store( 0, std::memory_order_release);
+ futex_wake( & value_);
+ }
+ }
+};
+
+}}}
+
+#endif // BOOST_FIBERS_SPINLOCK_TTAS_FUTEX_H
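
Setting the spin phase aside, the slow path above is the classic futex mutex from Ulrich Drepper's "Futexes Are Tricky". A Linux-only stand-alone sketch wiring that same slow path to real futex calls and exercising it from two threads:

// Linux only; build with -pthread
#include <atomic>
#include <cassert>
#include <cstdint>
#include <thread>
extern "C" {
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
}

static std::atomic< std::int32_t > value{ 0 }; // 0 free, 1 locked, 2 contended
static int counter = 0;

static void futex( std::int32_t op, std::int32_t x) {
    ::syscall( SYS_futex, & value, op, x, nullptr, nullptr, 0);
}

static void lock() {
    std::int32_t expected = 0;
    if ( value.compare_exchange_strong( expected, 1, std::memory_order_acquire) )
        return;                                  // fast path, uncontended
    if ( 2 != expected)
        expected = value.exchange( 2, std::memory_order_acquire);
    while ( 0 != expected) {                     // slow path, as above
        futex( FUTEX_WAIT_PRIVATE, 2);
        expected = value.exchange( 2, std::memory_order_acquire);
    }
}

static void unlock() {
    if ( 1 != value.fetch_sub( 1, std::memory_order_acq_rel) ) {
        value.store( 0, std::memory_order_release);
        futex( FUTEX_WAKE_PRIVATE, 1);
    }
}

int main() {
    auto work = [] {
        for ( int i = 0; i < 100000; ++i) { lock(); ++counter; unlock(); }
    };
    std::thread t1{ work }, t2{ work };
    t1.join(); t2.join();
    assert( 200000 == counter);
}
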
diff --git a/boost/fiber/detail/wrap.hpp b/boost/fiber/detail/wrap.hpp
index 850763b4c8..0369e61ee6 100644
--- a/boost/fiber/detail/wrap.hpp
+++ b/boost/fiber/detail/wrap.hpp
@@ -87,10 +87,13 @@ public:
wrapper & operator=( wrapper && other) = default;
boost::context::execution_context< data_t * >
- operator()( boost::context::execution_context< data_t * > ctx, data_t * dp) {
+ operator()( boost::context::execution_context< data_t * > && ctx, data_t * dp) {
return boost::context::detail::invoke(
std::move( fn1_),
- fn2_, tpl_, std::move( ctx), dp);
+ fn2_,
+ tpl_,
+ std::forward< boost::context::execution_context< data_t * > >( ctx),
+ dp);
}
};
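
The wrap.hpp change matters because execution_context is move-only: the context must be taken by rvalue reference and forwarded onward, never copied. A minimal sketch of the same pattern with a hypothetical move-only stand-in type:

#include <memory>
#include <utility>

// stand-in for boost::context::execution_context: movable, not copyable
using context_t = std::unique_ptr< int >;

template< typename Fn >
context_t invoke_wrapper( Fn && fn, context_t && ctx) {
    // forwarding preserves the rvalue-ness; a by-value parameter
    // would have required a copy, which context_t does not allow
    return std::forward< Fn >( fn)( std::move( ctx) );
}

int main() {
    context_t ctx{ new int{ 42 } };
    context_t out = invoke_wrapper(
        []( context_t && c) { return std::move( c); },
        std::move( ctx) );
    return 42 == * out ? 0 : 1;
}
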