Diffstat (limited to 'boost/fiber/detail')
-rw-r--r--  boost/fiber/detail/config.hpp                          2
-rw-r--r--  boost/fiber/detail/context_mpsc_queue.hpp             98
-rw-r--r--  boost/fiber/detail/context_spinlock_queue.hpp        118
-rw-r--r--  boost/fiber/detail/context_spmc_queue.hpp              99
-rw-r--r--  boost/fiber/detail/cpu_relax.hpp                       36
-rw-r--r--  boost/fiber/detail/data.hpp                             2
-rw-r--r--  boost/fiber/detail/fss.hpp                              5
-rw-r--r--  boost/fiber/detail/futex.hpp                            2
-rw-r--r--  boost/fiber/detail/spinlock_ttas.hpp                   29
-rw-r--r--  boost/fiber/detail/spinlock_ttas_adaptive.hpp          21
-rw-r--r--  boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp    14
-rw-r--r--  boost/fiber/detail/spinlock_ttas_futex.hpp             12
-rw-r--r--  boost/fiber/detail/wrap.hpp                             51
13 files changed, 295 insertions, 194 deletions
diff --git a/boost/fiber/detail/config.hpp b/boost/fiber/detail/config.hpp
index f65d48910d..7c7119e1fb 100644
--- a/boost/fiber/detail/config.hpp
+++ b/boost/fiber/detail/config.hpp
@@ -52,7 +52,7 @@
#endif
#if !defined(BOOST_FIBERS_SPIN_MAX_TESTS)
-# define BOOST_FIBERS_SPIN_MAX_TESTS 100
+# define BOOST_FIBERS_SPIN_MAX_TESTS 500
#endif
// modern architectures have cachelines with 64byte length
diff --git a/boost/fiber/detail/context_mpsc_queue.hpp b/boost/fiber/detail/context_mpsc_queue.hpp
deleted file mode 100644
index f7e664659c..0000000000
--- a/boost/fiber/detail/context_mpsc_queue.hpp
+++ /dev/null
@@ -1,98 +0,0 @@
-
-// Copyright Dmitry Vyukov 2010-2011.
-// Copyright Oliver Kowalke 2016.
-// Distributed under the Boost Software License, Version 1.0.
-// (See accompanying file LICENSE_1_0.txt or copy at
-// http://www.boost.org/LICENSE_1_0.txt)
-//
-// based on Dmitry Vyukov's intrusive MPSC queue
-// http://www.1024cores.net/home/lock-free-algorithms/queues/intrusive-mpsc-node-based-queue
-// https://groups.google.com/forum/#!topic/lock-free/aFHvZhu1G-0
-
-#ifndef BOOST_FIBERS_DETAIL_CONTEXT_MPSC_QUEUE_H
-#define BOOST_FIBERS_DETAIL_CONTEXT_MPSC_QUEUE_H
-
-#include <atomic>
-#include <memory>
-#include <type_traits>
-
-#include <boost/assert.hpp>
-#include <boost/config.hpp>
-
-#include <boost/fiber/context.hpp>
-#include <boost/fiber/detail/config.hpp>
-
-#ifdef BOOST_HAS_ABI_HEADERS
-# include BOOST_ABI_PREFIX
-#endif
-
-namespace boost {
-namespace fibers {
-namespace detail {
-
-// a MPSC queue
-// multiple threads push ready fibers (belonging to local scheduler)
-// (thread) local scheduler pops fibers
-class context_mpsc_queue {
-private:
- // not default constructor for context - use aligned_storage instead
- alignas(cache_alignment) std::aligned_storage< sizeof( context), alignof( context) >::type storage_{};
- context * dummy_;
- alignas(cache_alignment) std::atomic< context * > head_;
- alignas(cache_alignment) context * tail_;
- char pad_[cacheline_length];
-
-public:
- context_mpsc_queue() :
- dummy_{ reinterpret_cast< context * >( std::addressof( storage_) ) },
- head_{ dummy_ },
- tail_{ dummy_ } {
- dummy_->remote_nxt_.store( nullptr, std::memory_order_release);
- }
-
- context_mpsc_queue( context_mpsc_queue const&) = delete;
- context_mpsc_queue & operator=( context_mpsc_queue const&) = delete;
-
- void push( context * ctx) noexcept {
- BOOST_ASSERT( nullptr != ctx);
- ctx->remote_nxt_.store( nullptr, std::memory_order_release);
- context * prev = head_.exchange( ctx, std::memory_order_acq_rel);
- prev->remote_nxt_.store( ctx, std::memory_order_release);
- }
-
- context * pop() noexcept {
- context * tail = tail_;
- context * next = tail->remote_nxt_.load( std::memory_order_acquire);
- if ( dummy_ == tail) {
- if ( nullptr == next) {
- return nullptr;
- }
- tail_ = next;
- tail = next;
- next = next->remote_nxt_.load( std::memory_order_acquire);;
- }
- if ( nullptr != next) {
- tail_ = next;
- return tail;
- }
- context * head = head_.load( std::memory_order_acquire);
- if ( tail != head) {
- return nullptr;
- }
- push( dummy_);
- next = tail->remote_nxt_.load( std::memory_order_acquire);
- if ( nullptr != next) {
- tail_= next;
- return tail;
- }
- return nullptr;
- }
-};
-
-}}}
-
-#ifdef BOOST_HAS_ABI_HEADERS
-# include BOOST_ABI_SUFFIX
-#endif
-
-#endif // BOOST_FIBERS_DETAIL_CONTEXT_MPSC_QUEUE_H
diff --git a/boost/fiber/detail/context_spinlock_queue.hpp b/boost/fiber/detail/context_spinlock_queue.hpp
new file mode 100644
index 0000000000..e0ebdabda6
--- /dev/null
+++ b/boost/fiber/detail/context_spinlock_queue.hpp
@@ -0,0 +1,118 @@
+
+// Copyright Oliver Kowalke 2015.
+// Distributed under the Boost Software License, Version 1.0.
+// (See accompanying file LICENSE_1_0.txt or copy at
+// http://www.boost.org/LICENSE_1_0.txt)
+//
+
+#ifndef BOOST_FIBERS_DETAIL_SPINLOCK_QUEUE_H
+#define BOOST_FIBERS_DETAIL_SPINLOCK_QUEUE_H
+
+#include <cstddef>
+#include <cstring>
+#include <mutex>
+
+#include <boost/config.hpp>
+
+#include <boost/fiber/context.hpp>
+#include <boost/fiber/detail/config.hpp>
+#include <boost/fiber/detail/spinlock.hpp>
+
+#ifdef BOOST_HAS_ABI_HEADERS
+# include BOOST_ABI_PREFIX
+#endif
+
+namespace boost {
+namespace fibers {
+namespace detail {
+
+class context_spinlock_queue {
+private:
+ typedef context * slot_type;
+
+ alignas(cache_alignment) mutable spinlock splk_{};
+ std::size_t pidx_{ 0 };
+ std::size_t cidx_{ 0 };
+ std::size_t capacity_;
+ slot_type * slots_;
+
+ void resize_() {
+ slot_type * old_slots = slots_;
+ slots_ = new slot_type[2*capacity_];
+ std::size_t offset = capacity_ - cidx_;
+ std::memcpy( slots_, old_slots + cidx_, offset * sizeof( slot_type) );
+ if ( 0 < cidx_) {
+ std::memcpy( slots_ + offset, old_slots, pidx_ * sizeof( slot_type) );
+ }
+ cidx_ = 0;
+ pidx_ = capacity_ - 1;
+ capacity_ *= 2;
+ delete [] old_slots;
+ }
+
+ bool is_full_() const noexcept {
+ return cidx_ == ((pidx_ + 1) % capacity_);
+ }
+
+ bool is_empty_() const noexcept {
+ return cidx_ == pidx_;
+ }
+
+public:
+ context_spinlock_queue( std::size_t capacity = 4096) :
+ capacity_{ capacity } {
+ slots_ = new slot_type[capacity_];
+ }
+
+ ~context_spinlock_queue() {
+ delete [] slots_;
+ }
+
+ context_spinlock_queue( context_spinlock_queue const&) = delete;
+ context_spinlock_queue & operator=( context_spinlock_queue const&) = delete;
+
+ bool empty() const noexcept {
+ spinlock_lock lk{ splk_ };
+ return is_empty_();
+ }
+
+ void push( context * c) {
+ spinlock_lock lk{ splk_ };
+ if ( is_full_() ) {
+ resize_();
+ }
+ slots_[pidx_] = c;
+ pidx_ = (pidx_ + 1) % capacity_;
+ }
+
+ context * pop() {
+ spinlock_lock lk{ splk_ };
+ context * c = nullptr;
+ if ( ! is_empty_() ) {
+ c = slots_[cidx_];
+ cidx_ = (cidx_ + 1) % capacity_;
+ }
+ return c;
+ }
+
+ context * steal() {
+ spinlock_lock lk{ splk_ };
+ context * c = nullptr;
+ if ( ! is_empty_() ) {
+ c = slots_[cidx_];
+ if ( c->is_context( type::pinned_context) ) {
+ return nullptr;
+ }
+ cidx_ = (cidx_ + 1) % capacity_;
+ }
+ return c;
+ }
+};
+
+}}}
+
+#ifdef BOOST_HAS_ABI_HEADERS
+# include BOOST_ABI_SUFFIX
+#endif
+
+#endif // BOOST_FIBERS_DETAIL_SPINLOCK_QUEUE_H
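
The new context_spinlock_queue is a plain circular buffer guarded by a spinlock that doubles its storage when producer and consumer indices would meet. A minimal stand-alone sketch of the same pattern, with std::mutex standing in for detail::spinlock and int* standing in for context* (names are illustrative, not boost.fiber API):

#include <cstddef>
#include <cstring>
#include <mutex>

class ring_queue {
    std::mutex mtx_;
    std::size_t pidx_{ 0 }, cidx_{ 0 }, capacity_;
    int ** slots_;

    void resize_() {                        // called with mtx_ held, queue is full
        int ** old = slots_;
        slots_ = new int *[2 * capacity_];
        std::size_t offset = capacity_ - cidx_;
        // unwrap the ring: copy [cidx_, capacity_) first, then [0, pidx_)
        std::memcpy( slots_, old + cidx_, offset * sizeof( int *) );
        if ( 0 < cidx_) {
            std::memcpy( slots_ + offset, old, pidx_ * sizeof( int *) );
        }
        cidx_ = 0;
        pidx_ = capacity_ - 1;              // a full ring holds capacity_-1 elements
        capacity_ *= 2;
        delete [] old;
    }

public:
    explicit ring_queue( std::size_t capacity = 4096) :
        capacity_{ capacity }, slots_{ new int *[capacity] } {}
    ~ring_queue() { delete [] slots_; }

    void push( int * p) {
        std::lock_guard< std::mutex > lk{ mtx_ };
        if ( cidx_ == (pidx_ + 1) % capacity_) resize_();   // full
        slots_[pidx_] = p;
        pidx_ = (pidx_ + 1) % capacity_;
    }

    int * pop() {
        std::lock_guard< std::mutex > lk{ mtx_ };
        if ( cidx_ == pidx_) return nullptr;                // empty
        int * p = slots_[cidx_];
        cidx_ = (cidx_ + 1) % capacity_;
        return p;
    }
};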
diff --git a/boost/fiber/detail/context_spmc_queue.hpp b/boost/fiber/detail/context_spmc_queue.hpp
index 6449e3658f..27256233cf 100644
--- a/boost/fiber/detail/context_spmc_queue.hpp
+++ b/boost/fiber/detail/context_spmc_queue.hpp
@@ -30,6 +30,11 @@
// In Proceedings of the 18th ACM SIGPLAN symposium on Principles and practice
// of parallel programming (PPoPP '13). ACM, New York, NY, USA, 69-80.
+#if BOOST_COMP_CLANG
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-private-field"
+#endif
+
namespace boost {
namespace fibers {
namespace detail {
@@ -43,43 +48,43 @@ private:
sizeof( atomic_type), cache_alignment
>::type storage_type;
- std::size_t size_;
+ std::size_t capacity_;
storage_type * storage_;
public:
- array( std::size_t size) :
- size_{ size },
- storage_{ new storage_type[size_] } {
- for ( std::size_t i = 0; i < size_; ++i) {
+ array( std::size_t capacity) :
+ capacity_{ capacity },
+ storage_{ new storage_type[capacity_] } {
+ for ( std::size_t i = 0; i < capacity_; ++i) {
::new ( static_cast< void * >( std::addressof( storage_[i]) ) ) atomic_type{ nullptr };
}
}
~array() {
- for ( std::size_t i = 0; i < size_; ++i) {
+ for ( std::size_t i = 0; i < capacity_; ++i) {
reinterpret_cast< atomic_type * >( std::addressof( storage_[i]) )->~atomic_type();
}
delete [] storage_;
}
- std::size_t size() const noexcept {
- return size_;
+ std::size_t capacity() const noexcept {
+ return capacity_;
}
void push( std::size_t bottom, context * ctx) noexcept {
reinterpret_cast< atomic_type * >(
- std::addressof( storage_[bottom % size_]) )
+ std::addressof( storage_[bottom % capacity_]) )
->store( ctx, std::memory_order_relaxed);
}
context * pop( std::size_t top) noexcept {
return reinterpret_cast< atomic_type * >(
- std::addressof( storage_[top % size_]) )
+ std::addressof( storage_[top % capacity_]) )
->load( std::memory_order_relaxed);
}
array * resize( std::size_t bottom, std::size_t top) {
- std::unique_ptr< array > tmp{ new array{ 2 * size_ } };
+ std::unique_ptr< array > tmp{ new array{ 2 * capacity_ } };
for ( std::size_t i = top; i != bottom; ++i) {
tmp->push( i, pop( i) );
}
@@ -87,15 +92,15 @@ private:
}
};
- alignas(cache_alignment) std::atomic< std::size_t > top_{ 0 };
- alignas(cache_alignment) std::atomic< std::size_t > bottom_{ 0 };
+ alignas(cache_alignment) std::atomic< std::size_t > top_{ 0 };
+ alignas(cache_alignment) std::atomic< std::size_t > bottom_{ 0 };
alignas(cache_alignment) std::atomic< array * > array_;
- std::vector< array * > old_arrays_{};
+ std::vector< array * > old_arrays_{};
char padding_[cacheline_length];
public:
- context_spmc_queue() :
- array_{ new array{ 1024 } } {
+ context_spmc_queue( std::size_t capacity = 4096) :
+ array_{ new array{ capacity } } {
old_arrays_.reserve( 32);
}
@@ -110,19 +115,19 @@ public:
context_spmc_queue & operator=( context_spmc_queue const&) = delete;
bool empty() const noexcept {
- std::size_t bottom{ bottom_.load( std::memory_order_relaxed) };
- std::size_t top{ top_.load( std::memory_order_relaxed) };
+ std::size_t bottom = bottom_.load( std::memory_order_relaxed);
+ std::size_t top = top_.load( std::memory_order_relaxed);
return bottom <= top;
}
void push( context * ctx) {
- std::size_t bottom{ bottom_.load( std::memory_order_relaxed) };
- std::size_t top{ top_.load( std::memory_order_acquire) };
- array * a{ array_.load( std::memory_order_relaxed) };
- if ( (a->size() - 1) < (bottom - top) ) {
+ std::size_t bottom = bottom_.load( std::memory_order_relaxed);
+ std::size_t top = top_.load( std::memory_order_acquire);
+ array * a = array_.load( std::memory_order_relaxed);
+ if ( (a->capacity() - 1) < (bottom - top) ) {
// queue is full
// resize
- array * tmp{ a->resize( bottom, top) };
+ array * tmp = a->resize( bottom, top);
old_arrays_.push_back( a);
std::swap( a, tmp);
array_.store( a, std::memory_order_relaxed);
@@ -133,16 +138,48 @@ public:
}
context * pop() {
- std::size_t top{ top_.load( std::memory_order_acquire) };
+ std::size_t bottom = bottom_.load( std::memory_order_relaxed) - 1;
+ array * a = array_.load( std::memory_order_relaxed);
+ bottom_.store( bottom, std::memory_order_relaxed);
std::atomic_thread_fence( std::memory_order_seq_cst);
- std::size_t bottom{ bottom_.load( std::memory_order_acquire) };
- context * ctx{ nullptr };
+ std::size_t top = top_.load( std::memory_order_relaxed);
+ context * ctx = nullptr;
+ if ( top <= bottom) {
+ // queue is not empty
+ ctx = a->pop( bottom);
+ BOOST_ASSERT( nullptr != ctx);
+ if ( top == bottom) {
+ // last element dequeued
+ if ( ! top_.compare_exchange_strong( top, top + 1,
+ std::memory_order_seq_cst,
+ std::memory_order_relaxed) ) {
+ // lose the race
+ ctx = nullptr;
+ }
+ bottom_.store( bottom + 1, std::memory_order_relaxed);
+ }
+ } else {
+ // queue is empty
+ bottom_.store( bottom + 1, std::memory_order_relaxed);
+ }
+ return ctx;
+ }
+
+ context * steal() {
+ std::size_t top = top_.load( std::memory_order_acquire);
+ std::atomic_thread_fence( std::memory_order_seq_cst);
+ std::size_t bottom = bottom_.load( std::memory_order_acquire);
+ context * ctx = nullptr;
if ( top < bottom) {
// queue is not empty
- array * a{ array_.load( std::memory_order_consume) };
+ array * a = array_.load( std::memory_order_consume);
ctx = a->pop( top);
- if ( ctx->is_context( type::pinned_context) ||
- ! top_.compare_exchange_strong( top, top + 1,
+ BOOST_ASSERT( nullptr != ctx);
+ // do not steal pinned context (e.g. main-/dispatcher-context)
+ if ( ctx->is_context( type::pinned_context) ) {
+ return nullptr;
+ }
+ if ( ! top_.compare_exchange_strong( top, top + 1,
std::memory_order_seq_cst,
std::memory_order_relaxed) ) {
// lose the race
@@ -155,4 +192,8 @@ public:
}}}
+#if BOOST_COMP_CLANG
+#pragma clang diagnostic pop
+#endif
+
#endif // BOOST_FIBERS_DETAIL_CONTEXT_SPMC_QUEUE_H
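
The reworked context_spmc_queue keeps the Chase-Lev split: the owning thread pushes and pops at the bottom (LIFO), other schedulers steal from the top (FIFO), and steal() refuses pinned contexts so main-/dispatcher-contexts never migrate. A toy stand-in showing only those semantics, with a mutex replacing the lock-free protocol and int replacing context* (not the Boost class):

#include <cstdio>
#include <deque>
#include <mutex>

// owner pops from the bottom (newest first), thieves steal from the top (oldest first)
struct toy_deque {
    std::mutex mtx;
    std::deque< int > items;

    void push( int v) { std::lock_guard< std::mutex > lk{ mtx }; items.push_back( v); }

    bool pop( int & v) {                    // owner side
        std::lock_guard< std::mutex > lk{ mtx };
        if ( items.empty() ) return false;
        v = items.back(); items.pop_back(); return true;
    }

    bool steal( int & v) {                  // thief side
        std::lock_guard< std::mutex > lk{ mtx };
        if ( items.empty() ) return false;
        v = items.front(); items.pop_front(); return true;
    }
};

int main() {
    toy_deque q;
    for ( int i = 0; i < 4; ++i) q.push( i);
    int v;
    q.pop( v);   std::printf( "owner popped %d\n", v);   // 3 (LIFO end)
    q.steal( v); std::printf( "thief stole  %d\n", v);   // 0 (FIFO end)
}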
diff --git a/boost/fiber/detail/cpu_relax.hpp b/boost/fiber/detail/cpu_relax.hpp
index d00020a23b..541b46dfd0 100644
--- a/boost/fiber/detail/cpu_relax.hpp
+++ b/boost/fiber/detail/cpu_relax.hpp
@@ -7,6 +7,7 @@
#ifndef BOOST_FIBERS_DETAIL_CPU_RELAX_H
#define BOOST_FIBERS_DETAIL_CPU_RELAX_H
+#include <chrono>
#include <thread>
#include <boost/config.hpp>
@@ -14,7 +15,7 @@
#include <boost/fiber/detail/config.hpp>
-#if BOOST_COMP_MSVC
+#if BOOST_COMP_MSVC || BOOST_COMP_MSVC_EMULATED
# include <Windows.h>
#endif
@@ -29,22 +30,47 @@ namespace detail {
#if BOOST_ARCH_ARM
# if BOOST_COMP_MSVC
# define cpu_relax() YieldProcessor();
-# else
+# elif (defined(__ARM_ARCH_6K__) || \
+ defined(__ARM_ARCH_6Z__) || \
+ defined(__ARM_ARCH_6ZK__) || \
+ defined(__ARM_ARCH_6T2__) || \
+ defined(__ARM_ARCH_7__) || \
+ defined(__ARM_ARCH_7A__) || \
+ defined(__ARM_ARCH_7R__) || \
+ defined(__ARM_ARCH_7M__) || \
+ defined(__ARM_ARCH_7S__) || \
+ defined(__ARM_ARCH_8A__) || \
+ defined(__aarch64__))
+// http://groups.google.com/a/chromium.org/forum/#!msg/chromium-dev/YGVrZbxYOlU/Vpgy__zeBQAJ
+// mnemonic 'yield' is supported from ARMv6k onwards
# define cpu_relax() asm volatile ("yield" ::: "memory");
+# else
+# define cpu_relax() asm volatile ("nop" ::: "memory");
# endif
#elif BOOST_ARCH_MIPS
# define cpu_relax() asm volatile ("pause" ::: "memory");
#elif BOOST_ARCH_PPC
+// http://code.metager.de/source/xref/gnu/glibc/sysdeps/powerpc/sys/platform/ppc.h
+// http://stackoverflow.com/questions/5425506/equivalent-of-x86-pause-instruction-for-ppc
+// mnemonic 'or' shared resource hints
+// or 27, 27, 27 This form of 'or' provides a hint that performance
+// will probably be improved if shared resources dedicated
+// to the executing processor are released for use by other
+// processors
+// extended mnemonics (available with POWER7)
+// yield == or 27, 27, 27
# define cpu_relax() asm volatile ("or 27,27,27" ::: "memory");
#elif BOOST_ARCH_X86
-# if BOOST_COMP_MSVC
+# if BOOST_COMP_MSVC || BOOST_COMP_MSVC_EMULATED
# define cpu_relax() YieldProcessor();
# else
# define cpu_relax() asm volatile ("pause" ::: "memory");
# endif
#else
-# warning "architecture does not support yield/pause mnemonic"
-# define cpu_relax() std::this_thread::yield();
+# define cpu_relax() { \
+ static constexpr std::chrono::microseconds us0{ 0 }; \
+ std::this_thread::sleep_for( us0); \
+ }
#endif
}}}
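
cpu_relax() is meant to sit inside a bounded test-and-test-and-set spin before the caller falls back to the OS scheduler. A minimal sketch of that usage, assuming the macro from this header is in scope; try_acquire is an illustrative name, and the 500-test cap merely mirrors the new BOOST_FIBERS_SPIN_MAX_TESTS default:

#include <atomic>
#include <thread>
#include <boost/fiber/detail/cpu_relax.hpp>

inline bool try_acquire( std::atomic< bool > & flag, int max_tests = 500) {
    for ( int tests = 0; tests < max_tests; ++tests) {
        // test first (cheap shared read), exchange only if the lock looks free
        if ( ! flag.load( std::memory_order_relaxed) &&
             ! flag.exchange( true, std::memory_order_acquire) ) {
            return true;                    // lock taken
        }
        cpu_relax();                        // pause/yield the pipeline, stay on-core
    }
    std::this_thread::yield();              // spun long enough, give up the time slice
    return false;
}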
diff --git a/boost/fiber/detail/data.hpp b/boost/fiber/detail/data.hpp
index 24e833a9e8..e2b119ec3e 100644
--- a/boost/fiber/detail/data.hpp
+++ b/boost/fiber/detail/data.hpp
@@ -28,7 +28,7 @@ struct data_t {
spinlock_lock * lk{ nullptr };
context * ctx{ nullptr };
- data_t() noexcept = default;
+ data_t() = default;
explicit data_t( spinlock_lock * lk_) noexcept :
lk{ lk_ } {
diff --git a/boost/fiber/detail/fss.hpp b/boost/fiber/detail/fss.hpp
index 54dc5b79d3..27a7d67f26 100644
--- a/boost/fiber/detail/fss.hpp
+++ b/boost/fiber/detail/fss.hpp
@@ -38,12 +38,13 @@ public:
friend inline
void intrusive_ptr_add_ref( fss_cleanup_function * p) noexcept {
- ++p->use_count_;
+ p->use_count_.fetch_add( 1, std::memory_order_relaxed);
}
friend inline
void intrusive_ptr_release( fss_cleanup_function * p) noexcept {
- if ( --p->use_count_ == 0) {
+ if ( 1 == p->use_count_.fetch_sub( 1, std::memory_order_release) ) {
+ std::atomic_thread_fence( std::memory_order_acquire);
delete p;
}
}
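
The intrusive_ptr hooks above use the usual split ordering for reference counts: increments can be relaxed, the decrement publishes with release, and only the thread that drops the count to zero pays for an acquire fence before deleting. The same pattern in isolation (ref_counted, add_ref and release are illustrative names, not boost.fiber types):

#include <atomic>
#include <cstddef>

struct ref_counted {
    std::atomic< std::size_t > use_count_{ 1 };
};

inline void add_ref( ref_counted * p) noexcept {
    // no ordering needed: the object is already safely reachable by this thread
    p->use_count_.fetch_add( 1, std::memory_order_relaxed);
}

inline void release( ref_counted * p) noexcept {
    if ( 1 == p->use_count_.fetch_sub( 1, std::memory_order_release) ) {
        // make all prior writes from releasing threads visible before destruction
        std::atomic_thread_fence( std::memory_order_acquire);
        delete p;
    }
}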
diff --git a/boost/fiber/detail/futex.hpp b/boost/fiber/detail/futex.hpp
index 4c966867c5..d383dc4077 100644
--- a/boost/fiber/detail/futex.hpp
+++ b/boost/fiber/detail/futex.hpp
@@ -49,7 +49,7 @@ int futex_wake( std::atomic< std::int32_t > * addr) {
inline
int futex_wait( std::atomic< std::int32_t > * addr, std::int32_t x) {
- ::WaitOnAddress( static_cast< volatile void * >( addr), & x, sizeof( x), -1);
+ ::WaitOnAddress( static_cast< volatile void * >( addr), & x, sizeof( x), INFINITE);
return 0;
}
#else
diff --git a/boost/fiber/detail/spinlock_ttas.hpp b/boost/fiber/detail/spinlock_ttas.hpp
index d64630d84d..380773ad6d 100644
--- a/boost/fiber/detail/spinlock_ttas.hpp
+++ b/boost/fiber/detail/spinlock_ttas.hpp
@@ -19,6 +19,11 @@
// https://software.intel.com/en-us/articles/benefitting-power-and-performance-sleep-loops
// https://software.intel.com/en-us/articles/long-duration-spin-wait-loops-on-hyper-threading-technology-enabled-intel-processors
+#if BOOST_COMP_CLANG
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-private-field"
+#endif
+
namespace boost {
namespace fibers {
namespace detail {
@@ -30,10 +35,7 @@ private:
unlocked
};
- // align shared variable 'state_' at cache line to prevent false sharing
- alignas(cache_alignment) std::atomic< spinlock_status > state_{ spinlock_status::unlocked };
- // padding to avoid other data one the cacheline of shared variable 'state_'
- char pad[cacheline_length];
+ std::atomic< spinlock_status > state_{ spinlock_status::unlocked };
public:
spinlock_ttas() noexcept = default;
@@ -63,20 +65,15 @@ public:
// delays the next instruction's execution for a finite period of time (depends on processor family)
// the CPU is not under demand, parts of the pipeline are no longer being used
// -> reduces the power consumed by the CPU
+ // -> prevent pipeline stalls
cpu_relax();
- } else if ( BOOST_FIBERS_SPIN_MAX_TESTS + 20 > tests) {
- ++tests;
+ } else {
// std::this_thread::sleep_for( 0us) has a fairly long instruction path length,
// combined with an expensive ring3 to ring 0 transition costing about 1000 cycles
// std::this_thread::sleep_for( 0us) lets give up this_thread the remaining part of its time slice
// if and only if a thread of equal or greater priority is ready to run
static constexpr std::chrono::microseconds us0{ 0 };
std::this_thread::sleep_for( us0);
- } else {
- // std::this_thread::yield() allows this_thread to give up the remaining part of its time slice,
- // but only to another thread on the same processor
- // instead of constant checking, a thread only checks if no other useful work is pending
- std::this_thread::yield();
}
#else
std::this_thread::yield();
@@ -89,10 +86,12 @@ public:
// utilize 'Binary Exponential Backoff' algorithm
// linear_congruential_engine is a random number engine based on Linear congruential generator (LCG)
static thread_local std::minstd_rand generator;
- const std::size_t z =
- std::uniform_int_distribution< std::size_t >{ 0, static_cast< std::size_t >( 1) << collisions }( generator);
+ static std::uniform_int_distribution< std::size_t > distribution{ 0, static_cast< std::size_t >( 1) << collisions };
+ const std::size_t z = distribution( generator);
++collisions;
for ( std::size_t i = 0; i < z; ++i) {
+ // -> reduces the power consumed by the CPU
+ // -> prevent pipeline stalls
cpu_relax();
}
} else {
@@ -109,4 +108,8 @@ public:
}}}
+#if BOOST_COMP_CLANG
+#pragma clang diagnostic pop
+#endif
+
#endif // BOOST_FIBERS_SPINLOCK_TTAS_H
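
On a failed exchange, lock() backs off with 'Binary Exponential Backoff': each collision doubles the interval from which a random number of cpu_relax() iterations is drawn, so contending threads spread out instead of hammering the cache line. The step on its own, as a hedged sketch (the real header keeps the collision counter and its cap inside lock()):

#include <cstddef>
#include <random>
#include <boost/fiber/detail/cpu_relax.hpp>

inline void backoff( std::size_t & collisions) {
    static thread_local std::minstd_rand generator;   // cheap LCG, one per thread
    std::uniform_int_distribution< std::size_t >
        distribution{ 0, static_cast< std::size_t >( 1) << collisions };
    const std::size_t z = distribution( generator);   // 0 .. 2^collisions
    ++collisions;                                      // caller caps this elsewhere
    for ( std::size_t i = 0; i < z; ++i) {
        cpu_relax();
    }
}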
diff --git a/boost/fiber/detail/spinlock_ttas_adaptive.hpp b/boost/fiber/detail/spinlock_ttas_adaptive.hpp
index c6a9a57d79..da044b6298 100644
--- a/boost/fiber/detail/spinlock_ttas_adaptive.hpp
+++ b/boost/fiber/detail/spinlock_ttas_adaptive.hpp
@@ -31,11 +31,8 @@ private:
unlocked
};
- // align shared variable 'state_' at cache line to prevent false sharing
- alignas(cache_alignment) std::atomic< spinlock_status > state_{ spinlock_status::unlocked };
- std::atomic< std::size_t > tests_{ 0 };
- // padding to avoid other data one the cacheline of shared variable 'state_'
- char pad[cacheline_length];
+ std::atomic< spinlock_status > state_{ spinlock_status::unlocked };
+ std::atomic< std::size_t > tests_{ 0 };
public:
spinlock_ttas_adaptive() noexcept = default;
@@ -67,8 +64,9 @@ public:
// delays the next instruction's execution for a finite period of time (depends on processor family)
// the CPU is not under demand, parts of the pipeline are no longer being used
// -> reduces the power consumed by the CPU
+ // -> prevent pipeline stalls
cpu_relax();
- } else if ( BOOST_FIBERS_SPIN_MAX_TESTS + 20 > tests) {
+ } else {
++tests;
// std::this_thread::sleep_for( 0us) has a fairly long instruction path length,
// combined with an expensive ring3 to ring 0 transition costing about 1000 cycles
@@ -76,11 +74,6 @@ public:
// if and only if a thread of equal or greater priority is ready to run
static constexpr std::chrono::microseconds us0{ 0 };
std::this_thread::sleep_for( us0);
- } else {
- // std::this_thread::yield() allows this_thread to give up the remaining part of its time slice,
- // but only to another thread on the same processor
- // instead of constant checking, a thread only checks if no other useful work is pending
- std::this_thread::yield();
}
#else
std::this_thread::yield();
@@ -93,10 +86,12 @@ public:
// utilize 'Binary Exponential Backoff' algorithm
// linear_congruential_engine is a random number engine based on Linear congruential generator (LCG)
static thread_local std::minstd_rand generator;
- const std::size_t z =
- std::uniform_int_distribution< std::size_t >{ 0, static_cast< std::size_t >( 1) << collisions }( generator);
+ static std::uniform_int_distribution< std::size_t > distribution{ 0, static_cast< std::size_t >( 1) << collisions };
+ const std::size_t z = distribution( generator);
++collisions;
for ( std::size_t i = 0; i < z; ++i) {
+ // -> reduces the power consumed by the CPU
+ // -> prevent pipeline stalls
cpu_relax();
}
} else {
diff --git a/boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp b/boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp
index fbd6a0e4d2..61ab47691e 100644
--- a/boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp
+++ b/boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp
@@ -26,11 +26,8 @@ namespace detail {
class spinlock_ttas_adaptive_futex {
private:
- // align shared variable 'value_' at cache line to prevent false sharing
- alignas(cache_alignment) std::atomic< std::int32_t > value_{ 0 };
- std::atomic< std::int32_t > tests_{ 0 };
- // padding to avoid other data one the cacheline of shared variable 'value_'
- char pad_[cacheline_length];
+ std::atomic< std::int32_t > value_{ 0 };
+ std::atomic< std::int32_t > tests_{ 0 };
public:
spinlock_ttas_adaptive_futex() noexcept = default;
@@ -61,6 +58,7 @@ public:
// delays the next instruction's execution for a finite period of time (depends on processor family)
// the CPU is not under demand, parts of the pipeline are no longer being used
// -> reduces the power consumed by the CPU
+ // -> prevent pipeline stalls
cpu_relax();
#else
// std::this_thread::yield() allows this_thread to give up the remaining part of its time slice,
@@ -73,10 +71,12 @@ public:
// utilize 'Binary Exponential Backoff' algorithm
// linear_congruential_engine is a random number engine based on Linear congruential generator (LCG)
static thread_local std::minstd_rand generator;
- const std::int32_t z = std::uniform_int_distribution< std::int32_t >{
- 0, static_cast< std::int32_t >( 1) << collisions }( generator);
+ static std::uniform_int_distribution< std::int32_t > distribution{ 0, static_cast< std::int32_t >( 1) << collisions };
+ const std::int32_t z = distribution( generator);
++collisions;
for ( std::int32_t i = 0; i < z; ++i) {
+ // -> reduces the power consumed by the CPU
+ // -> prevent pipeline stalls
cpu_relax();
}
} else {
diff --git a/boost/fiber/detail/spinlock_ttas_futex.hpp b/boost/fiber/detail/spinlock_ttas_futex.hpp
index b11e63b587..a427b73ba5 100644
--- a/boost/fiber/detail/spinlock_ttas_futex.hpp
+++ b/boost/fiber/detail/spinlock_ttas_futex.hpp
@@ -25,10 +25,7 @@ namespace detail {
class spinlock_ttas_futex {
private:
- // align shared variable 'value_' at cache line to prevent false sharing
- alignas(cache_alignment) std::atomic< std::int32_t > value_{ 0 };
- // padding to avoid other data one the cacheline of shared variable 'value_'
- char pad_[cacheline_length];
+ std::atomic< std::int32_t > value_{ 0 };
public:
spinlock_ttas_futex() noexcept = default;
@@ -57,6 +54,7 @@ public:
// delays the next instruction's execution for a finite period of time (depends on processor family)
// the CPU is not under demand, parts of the pipeline are no longer being used
// -> reduces the power consumed by the CPU
+ // -> prevent pipeline stalls
cpu_relax();
#else
// std::this_thread::yield() allows this_thread to give up the remaining part of its time slice,
@@ -69,10 +67,12 @@ public:
// utilize 'Binary Exponential Backoff' algorithm
// linear_congruential_engine is a random number engine based on Linear congruential generator (LCG)
static thread_local std::minstd_rand generator;
- const std::int32_t z = std::uniform_int_distribution< std::int32_t >{
- 0, static_cast< std::int32_t >( 1) << collisions }( generator);
+ static std::uniform_int_distribution< std::int32_t > distribution{ 0, static_cast< std::int32_t >( 1) << collisions };
+ const std::int32_t z = distribution( generator);
++collisions;
for ( std::int32_t i = 0; i < z; ++i) {
+ // -> reduces the power consumed by the CPU
+ // -> prevent pipeline stalls
cpu_relax();
}
} else {
diff --git a/boost/fiber/detail/wrap.hpp b/boost/fiber/detail/wrap.hpp
index 0369e61ee6..558de6bd94 100644
--- a/boost/fiber/detail/wrap.hpp
+++ b/boost/fiber/detail/wrap.hpp
@@ -10,8 +10,14 @@
#include <type_traits>
#include <boost/config.hpp>
+#if defined(BOOST_NO_CXX17_STD_INVOKE)
#include <boost/context/detail/invoke.hpp>
-#include <boost/context/execution_context.hpp>
+#endif
+#if (BOOST_EXECUTION_CONTEXT==1)
+# include <boost/context/execution_context.hpp>
+#else
+# include <boost/context/continuation.hpp>
+#endif
#include <boost/fiber/detail/config.hpp>
#include <boost/fiber/detail/data.hpp>
@@ -36,9 +42,9 @@ private:
public:
wrapper( Fn1 && fn1, Fn2 && fn2, Tpl && tpl,
boost::context::execution_context const& ctx) :
- fn1_( std::move( fn1) ),
- fn2_( std::move( fn2) ),
- tpl_( std::move( tpl) ),
+ fn1_{ std::move( fn1) },
+ fn2_{ std::move( fn2) },
+ tpl_{ std::move( tpl) },
ctx_{ ctx } {
}
@@ -49,9 +55,11 @@ public:
wrapper & operator=( wrapper && other) = default;
void operator()( void * vp) {
- boost::context::detail::invoke(
- std::move( fn1_),
- fn2_, tpl_, ctx_, vp);
+#if defined(BOOST_NO_CXX17_STD_INVOKE)
+ boost::context::detail::invoke( std::move( fn1_), fn2_, tpl_, ctx_, vp);
+#else
+ std::invoke( std::move( fn1_), fn2_, tpl_, ctx_, vp);
+#endif
}
};
@@ -59,11 +67,11 @@ template< typename Fn1, typename Fn2, typename Tpl >
wrapper< Fn1, Fn2, Tpl >
wrap( Fn1 && fn1, Fn2 && fn2, Tpl && tpl,
boost::context::execution_context const& ctx) {
- return wrapper< Fn1, Fn2, Tpl >(
+ return wrapper< Fn1, Fn2, Tpl >{
std::forward< Fn1 >( fn1),
std::forward< Fn2 >( fn2),
std::forward< Tpl >( tpl),
- ctx);
+ ctx };
}
#else
template< typename Fn1, typename Fn2, typename Tpl >
@@ -75,9 +83,9 @@ private:
public:
wrapper( Fn1 && fn1, Fn2 && fn2, Tpl && tpl) :
- fn1_( std::move( fn1) ),
- fn2_( std::move( fn2) ),
- tpl_( std::move( tpl) ) {
+ fn1_{ std::move( fn1) },
+ fn2_{ std::move( fn2) },
+ tpl_{ std::move( tpl) } {
}
wrapper( wrapper const&) = delete;
@@ -86,24 +94,31 @@ public:
wrapper( wrapper && other) = default;
wrapper & operator=( wrapper && other) = default;
- boost::context::execution_context< data_t * >
- operator()( boost::context::execution_context< data_t * > && ctx, data_t * dp) {
+ boost::context::continuation
+ operator()( boost::context::continuation && c) {
+#if defined(BOOST_NO_CXX17_STD_INVOKE)
return boost::context::detail::invoke(
std::move( fn1_),
fn2_,
tpl_,
- std::forward< boost::context::execution_context< data_t * > >( ctx),
- dp);
+ std::forward< boost::context::continuation >( c) );
+#else
+ return std::invoke(
+ std::move( fn1_),
+ fn2_,
+ tpl_,
+ std::forward< boost::context::continuation >( c) );
+#endif
}
};
template< typename Fn1, typename Fn2, typename Tpl >
wrapper< Fn1, Fn2, Tpl >
wrap( Fn1 && fn1, Fn2 && fn2, Tpl && tpl) {
- return wrapper< Fn1, Fn2, Tpl >(
+ return wrapper< Fn1, Fn2, Tpl >{
std::forward< Fn1 >( fn1),
std::forward< Fn2 >( fn2),
- std::forward< Tpl >( tpl) );
+ std::forward< Tpl >( tpl) };
}
#endif
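
Both branches of wrap.hpp now funnel the call through the same dispatch: use std::invoke when the standard library provides it, otherwise fall back to the boost.context shim. The pattern reduced to a free function, assuming at least C++14 for the return-type deduction; 'call' is an illustrative name, not part of boost.fiber:

#include <utility>
#include <boost/config.hpp>
#if defined(BOOST_NO_CXX17_STD_INVOKE)
# include <boost/context/detail/invoke.hpp>
#else
# include <functional>
#endif

template< typename Fn, typename ... Args >
decltype(auto) call( Fn && fn, Args && ... args) {
#if defined(BOOST_NO_CXX17_STD_INVOKE)
    // pre-C++17 toolchains: boost.context's invoke shim
    return boost::context::detail::invoke( std::forward< Fn >( fn), std::forward< Args >( args) ...);
#else
    // C++17 and later: the standard facility
    return std::invoke( std::forward< Fn >( fn), std::forward< Args >( args) ...);
#endif
}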