Diffstat (limited to 'boost/fiber/detail')
-rw-r--r-- boost/fiber/detail/config.hpp | 2
-rw-r--r-- boost/fiber/detail/context_mpsc_queue.hpp | 98
-rw-r--r-- boost/fiber/detail/context_spinlock_queue.hpp | 118
-rw-r--r-- boost/fiber/detail/context_spmc_queue.hpp | 99
-rw-r--r-- boost/fiber/detail/cpu_relax.hpp | 36
-rw-r--r-- boost/fiber/detail/data.hpp | 2
-rw-r--r-- boost/fiber/detail/fss.hpp | 5
-rw-r--r-- boost/fiber/detail/futex.hpp | 2
-rw-r--r-- boost/fiber/detail/spinlock_ttas.hpp | 29
-rw-r--r-- boost/fiber/detail/spinlock_ttas_adaptive.hpp | 21
-rw-r--r-- boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp | 14
-rw-r--r-- boost/fiber/detail/spinlock_ttas_futex.hpp | 12
-rw-r--r-- boost/fiber/detail/wrap.hpp | 51
13 files changed, 295 insertions, 194 deletions
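The dominant theme of this change is work stealing: the intrusive MPSC queue is removed, a spinlock-protected ring buffer (context_spinlock_queue) is added, and the Chase-Lev SPMC deque gains a separate steal() that refuses to hand out pinned contexts. Below is a minimal, illustrative sketch of how a scheduler might drive the new push/pop/steal interface; the scheduler class and its member names are hypothetical and not part of this diff.

// Illustrative only: driving the new context_spinlock_queue (push/pop/steal)
// from a work-stealing scheduler. "my_scheduler", "rqueue_" and "victim"
// are hypothetical names, not part of Boost.Fiber.
#include <boost/fiber/context.hpp>
#include <boost/fiber/detail/context_spinlock_queue.hpp>

class my_scheduler {
    boost::fibers::detail::context_spinlock_queue rqueue_{};   // local ready-queue

public:
    void awakened( boost::fibers::context * ctx) {
        rqueue_.push( ctx);                 // any thread may make a fiber ready
    }

    boost::fibers::context * pick_next( my_scheduler & victim) {
        boost::fibers::context * ctx = rqueue_.pop();   // local queue first
        if ( nullptr == ctx) {
            // local queue drained: try to steal; steal() returns nullptr
            // rather than handing out a pinned (main-/dispatcher-) context
            ctx = victim.rqueue_.steal();
        }
        return ctx;
    }
};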
diff --git a/boost/fiber/detail/config.hpp b/boost/fiber/detail/config.hpp index f65d48910d..7c7119e1fb 100644 --- a/boost/fiber/detail/config.hpp +++ b/boost/fiber/detail/config.hpp @@ -52,7 +52,7 @@ #endif #if !defined(BOOST_FIBERS_SPIN_MAX_TESTS) -# define BOOST_FIBERS_SPIN_MAX_TESTS 100 +# define BOOST_FIBERS_SPIN_MAX_TESTS 500 #endif // modern architectures have cachelines with 64byte length diff --git a/boost/fiber/detail/context_mpsc_queue.hpp b/boost/fiber/detail/context_mpsc_queue.hpp deleted file mode 100644 index f7e664659c..0000000000 --- a/boost/fiber/detail/context_mpsc_queue.hpp +++ /dev/null @@ -1,98 +0,0 @@ - -// Copyright Dmitry Vyukov 2010-2011. -// Copyright Oliver Kowalke 2016. -// Distributed under the Boost Software License, Version 1.0. -// (See accompanying file LICENSE_1_0.txt or copy at -// http://www.boost.org/LICENSE_1_0.txt) -// -// based on Dmitry Vyukov's intrusive MPSC queue -// http://www.1024cores.net/home/lock-free-algorithms/queues/intrusive-mpsc-node-based-queue -// https://groups.google.com/forum/#!topic/lock-free/aFHvZhu1G-0 - -#ifndef BOOST_FIBERS_DETAIL_CONTEXT_MPSC_QUEUE_H -#define BOOST_FIBERS_DETAIL_CONTEXT_MPSC_QUEUE_H - -#include <atomic> -#include <memory> -#include <type_traits> - -#include <boost/assert.hpp> -#include <boost/config.hpp> - -#include <boost/fiber/context.hpp> -#include <boost/fiber/detail/config.hpp> - -#ifdef BOOST_HAS_ABI_HEADERS -# include BOOST_ABI_PREFIX -#endif - -namespace boost { -namespace fibers { -namespace detail { - -// a MPSC queue -// multiple threads push ready fibers (belonging to local scheduler) -// (thread) local scheduler pops fibers -class context_mpsc_queue { -private: - // not default constructor for context - use aligned_storage instead - alignas(cache_alignment) std::aligned_storage< sizeof( context), alignof( context) >::type storage_{}; - context * dummy_; - alignas(cache_alignment) std::atomic< context * > head_; - alignas(cache_alignment) context * tail_; - char pad_[cacheline_length]; - -public: - context_mpsc_queue() : - dummy_{ reinterpret_cast< context * >( std::addressof( storage_) ) }, - head_{ dummy_ }, - tail_{ dummy_ } { - dummy_->remote_nxt_.store( nullptr, std::memory_order_release); - } - - context_mpsc_queue( context_mpsc_queue const&) = delete; - context_mpsc_queue & operator=( context_mpsc_queue const&) = delete; - - void push( context * ctx) noexcept { - BOOST_ASSERT( nullptr != ctx); - ctx->remote_nxt_.store( nullptr, std::memory_order_release); - context * prev = head_.exchange( ctx, std::memory_order_acq_rel); - prev->remote_nxt_.store( ctx, std::memory_order_release); - } - - context * pop() noexcept { - context * tail = tail_; - context * next = tail->remote_nxt_.load( std::memory_order_acquire); - if ( dummy_ == tail) { - if ( nullptr == next) { - return nullptr; - } - tail_ = next; - tail = next; - next = next->remote_nxt_.load( std::memory_order_acquire);; - } - if ( nullptr != next) { - tail_ = next; - return tail; - } - context * head = head_.load( std::memory_order_acquire); - if ( tail != head) { - return nullptr; - } - push( dummy_); - next = tail->remote_nxt_.load( std::memory_order_acquire); - if ( nullptr != next) { - tail_= next; - return tail; - } - return nullptr; - } -}; - -}}} - -#ifdef BOOST_HAS_ABI_HEADERS -# include BOOST_ABI_SUFFIX -#endif - -#endif // BOOST_FIBERS_DETAIL_CONTEXT_MPSC_QUEUE_H diff --git a/boost/fiber/detail/context_spinlock_queue.hpp b/boost/fiber/detail/context_spinlock_queue.hpp new file mode 100644 index 0000000000..e0ebdabda6 --- 
/dev/null +++ b/boost/fiber/detail/context_spinlock_queue.hpp @@ -0,0 +1,118 @@ + +// Copyright Oliver Kowalke 2015. +// Distributed under the Boost Software License, Version 1.0. +// (See accompanying file LICENSE_1_0.txt or copy at +// http://www.boost.org/LICENSE_1_0.txt) +// + +#ifndef BOOST_FIBERS_DETAIL_SPINLOCK_QUEUE_H +#define BOOST_FIBERS_DETAIL_SPINLOCK_QUEUE_H + +#include <cstddef> +#include <cstring> +#include <mutex> + +#include <boost/config.hpp> + +#include <boost/fiber/context.hpp> +#include <boost/fiber/detail/config.hpp> +#include <boost/fiber/detail/spinlock.hpp> + +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_PREFIX +#endif + +namespace boost { +namespace fibers { +namespace detail { + +class context_spinlock_queue { +private: + typedef context * slot_type; + + alignas(cache_alignment) mutable spinlock splk_{}; + std::size_t pidx_{ 0 }; + std::size_t cidx_{ 0 }; + std::size_t capacity_; + slot_type * slots_; + + void resize_() { + slot_type * old_slots = slots_; + slots_ = new slot_type[2*capacity_]; + std::size_t offset = capacity_ - cidx_; + std::memcpy( slots_, old_slots + cidx_, offset * sizeof( slot_type) ); + if ( 0 < cidx_) { + std::memcpy( slots_ + offset, old_slots, pidx_ * sizeof( slot_type) ); + } + cidx_ = 0; + pidx_ = capacity_ - 1; + capacity_ *= 2; + delete [] old_slots; + } + + bool is_full_() const noexcept { + return cidx_ == ((pidx_ + 1) % capacity_); + } + + bool is_empty_() const noexcept { + return cidx_ == pidx_; + } + +public: + context_spinlock_queue( std::size_t capacity = 4096) : + capacity_{ capacity } { + slots_ = new slot_type[capacity_]; + } + + ~context_spinlock_queue() { + delete [] slots_; + } + + context_spinlock_queue( context_spinlock_queue const&) = delete; + context_spinlock_queue & operator=( context_spinlock_queue const&) = delete; + + bool empty() const noexcept { + spinlock_lock lk{ splk_ }; + return is_empty_(); + } + + void push( context * c) { + spinlock_lock lk{ splk_ }; + if ( is_full_() ) { + resize_(); + } + slots_[pidx_] = c; + pidx_ = (pidx_ + 1) % capacity_; + } + + context * pop() { + spinlock_lock lk{ splk_ }; + context * c = nullptr; + if ( ! is_empty_() ) { + c = slots_[cidx_]; + cidx_ = (cidx_ + 1) % capacity_; + } + return c; + } + + context * steal() { + spinlock_lock lk{ splk_ }; + context * c = nullptr; + if ( ! is_empty_() ) { + c = slots_[cidx_]; + if ( c->is_context( type::pinned_context) ) { + return nullptr; + } + cidx_ = (cidx_ + 1) % capacity_; + } + return c; + } +}; + +}}} + +#ifdef BOOST_HAS_ABI_HEADERS +# include BOOST_ABI_SUFFIX +#endif + +#endif // BOOST_FIBERS_DETAIL_SPINLOCK_QUEUE_H diff --git a/boost/fiber/detail/context_spmc_queue.hpp b/boost/fiber/detail/context_spmc_queue.hpp index 6449e3658f..27256233cf 100644 --- a/boost/fiber/detail/context_spmc_queue.hpp +++ b/boost/fiber/detail/context_spmc_queue.hpp @@ -30,6 +30,11 @@ // In Proceedings of the 18th ACM SIGPLAN symposium on Principles and practice // of parallel programming (PPoPP '13). ACM, New York, NY, USA, 69-80. 
+#if BOOST_COMP_CLANG +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-private-field" +#endif + namespace boost { namespace fibers { namespace detail { @@ -43,43 +48,43 @@ private: sizeof( atomic_type), cache_alignment >::type storage_type; - std::size_t size_; + std::size_t capacity_; storage_type * storage_; public: - array( std::size_t size) : - size_{ size }, - storage_{ new storage_type[size_] } { - for ( std::size_t i = 0; i < size_; ++i) { + array( std::size_t capacity) : + capacity_{ capacity }, + storage_{ new storage_type[capacity_] } { + for ( std::size_t i = 0; i < capacity_; ++i) { ::new ( static_cast< void * >( std::addressof( storage_[i]) ) ) atomic_type{ nullptr }; } } ~array() { - for ( std::size_t i = 0; i < size_; ++i) { + for ( std::size_t i = 0; i < capacity_; ++i) { reinterpret_cast< atomic_type * >( std::addressof( storage_[i]) )->~atomic_type(); } delete [] storage_; } - std::size_t size() const noexcept { - return size_; + std::size_t capacity() const noexcept { + return capacity_; } void push( std::size_t bottom, context * ctx) noexcept { reinterpret_cast< atomic_type * >( - std::addressof( storage_[bottom % size_]) ) + std::addressof( storage_[bottom % capacity_]) ) ->store( ctx, std::memory_order_relaxed); } context * pop( std::size_t top) noexcept { return reinterpret_cast< atomic_type * >( - std::addressof( storage_[top % size_]) ) + std::addressof( storage_[top % capacity_]) ) ->load( std::memory_order_relaxed); } array * resize( std::size_t bottom, std::size_t top) { - std::unique_ptr< array > tmp{ new array{ 2 * size_ } }; + std::unique_ptr< array > tmp{ new array{ 2 * capacity_ } }; for ( std::size_t i = top; i != bottom; ++i) { tmp->push( i, pop( i) ); } @@ -87,15 +92,15 @@ private: } }; - alignas(cache_alignment) std::atomic< std::size_t > top_{ 0 }; - alignas(cache_alignment) std::atomic< std::size_t > bottom_{ 0 }; + alignas(cache_alignment) std::atomic< std::size_t > top_{ 0 }; + alignas(cache_alignment) std::atomic< std::size_t > bottom_{ 0 }; alignas(cache_alignment) std::atomic< array * > array_; - std::vector< array * > old_arrays_{}; + std::vector< array * > old_arrays_{}; char padding_[cacheline_length]; public: - context_spmc_queue() : - array_{ new array{ 1024 } } { + context_spmc_queue( std::size_t capacity = 4096) : + array_{ new array{ capacity } } { old_arrays_.reserve( 32); } @@ -110,19 +115,19 @@ public: context_spmc_queue & operator=( context_spmc_queue const&) = delete; bool empty() const noexcept { - std::size_t bottom{ bottom_.load( std::memory_order_relaxed) }; - std::size_t top{ top_.load( std::memory_order_relaxed) }; + std::size_t bottom = bottom_.load( std::memory_order_relaxed); + std::size_t top = top_.load( std::memory_order_relaxed); return bottom <= top; } void push( context * ctx) { - std::size_t bottom{ bottom_.load( std::memory_order_relaxed) }; - std::size_t top{ top_.load( std::memory_order_acquire) }; - array * a{ array_.load( std::memory_order_relaxed) }; - if ( (a->size() - 1) < (bottom - top) ) { + std::size_t bottom = bottom_.load( std::memory_order_relaxed); + std::size_t top = top_.load( std::memory_order_acquire); + array * a = array_.load( std::memory_order_relaxed); + if ( (a->capacity() - 1) < (bottom - top) ) { // queue is full // resize - array * tmp{ a->resize( bottom, top) }; + array * tmp = a->resize( bottom, top); old_arrays_.push_back( a); std::swap( a, tmp); array_.store( a, std::memory_order_relaxed); @@ -133,16 +138,48 @@ public: } context * pop() { - std::size_t top{ 
top_.load( std::memory_order_acquire) }; + std::size_t bottom = bottom_.load( std::memory_order_relaxed) - 1; + array * a = array_.load( std::memory_order_relaxed); + bottom_.store( bottom, std::memory_order_relaxed); std::atomic_thread_fence( std::memory_order_seq_cst); - std::size_t bottom{ bottom_.load( std::memory_order_acquire) }; - context * ctx{ nullptr }; + std::size_t top = top_.load( std::memory_order_relaxed); + context * ctx = nullptr; + if ( top <= bottom) { + // queue is not empty + ctx = a->pop( bottom); + BOOST_ASSERT( nullptr != ctx); + if ( top == bottom) { + // last element dequeued + if ( ! top_.compare_exchange_strong( top, top + 1, + std::memory_order_seq_cst, + std::memory_order_relaxed) ) { + // lose the race + ctx = nullptr; + } + bottom_.store( bottom + 1, std::memory_order_relaxed); + } + } else { + // queue is empty + bottom_.store( bottom + 1, std::memory_order_relaxed); + } + return ctx; + } + + context * steal() { + std::size_t top = top_.load( std::memory_order_acquire); + std::atomic_thread_fence( std::memory_order_seq_cst); + std::size_t bottom = bottom_.load( std::memory_order_acquire); + context * ctx = nullptr; if ( top < bottom) { // queue is not empty - array * a{ array_.load( std::memory_order_consume) }; + array * a = array_.load( std::memory_order_consume); ctx = a->pop( top); - if ( ctx->is_context( type::pinned_context) || - ! top_.compare_exchange_strong( top, top + 1, + BOOST_ASSERT( nullptr != ctx); + // do not steal pinned context (e.g. main-/dispatcher-context) + if ( ctx->is_context( type::pinned_context) ) { + return nullptr; + } + if ( ! top_.compare_exchange_strong( top, top + 1, std::memory_order_seq_cst, std::memory_order_relaxed) ) { // lose the race @@ -155,4 +192,8 @@ public: }}} +#if BOOST_COMP_CLANG +#pragma clang diagnostic pop +#endif + #endif // BOOST_FIBERS_DETAIL_CONTEXT_SPMC_QUEUE_H diff --git a/boost/fiber/detail/cpu_relax.hpp b/boost/fiber/detail/cpu_relax.hpp index d00020a23b..541b46dfd0 100644 --- a/boost/fiber/detail/cpu_relax.hpp +++ b/boost/fiber/detail/cpu_relax.hpp @@ -7,6 +7,7 @@ #ifndef BOOST_FIBERS_DETAIL_CPU_RELAX_H #define BOOST_FIBERS_DETAIL_CPU_RELAX_H +#include <chrono> #include <thread> #include <boost/config.hpp> @@ -14,7 +15,7 @@ #include <boost/fiber/detail/config.hpp> -#if BOOST_COMP_MSVC +#if BOOST_COMP_MSVC || BOOST_COMP_MSVC_EMULATED # include <Windows.h> #endif @@ -29,22 +30,47 @@ namespace detail { #if BOOST_ARCH_ARM # if BOOST_COMP_MSVC # define cpu_relax() YieldProcessor(); -# else +# elif (defined(__ARM_ARCH_6K__) || \ + defined(__ARM_ARCH_6Z__) || \ + defined(__ARM_ARCH_6ZK__) || \ + defined(__ARM_ARCH_6T2__) || \ + defined(__ARM_ARCH_7__) || \ + defined(__ARM_ARCH_7A__) || \ + defined(__ARM_ARCH_7R__) || \ + defined(__ARM_ARCH_7M__) || \ + defined(__ARM_ARCH_7S__) || \ + defined(__ARM_ARCH_8A__) || \ + defined(__aarch64__)) +// http://groups.google.com/a/chromium.org/forum/#!msg/chromium-dev/YGVrZbxYOlU/Vpgy__zeBQAJ +// mnemonic 'yield' is supported from ARMv6k onwards # define cpu_relax() asm volatile ("yield" ::: "memory"); +# else +# define cpu_relax() asm volatile ("nop" ::: "memory"); # endif #elif BOOST_ARCH_MIPS # define cpu_relax() asm volatile ("pause" ::: "memory"); #elif BOOST_ARCH_PPC +// http://code.metager.de/source/xref/gnu/glibc/sysdeps/powerpc/sys/platform/ppc.h +// http://stackoverflow.com/questions/5425506/equivalent-of-x86-pause-instruction-for-ppc +// mnemonic 'or' shared resource hints +// or 27, 27, 27 This form of 'or' provides a hint that performance +// will probably 
be imrpoved if shared resources dedicated +// to the executing processor are released for use by other +// processors +// extended mnemonics (available with POWER7) +// yield == or 27, 27, 27 # define cpu_relax() asm volatile ("or 27,27,27" ::: "memory"); #elif BOOST_ARCH_X86 -# if BOOST_COMP_MSVC +# if BOOST_COMP_MSVC || BOOST_COMP_MSVC_EMULATED # define cpu_relax() YieldProcessor(); # else # define cpu_relax() asm volatile ("pause" ::: "memory"); # endif #else -# warning "architecture does not support yield/pause mnemonic" -# define cpu_relax() std::this_thread::yield(); +# define cpu_relax() { \ + static constexpr std::chrono::microseconds us0{ 0 }; \ + std::this_thread::sleep_for( us0); \ + } #endif }}} diff --git a/boost/fiber/detail/data.hpp b/boost/fiber/detail/data.hpp index 24e833a9e8..e2b119ec3e 100644 --- a/boost/fiber/detail/data.hpp +++ b/boost/fiber/detail/data.hpp @@ -28,7 +28,7 @@ struct data_t { spinlock_lock * lk{ nullptr }; context * ctx{ nullptr }; - data_t() noexcept = default; + data_t() = default; explicit data_t( spinlock_lock * lk_) noexcept : lk{ lk_ } { diff --git a/boost/fiber/detail/fss.hpp b/boost/fiber/detail/fss.hpp index 54dc5b79d3..27a7d67f26 100644 --- a/boost/fiber/detail/fss.hpp +++ b/boost/fiber/detail/fss.hpp @@ -38,12 +38,13 @@ public: friend inline void intrusive_ptr_add_ref( fss_cleanup_function * p) noexcept { - ++p->use_count_; + p->use_count_.fetch_add( 1, std::memory_order_relaxed); } friend inline void intrusive_ptr_release( fss_cleanup_function * p) noexcept { - if ( --p->use_count_ == 0) { + if ( 1 == p->use_count_.fetch_sub( 1, std::memory_order_release) ) { + std::atomic_thread_fence( std::memory_order_acquire); delete p; } } diff --git a/boost/fiber/detail/futex.hpp b/boost/fiber/detail/futex.hpp index 4c966867c5..d383dc4077 100644 --- a/boost/fiber/detail/futex.hpp +++ b/boost/fiber/detail/futex.hpp @@ -49,7 +49,7 @@ int futex_wake( std::atomic< std::int32_t > * addr) { inline int futex_wait( std::atomic< std::int32_t > * addr, std::int32_t x) { - ::WaitOnAddress( static_cast< volatile void * >( addr), & x, sizeof( x), -1); + ::WaitOnAddress( static_cast< volatile void * >( addr), & x, sizeof( x), INFINITE); return 0; } #else diff --git a/boost/fiber/detail/spinlock_ttas.hpp b/boost/fiber/detail/spinlock_ttas.hpp index d64630d84d..380773ad6d 100644 --- a/boost/fiber/detail/spinlock_ttas.hpp +++ b/boost/fiber/detail/spinlock_ttas.hpp @@ -19,6 +19,11 @@ // https://software.intel.com/en-us/articles/benefitting-power-and-performance-sleep-loops // https://software.intel.com/en-us/articles/long-duration-spin-wait-loops-on-hyper-threading-technology-enabled-intel-processors +#if BOOST_COMP_CLANG +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-private-field" +#endif + namespace boost { namespace fibers { namespace detail { @@ -30,10 +35,7 @@ private: unlocked }; - // align shared variable 'state_' at cache line to prevent false sharing - alignas(cache_alignment) std::atomic< spinlock_status > state_{ spinlock_status::unlocked }; - // padding to avoid other data one the cacheline of shared variable 'state_' - char pad[cacheline_length]; + std::atomic< spinlock_status > state_{ spinlock_status::unlocked }; public: spinlock_ttas() noexcept = default; @@ -63,20 +65,15 @@ public: // delays the next instruction's execution for a finite period of time (depends on processor family) // the CPU is not under demand, parts of the pipeline are no longer being used // -> reduces the power consumed by the CPU + // -> prevent 
pipeline stalls cpu_relax(); - } else if ( BOOST_FIBERS_SPIN_MAX_TESTS + 20 > tests) { - ++tests; + } else { // std::this_thread::sleep_for( 0us) has a fairly long instruction path length, // combined with an expensive ring3 to ring 0 transition costing about 1000 cycles // std::this_thread::sleep_for( 0us) lets give up this_thread the remaining part of its time slice // if and only if a thread of equal or greater priority is ready to run static constexpr std::chrono::microseconds us0{ 0 }; std::this_thread::sleep_for( us0); - } else { - // std::this_thread::yield() allows this_thread to give up the remaining part of its time slice, - // but only to another thread on the same processor - // instead of constant checking, a thread only checks if no other useful work is pending - std::this_thread::yield(); } #else std::this_thread::yield(); @@ -89,10 +86,12 @@ public: // utilize 'Binary Exponential Backoff' algorithm // linear_congruential_engine is a random number engine based on Linear congruential generator (LCG) static thread_local std::minstd_rand generator; - const std::size_t z = - std::uniform_int_distribution< std::size_t >{ 0, static_cast< std::size_t >( 1) << collisions }( generator); + static std::uniform_int_distribution< std::size_t > distribution{ 0, static_cast< std::size_t >( 1) << collisions }; + const std::size_t z = distribution( generator); ++collisions; for ( std::size_t i = 0; i < z; ++i) { + // -> reduces the power consumed by the CPU + // -> prevent pipeline stalls cpu_relax(); } } else { @@ -109,4 +108,8 @@ public: }}} +#if BOOST_COMP_CLANG +#pragma clang diagnostic pop +#endif + #endif // BOOST_FIBERS_SPINLOCK_TTAS_H diff --git a/boost/fiber/detail/spinlock_ttas_adaptive.hpp b/boost/fiber/detail/spinlock_ttas_adaptive.hpp index c6a9a57d79..da044b6298 100644 --- a/boost/fiber/detail/spinlock_ttas_adaptive.hpp +++ b/boost/fiber/detail/spinlock_ttas_adaptive.hpp @@ -31,11 +31,8 @@ private: unlocked }; - // align shared variable 'state_' at cache line to prevent false sharing - alignas(cache_alignment) std::atomic< spinlock_status > state_{ spinlock_status::unlocked }; - std::atomic< std::size_t > tests_{ 0 }; - // padding to avoid other data one the cacheline of shared variable 'state_' - char pad[cacheline_length]; + std::atomic< spinlock_status > state_{ spinlock_status::unlocked }; + std::atomic< std::size_t > tests_{ 0 }; public: spinlock_ttas_adaptive() noexcept = default; @@ -67,8 +64,9 @@ public: // delays the next instruction's execution for a finite period of time (depends on processor family) // the CPU is not under demand, parts of the pipeline are no longer being used // -> reduces the power consumed by the CPU + // -> prevent pipeline stalls cpu_relax(); - } else if ( BOOST_FIBERS_SPIN_MAX_TESTS + 20 > tests) { + } else { ++tests; // std::this_thread::sleep_for( 0us) has a fairly long instruction path length, // combined with an expensive ring3 to ring 0 transition costing about 1000 cycles @@ -76,11 +74,6 @@ public: // if and only if a thread of equal or greater priority is ready to run static constexpr std::chrono::microseconds us0{ 0 }; std::this_thread::sleep_for( us0); - } else { - // std::this_thread::yield() allows this_thread to give up the remaining part of its time slice, - // but only to another thread on the same processor - // instead of constant checking, a thread only checks if no other useful work is pending - std::this_thread::yield(); } #else std::this_thread::yield(); @@ -93,10 +86,12 @@ public: // utilize 'Binary Exponential Backoff' 
algorithm // linear_congruential_engine is a random number engine based on Linear congruential generator (LCG) static thread_local std::minstd_rand generator; - const std::size_t z = - std::uniform_int_distribution< std::size_t >{ 0, static_cast< std::size_t >( 1) << collisions }( generator); + static std::uniform_int_distribution< std::size_t > distribution{ 0, static_cast< std::size_t >( 1) << collisions }; + const std::size_t z = distribution( generator); ++collisions; for ( std::size_t i = 0; i < z; ++i) { + // -> reduces the power consumed by the CPU + // -> prevent pipeline stalls cpu_relax(); } } else { diff --git a/boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp b/boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp index fbd6a0e4d2..61ab47691e 100644 --- a/boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp +++ b/boost/fiber/detail/spinlock_ttas_adaptive_futex.hpp @@ -26,11 +26,8 @@ namespace detail { class spinlock_ttas_adaptive_futex { private: - // align shared variable 'value_' at cache line to prevent false sharing - alignas(cache_alignment) std::atomic< std::int32_t > value_{ 0 }; - std::atomic< std::int32_t > tests_{ 0 }; - // padding to avoid other data one the cacheline of shared variable 'value_' - char pad_[cacheline_length]; + std::atomic< std::int32_t > value_{ 0 }; + std::atomic< std::int32_t > tests_{ 0 }; public: spinlock_ttas_adaptive_futex() noexcept = default; @@ -61,6 +58,7 @@ public: // delays the next instruction's execution for a finite period of time (depends on processor family) // the CPU is not under demand, parts of the pipeline are no longer being used // -> reduces the power consumed by the CPU + // -> prevent pipeline stalls cpu_relax(); #else // std::this_thread::yield() allows this_thread to give up the remaining part of its time slice, @@ -73,10 +71,12 @@ public: // utilize 'Binary Exponential Backoff' algorithm // linear_congruential_engine is a random number engine based on Linear congruential generator (LCG) static thread_local std::minstd_rand generator; - const std::int32_t z = std::uniform_int_distribution< std::int32_t >{ - 0, static_cast< std::int32_t >( 1) << collisions }( generator); + static std::uniform_int_distribution< std::int32_t > distribution{ 0, static_cast< std::int32_t >( 1) << collisions }; + const std::int32_t z = distribution( generator); ++collisions; for ( std::int32_t i = 0; i < z; ++i) { + // -> reduces the power consumed by the CPU + // -> prevent pipeline stalls cpu_relax(); } } else { diff --git a/boost/fiber/detail/spinlock_ttas_futex.hpp b/boost/fiber/detail/spinlock_ttas_futex.hpp index b11e63b587..a427b73ba5 100644 --- a/boost/fiber/detail/spinlock_ttas_futex.hpp +++ b/boost/fiber/detail/spinlock_ttas_futex.hpp @@ -25,10 +25,7 @@ namespace detail { class spinlock_ttas_futex { private: - // align shared variable 'value_' at cache line to prevent false sharing - alignas(cache_alignment) std::atomic< std::int32_t > value_{ 0 }; - // padding to avoid other data one the cacheline of shared variable 'value_' - char pad_[cacheline_length]; + std::atomic< std::int32_t > value_{ 0 }; public: spinlock_ttas_futex() noexcept = default; @@ -57,6 +54,7 @@ public: // delays the next instruction's execution for a finite period of time (depends on processor family) // the CPU is not under demand, parts of the pipeline are no longer being used // -> reduces the power consumed by the CPU + // -> prevent pipeline stalls cpu_relax(); #else // std::this_thread::yield() allows this_thread to give up the remaining part of its time 
slice, @@ -69,10 +67,12 @@ public: // utilize 'Binary Exponential Backoff' algorithm // linear_congruential_engine is a random number engine based on Linear congruential generator (LCG) static thread_local std::minstd_rand generator; - const std::int32_t z = std::uniform_int_distribution< std::int32_t >{ - 0, static_cast< std::int32_t >( 1) << collisions }( generator); + static std::uniform_int_distribution< std::int32_t > distribution{ 0, static_cast< std::int32_t >( 1) << collisions }; + const std::int32_t z = distribution( generator); ++collisions; for ( std::int32_t i = 0; i < z; ++i) { + // -> reduces the power consumed by the CPU + // -> prevent pipeline stalls cpu_relax(); } } else { diff --git a/boost/fiber/detail/wrap.hpp b/boost/fiber/detail/wrap.hpp index 0369e61ee6..558de6bd94 100644 --- a/boost/fiber/detail/wrap.hpp +++ b/boost/fiber/detail/wrap.hpp @@ -10,8 +10,14 @@ #include <type_traits> #include <boost/config.hpp> +#if defined(BOOST_NO_CXX17_STD_INVOKE) #include <boost/context/detail/invoke.hpp> -#include <boost/context/execution_context.hpp> +#endif +#if (BOOST_EXECUTION_CONTEXT==1) +# include <boost/context/execution_context.hpp> +#else +# include <boost/context/continuation.hpp> +#endif #include <boost/fiber/detail/config.hpp> #include <boost/fiber/detail/data.hpp> @@ -36,9 +42,9 @@ private: public: wrapper( Fn1 && fn1, Fn2 && fn2, Tpl && tpl, boost::context::execution_context const& ctx) : - fn1_( std::move( fn1) ), - fn2_( std::move( fn2) ), - tpl_( std::move( tpl) ), + fn1_{ std::move( fn1) }, + fn2_{ std::move( fn2) }, + tpl_{ std::move( tpl) }, ctx_{ ctx } { } @@ -49,9 +55,11 @@ public: wrapper & operator=( wrapper && other) = default; void operator()( void * vp) { - boost::context::detail::invoke( - std::move( fn1_), - fn2_, tpl_, ctx_, vp); +#if defined(BOOST_NO_CXX17_STD_INVOKE) + boost::context::detail::invoke( std::move( fn1_), fn2_, tpl_, ctx_, vp); +#else + std::invoke( std::move( fn1_), fn2_, tpl_, ctx_, vp); +#endif } }; @@ -59,11 +67,11 @@ template< typename Fn1, typename Fn2, typename Tpl > wrapper< Fn1, Fn2, Tpl > wrap( Fn1 && fn1, Fn2 && fn2, Tpl && tpl, boost::context::execution_context const& ctx) { - return wrapper< Fn1, Fn2, Tpl >( + return wrapper< Fn1, Fn2, Tpl >{ std::forward< Fn1 >( fn1), std::forward< Fn2 >( fn2), std::forward< Tpl >( tpl), - ctx); + ctx }; } #else template< typename Fn1, typename Fn2, typename Tpl > @@ -75,9 +83,9 @@ private: public: wrapper( Fn1 && fn1, Fn2 && fn2, Tpl && tpl) : - fn1_( std::move( fn1) ), - fn2_( std::move( fn2) ), - tpl_( std::move( tpl) ) { + fn1_{ std::move( fn1) }, + fn2_{ std::move( fn2) }, + tpl_{ std::move( tpl) } { } wrapper( wrapper const&) = delete; @@ -86,24 +94,31 @@ public: wrapper( wrapper && other) = default; wrapper & operator=( wrapper && other) = default; - boost::context::execution_context< data_t * > - operator()( boost::context::execution_context< data_t * > && ctx, data_t * dp) { + boost::context::continuation + operator()( boost::context::continuation && c) { +#if defined(BOOST_NO_CXX17_STD_INVOKE) return boost::context::detail::invoke( std::move( fn1_), fn2_, tpl_, - std::forward< boost::context::execution_context< data_t * > >( ctx), - dp); + std::forward< boost::context::continuation >( c) ); +#else + return std::invoke( + std::move( fn1_), + fn2_, + tpl_, + std::forward< boost::context::continuation >( c) ); +#endif } }; template< typename Fn1, typename Fn2, typename Tpl > wrapper< Fn1, Fn2, Tpl > wrap( Fn1 && fn1, Fn2 && fn2, Tpl && tpl) { - return wrapper< Fn1, Fn2, Tpl >( + 
return wrapper< Fn1, Fn2, Tpl >{ std::forward< Fn1 >( fn1), std::forward< Fn2 >( fn2), - std::forward< Tpl >( tpl) ); + std::forward< Tpl >( tpl) }; } #endif
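One pattern recurs across all four spinlock headers touched above: test-and-test-and-set spinning with cpu_relax() for up to BOOST_FIBERS_SPIN_MAX_TESTS iterations (raised from 100 to 500 in config.hpp), then sleep_for(0us) instead of yield(), and binary exponential backoff after a failed exchange. The following self-contained sketch condenses that pattern; cpu_relax() is approximated with std::this_thread::yield(), the backoff distribution is kept local with a capped window, and the adaptive/futex variants are omitted.

// Condensed, self-contained sketch of the TTAS + binary-exponential-backoff
// scheme used by the spinlock_ttas* classes in this diff; not a drop-in
// replacement for any of them.
#include <atomic>
#include <chrono>
#include <cstddef>
#include <random>
#include <thread>

class ttas_spinlock {
    enum class state { locked, unlocked };
    static constexpr std::size_t spin_max_tests = 500;   // stands in for BOOST_FIBERS_SPIN_MAX_TESTS

    std::atomic< state > state_{ state::unlocked };

public:
    void lock() noexcept {
        std::size_t collisions = 0;
        for (;;) {
            std::size_t tests = 0;
            // test before test-and-set: spin on the cheap relaxed load first
            while ( state::locked == state_.load( std::memory_order_relaxed) ) {
                if ( spin_max_tests > tests) {
                    ++tests;
                    std::this_thread::yield();    // stand-in for cpu_relax()
                } else {
                    // relinquish the rest of the time slice to a thread of
                    // equal or greater priority, if one is ready to run
                    std::this_thread::sleep_for( std::chrono::microseconds{ 0 });
                }
            }
            if ( state::unlocked ==
                     state_.exchange( state::locked, std::memory_order_acquire) ) {
                return;                           // lock acquired
            }
            // lost the race against another locker: back off for a random
            // number of relax iterations, doubling the window each time
            static thread_local std::minstd_rand generator;
            const std::size_t window = ( collisions < 16) ? collisions : 16;
            std::uniform_int_distribution< std::size_t > distribution{
                0, static_cast< std::size_t >( 1) << window };
            const std::size_t z = distribution( generator);
            ++collisions;
            for ( std::size_t i = 0; i < z; ++i) {
                std::this_thread::yield();
            }
        }
    }

    void unlock() noexcept {
        state_.store( state::unlocked, std::memory_order_release);
    }
};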