summary refs log tree commit diff
path: root/boost/atomic/detail/ops_gcc_x86_dcas.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'boost/atomic/detail/ops_gcc_x86_dcas.hpp')
-rw-r--r-- boost/atomic/detail/ops_gcc_x86_dcas.hpp 566
1 files changed, 228 insertions, 338 deletions
diff --git a/boost/atomic/detail/ops_gcc_x86_dcas.hpp b/boost/atomic/detail/ops_gcc_x86_dcas.hpp
index 28cbc225e3..4dacc66fe2 100644
--- a/boost/atomic/detail/ops_gcc_x86_dcas.hpp
+++ b/boost/atomic/detail/ops_gcc_x86_dcas.hpp
@@ -5,7 +5,7 @@
*
* Copyright (c) 2009 Helge Bahmann
* Copyright (c) 2012 Tim Blechmann
- * Copyright (c) 2014 Andrey Semashev
+ * Copyright (c) 2014 - 2018 Andrey Semashev
*/
/*!
* \file atomic/detail/ops_gcc_x86_dcas.hpp
@@ -20,6 +20,7 @@
#include <boost/memory_order.hpp>
#include <boost/atomic/detail/config.hpp>
#include <boost/atomic/detail/storage_type.hpp>
+#include <boost/atomic/detail/string_ops.hpp>
#include <boost/atomic/capabilities.hpp>
#ifdef BOOST_HAS_PRAGMA_ONCE
@@ -30,119 +31,91 @@ namespace boost {
namespace atomics {
namespace detail {
+// Note: In the 32-bit PIC code guarded with BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX below we have to avoid using memory
+// operand constraints because the compiler may choose to use ebx as the base register for that operand. At least, clang
+// is known to do that. For this reason we have to pre-compute a pointer to storage and pass it in edi. For the same reason
+// we cannot save ebx to the stack with a mov instruction, so we use esi as a scratch register and restore it afterwards.
+// Alternatively, we could push/pop the register to the stack, but exchanging the registers is faster.
+// The need to pass a pointer in edi is a bit wasteful because normally the memory operand would use a base pointer
+// with an offset (e.g. `this` + offset). But unfortunately, there seems to be no way around it.
+
#if defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG8B)
template< bool Signed >
struct gcc_dcas_x86
{
- typedef typename make_storage_type< 8u, Signed >::type storage_type;
- typedef typename make_storage_type< 8u, Signed >::aligned aligned_storage_type;
+ typedef typename make_storage_type< 8u >::type storage_type;
+ typedef typename make_storage_type< 8u >::aligned aligned_storage_type;
+ typedef uint32_t BOOST_ATOMIC_DETAIL_MAY_ALIAS aliasing_uint32_t;
+ static BOOST_CONSTEXPR_OR_CONST bool full_cas_based = true;
static BOOST_CONSTEXPR_OR_CONST bool is_always_lock_free = true;
static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
{
- if ((((uint32_t)&storage) & 0x00000007) == 0)
+ if (BOOST_LIKELY((((uint32_t)&storage) & 0x00000007) == 0u))
{
-#if defined(__SSE2__)
+#if defined(__SSE__)
+ typedef float xmm_t __attribute__((__vector_size__(16)));
+ xmm_t xmm_scratch;
__asm__ __volatile__
(
#if defined(__AVX__)
- "vmovq %1, %%xmm4\n\t"
- "vmovq %%xmm4, %0\n\t"
+ "vmovq %[value], %[xmm_scratch]\n\t"
+ "vmovq %[xmm_scratch], %[storage]\n\t"
+#elif defined(__SSE2__)
+ "movq %[value], %[xmm_scratch]\n\t"
+ "movq %[xmm_scratch], %[storage]\n\t"
#else
- "movq %1, %%xmm4\n\t"
- "movq %%xmm4, %0\n\t"
+ "xorps %[xmm_scratch], %[xmm_scratch]\n\t"
+ "movlps %[value], %[xmm_scratch]\n\t"
+ "movlps %[xmm_scratch], %[storage]\n\t"
#endif
- : "=m" (storage)
- : "m" (v)
- : "memory", "xmm4"
+ : [storage] "=m" (storage), [xmm_scratch] "=x" (xmm_scratch)
+ : [value] "m" (v)
+ : "memory"
);
#else
__asm__ __volatile__
(
- "fildll %1\n\t"
- "fistpll %0\n\t"
- : "=m" (storage)
- : "m" (v)
+ "fildll %[value]\n\t"
+ "fistpll %[storage]\n\t"
+ : [storage] "=m" (storage)
+ : [value] "m" (v)
: "memory"
);
#endif
}
else
{
-#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS)
-#if defined(__PIC__)
- uint32_t v_lo = (uint32_t)v;
- uint32_t scratch;
- __asm__ __volatile__
- (
- "movl %%ebx, %[scratch]\n\t"
- "movl %[value_lo], %%ebx\n\t"
- "movl %[dest], %%eax\n\t"
- "movl 4+%[dest], %%edx\n\t"
- ".align 16\n\t"
- "1: lock; cmpxchg8b %[dest]\n\t"
- "jne 1b\n\t"
- "movl %[scratch], %%ebx\n\t"
- : [scratch] "=m" (scratch), [dest] "=o" (storage), [value_lo] "+a" (v_lo)
- : "c" ((uint32_t)(v >> 32))
- : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "edx", "memory"
- );
-#else // defined(__PIC__)
- __asm__ __volatile__
- (
- "movl %[dest], %%eax\n\t"
- "movl 4+%[dest], %%edx\n\t"
- ".align 16\n\t"
- "1: lock; cmpxchg8b %[dest]\n\t"
- "jne 1b\n\t"
- : [dest] "=o" (storage)
- : [value_lo] "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32))
- : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "eax", "edx", "memory"
- );
-#endif // defined(__PIC__)
-#else // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS)
-#if defined(__PIC__)
- uint32_t v_lo = (uint32_t)v;
- uint32_t scratch;
+#if defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
__asm__ __volatile__
(
- "movl %%ebx, %[scratch]\n\t"
- "movl %[value_lo], %%ebx\n\t"
- "movl 0(%[dest]), %%eax\n\t"
+ "xchgl %%ebx, %%esi\n\t"
+ "movl %%eax, %%ebx\n\t"
+ "movl (%[dest]), %%eax\n\t"
"movl 4(%[dest]), %%edx\n\t"
".align 16\n\t"
- "1: lock; cmpxchg8b 0(%[dest])\n\t"
+ "1: lock; cmpxchg8b (%[dest])\n\t"
"jne 1b\n\t"
- "movl %[scratch], %%ebx\n\t"
-#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES)
- : [scratch] "=m,m" (scratch), [value_lo] "+a,a" (v_lo)
- : "c,c" ((uint32_t)(v >> 32)), [dest] "D,S" (&storage)
-#else
- : [scratch] "=m" (scratch), [value_lo] "+a" (v_lo)
- : "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage)
-#endif
+ "xchgl %%ebx, %%esi\n\t"
+ :
+ : "a" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage)
: BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "edx", "memory"
);
-#else // defined(__PIC__)
+#else // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
__asm__ __volatile__
(
- "movl 0(%[dest]), %%eax\n\t"
- "movl 4(%[dest]), %%edx\n\t"
+ "movl %[dest_lo], %%eax\n\t"
+ "movl %[dest_hi], %%edx\n\t"
".align 16\n\t"
- "1: lock; cmpxchg8b 0(%[dest])\n\t"
+ "1: lock; cmpxchg8b %[dest_lo]\n\t"
"jne 1b\n\t"
- :
-#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES)
- : [value_lo] "b,b" ((uint32_t)v), "c,c" ((uint32_t)(v >> 32)), [dest] "D,S" (&storage)
-#else
- : [value_lo] "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage)
-#endif
+ : [dest_lo] "=m" (storage), [dest_hi] "=m" (reinterpret_cast< volatile aliasing_uint32_t* >(&storage)[1])
+ : [value_lo] "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32))
: BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "eax", "edx", "memory"
);
-#endif // defined(__PIC__)
-#endif // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS)
+#endif // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
}
}
@@ -150,29 +123,35 @@ struct gcc_dcas_x86
{
storage_type value;
- if ((((uint32_t)&storage) & 0x00000007) == 0)
+ if (BOOST_LIKELY((((uint32_t)&storage) & 0x00000007) == 0u))
{
-#if defined(__SSE2__)
+#if defined(__SSE__)
+ typedef float xmm_t __attribute__((__vector_size__(16)));
+ xmm_t xmm_scratch;
__asm__ __volatile__
(
#if defined(__AVX__)
- "vmovq %1, %%xmm4\n\t"
- "vmovq %%xmm4, %0\n\t"
+ "vmovq %[storage], %[xmm_scratch]\n\t"
+ "vmovq %[xmm_scratch], %[value]\n\t"
+#elif defined(__SSE2__)
+ "movq %[storage], %[xmm_scratch]\n\t"
+ "movq %[xmm_scratch], %[value]\n\t"
#else
- "movq %1, %%xmm4\n\t"
- "movq %%xmm4, %0\n\t"
+ "xorps %[xmm_scratch], %[xmm_scratch]\n\t"
+ "movlps %[storage], %[xmm_scratch]\n\t"
+ "movlps %[xmm_scratch], %[value]\n\t"
#endif
- : "=m" (value)
- : "m" (storage)
- : "memory", "xmm4"
+ : [value] "=m" (value), [xmm_scratch] "=x" (xmm_scratch)
+ : [storage] "m" (storage)
+ : "memory"
);
#else
__asm__ __volatile__
(
- "fildll %1\n\t"
- "fistpll %0\n\t"
- : "=m" (value)
- : "m" (storage)
+ "fildll %[storage]\n\t"
+ "fistpll %[value]\n\t"
+ : [value] "=m" (value)
+ : [storage] "m" (storage)
: "memory"
);
#endif
@@ -182,7 +161,21 @@ struct gcc_dcas_x86
#if defined(__clang__)
// Clang cannot allocate eax:edx register pairs but it has sync intrinsics
value = __sync_val_compare_and_swap(&storage, (storage_type)0, (storage_type)0);
-#else
+#elif defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
+ uint32_t value_bits[2];
+ // We don't care for comparison result here; the previous value will be stored into value anyway.
+ // Also we don't care for ebx and ecx values, they just have to be equal to eax and edx before cmpxchg8b.
+ __asm__ __volatile__
+ (
+ "movl %%ebx, %%eax\n\t"
+ "movl %%ecx, %%edx\n\t"
+ "lock; cmpxchg8b %[storage]\n\t"
+ : "=&a" (value_bits[0]), "=&d" (value_bits[1])
+ : [storage] "m" (storage)
+ : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
+ );
+ BOOST_ATOMIC_DETAIL_MEMCPY(&value, value_bits, sizeof(value));
+#else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
// We don't care for comparison result here; the previous value will be stored into value anyway.
// Also we don't care for ebx and ecx values, they just have to be equal to eax and edx before cmpxchg8b.
__asm__ __volatile__
@@ -194,7 +187,7 @@ struct gcc_dcas_x86
: [storage] "m" (storage)
: BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
);
-#endif
+#endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
}
return value;
@@ -210,56 +203,39 @@ struct gcc_dcas_x86
expected = __sync_val_compare_and_swap(&storage, old_expected, desired);
return expected == old_expected;
-#elif defined(__PIC__)
-
- // Make sure ebx is saved and restored properly in case
- // of position independent code. To make this work
- // setup register constraints such that ebx can not be
- // used by accident e.g. as base address for the variable
- // to be modified. Accessing "scratch" should always be okay,
- // as it can only be placed on the stack (and therefore
- // accessed through ebp or esp only).
- //
- // In theory, could push/pop ebx onto/off the stack, but movs
- // to a prepared stack slot turn out to be faster.
+#elif defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
- uint32_t scratch;
bool success;
+
#if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
__asm__ __volatile__
(
- "movl %%ebx, %[scratch]\n\t"
- "movl %[desired_lo], %%ebx\n\t"
+ "xchgl %%ebx, %%esi\n\t"
"lock; cmpxchg8b (%[dest])\n\t"
- "movl %[scratch], %%ebx\n\t"
- : "+A" (expected), [scratch] "=m" (scratch), [success] "=@ccz" (success)
- : [desired_lo] "Sm" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)), [dest] "D" (&storage)
+ "xchgl %%ebx, %%esi\n\t"
+ : "+A" (expected), [success] "=@ccz" (success)
+ : "S" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)), [dest] "D" (&storage)
: BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
);
#else // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
__asm__ __volatile__
(
- "movl %%ebx, %[scratch]\n\t"
- "movl %[desired_lo], %%ebx\n\t"
+ "xchgl %%ebx, %%esi\n\t"
"lock; cmpxchg8b (%[dest])\n\t"
- "movl %[scratch], %%ebx\n\t"
+ "xchgl %%ebx, %%esi\n\t"
"sete %[success]\n\t"
-#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES)
- : "+A,A,A,A,A,A" (expected), [scratch] "=m,m,m,m,m,m" (scratch), [success] "=q,m,q,m,q,m" (success)
- : [desired_lo] "S,S,D,D,m,m" ((uint32_t)desired), "c,c,c,c,c,c" ((uint32_t)(desired >> 32)), [dest] "D,D,S,S,D,D" (&storage)
-#else
- : "+A" (expected), [scratch] "=m" (scratch), [success] "=q" (success)
- : [desired_lo] "S" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)), [dest] "D" (&storage)
-#endif
+ : "+A" (expected), [success] "=qm" (success)
+ : "S" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)), [dest] "D" (&storage)
: BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
);
#endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
return success;
-#else // defined(__PIC__)
+#else // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
bool success;
+
#if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
__asm__ __volatile__
(
@@ -273,20 +249,15 @@ struct gcc_dcas_x86
(
"lock; cmpxchg8b %[dest]\n\t"
"sete %[success]\n\t"
-#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES)
- : "+A,A" (expected), [dest] "+m,m" (storage), [success] "=q,m" (success)
- : "b,b" ((uint32_t)desired), "c,c" ((uint32_t)(desired >> 32))
-#else
- : "+A" (expected), [dest] "+m" (storage), [success] "=q" (success)
+ : "+A" (expected), [dest] "+m" (storage), [success] "=qm" (success)
: "b" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32))
-#endif
: BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
);
#endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
return success;
-#endif // defined(__PIC__)
+#endif // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
}
static BOOST_FORCEINLINE bool compare_exchange_weak(
@@ -297,93 +268,105 @@ struct gcc_dcas_x86
static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT
{
-#if defined(__clang__)
- // Clang cannot allocate eax:edx register pairs but it has sync intrinsics
- storage_type old_val = storage;
- while (true)
- {
- storage_type val = __sync_val_compare_and_swap(&storage, old_val, v);
- if (val == old_val)
- return val;
- old_val = val;
- }
-#elif !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS)
-#if defined(__PIC__)
- uint32_t scratch;
+#if defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
+#if defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
+
+ uint32_t old_bits[2];
__asm__ __volatile__
(
- "movl %%ebx, %[scratch]\n\t"
- "movl %%eax, %%ebx\n\t"
- "movl %%edx, %%ecx\n\t"
- "movl %[dest], %%eax\n\t"
- "movl 4+%[dest], %%edx\n\t"
+ "xchgl %%ebx, %%esi\n\t"
+ "movl (%[dest]), %%eax\n\t"
+ "movl 4(%[dest]), %%edx\n\t"
".align 16\n\t"
- "1: lock; cmpxchg8b %[dest]\n\t"
+ "1: lock; cmpxchg8b (%[dest])\n\t"
"jne 1b\n\t"
- "movl %[scratch], %%ebx\n\t"
- : "+A" (v), [scratch] "=m" (scratch), [dest] "+o" (storage)
- :
- : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "ecx", "memory"
+ "xchgl %%ebx, %%esi\n\t"
+ : "=a" (old_bits[0]), "=d" (old_bits[1])
+ : "S" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage)
+ : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
);
- return v;
-#else // defined(__PIC__)
+
+ storage_type old_value;
+ BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value));
+ return old_value;
+
+#else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
+
+ storage_type old_value;
__asm__ __volatile__
(
- "movl %[dest], %%eax\n\t"
- "movl 4+%[dest], %%edx\n\t"
+ "xchgl %%ebx, %%esi\n\t"
+ "movl (%[dest]), %%eax\n\t"
+ "movl 4(%[dest]), %%edx\n\t"
".align 16\n\t"
- "1: lock; cmpxchg8b %[dest]\n\t"
+ "1: lock; cmpxchg8b (%[dest])\n\t"
"jne 1b\n\t"
- : "=A" (v), [dest] "+o" (storage)
- : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32))
+ "xchgl %%ebx, %%esi\n\t"
+ : "=A" (old_value)
+ : "S" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage)
: BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
);
- return v;
-#endif // defined(__PIC__)
-#else // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS)
-#if defined(__PIC__)
- uint32_t scratch;
+ return old_value;
+
+#endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
+#else // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
+#if defined(__MINGW32__) && ((__GNUC__+0) * 100 + (__GNUC_MINOR__+0)) < 407
+
+ // MinGW gcc up to 4.6 has problems with allocating registers in the asm blocks below
+ uint32_t old_bits[2];
__asm__ __volatile__
(
- "movl %%ebx, %[scratch]\n\t"
- "movl %%eax, %%ebx\n\t"
- "movl %%edx, %%ecx\n\t"
- "movl 0(%[dest]), %%eax\n\t"
+ "movl (%[dest]), %%eax\n\t"
"movl 4(%[dest]), %%edx\n\t"
".align 16\n\t"
- "1: lock; cmpxchg8b 0(%[dest])\n\t"
+ "1: lock; cmpxchg8b (%[dest])\n\t"
"jne 1b\n\t"
- "movl %[scratch], %%ebx\n\t"
-#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES)
- : "+A,A" (v), [scratch] "=m,m" (scratch)
- : [dest] "D,S" (&storage)
-#else
- : "+A" (v), [scratch] "=m" (scratch)
- : [dest] "D" (&storage)
-#endif
- : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "ecx", "memory"
+ : "=&a" (old_bits[0]), "=&d" (old_bits[1])
+ : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "DS" (&storage)
+ : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
);
- return v;
-#else // defined(__PIC__)
+
+ storage_type old_value;
+ BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value));
+ return old_value;
+
+#elif defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
+
+ uint32_t old_bits[2];
__asm__ __volatile__
(
- "movl 0(%[dest]), %%eax\n\t"
- "movl 4(%[dest]), %%edx\n\t"
+ "movl %[dest_lo], %%eax\n\t"
+ "movl %[dest_hi], %%edx\n\t"
".align 16\n\t"
- "1: lock; cmpxchg8b 0(%[dest])\n\t"
+ "1: lock; cmpxchg8b %[dest_lo]\n\t"
"jne 1b\n\t"
-#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES)
- : "=A,A" (v)
- : "b,b" ((uint32_t)v), "c,c" ((uint32_t)(v >> 32)), [dest] "D,S" (&storage)
-#else
- : "=A" (v)
- : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage)
-#endif
+ : "=&a" (old_bits[0]), "=&d" (old_bits[1]), [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint32_t* >(&storage)[1])
+ : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32))
: BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
);
- return v;
-#endif // defined(__PIC__)
-#endif
+
+ storage_type old_value;
+ BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value));
+ return old_value;
+
+#else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
+
+ storage_type old_value;
+ __asm__ __volatile__
+ (
+ "movl %[dest_lo], %%eax\n\t"
+ "movl %[dest_hi], %%edx\n\t"
+ ".align 16\n\t"
+ "1: lock; cmpxchg8b %[dest_lo]\n\t"
+ "jne 1b\n\t"
+ : "=&A" (old_value), [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint32_t* >(&storage)[1])
+ : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32))
+ : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
+ );
+ return old_value;
+
+#endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
+#endif // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX)
}
};
@@ -394,82 +377,59 @@ struct gcc_dcas_x86
template< bool Signed >
struct gcc_dcas_x86_64
{
- typedef typename make_storage_type< 16u, Signed >::type storage_type;
- typedef typename make_storage_type< 16u, Signed >::aligned aligned_storage_type;
+ typedef typename make_storage_type< 16u >::type storage_type;
+ typedef typename make_storage_type< 16u >::aligned aligned_storage_type;
+ typedef uint64_t BOOST_ATOMIC_DETAIL_MAY_ALIAS aliasing_uint64_t;
+ static BOOST_CONSTEXPR_OR_CONST bool full_cas_based = true;
static BOOST_CONSTEXPR_OR_CONST bool is_always_lock_free = true;
static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
{
- uint64_t const* p_value = (uint64_t const*)&v;
- const uint64_t v_lo = p_value[0], v_hi = p_value[1];
-#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS)
__asm__ __volatile__
(
- "movq %[dest], %%rax\n\t"
- "movq 8+%[dest], %%rdx\n\t"
+ "movq %[dest_lo], %%rax\n\t"
+ "movq %[dest_hi], %%rdx\n\t"
".align 16\n\t"
- "1: lock; cmpxchg16b %[dest]\n\t"
+ "1: lock; cmpxchg16b %[dest_lo]\n\t"
"jne 1b\n\t"
- : [dest] "=o" (storage)
- : "b" (v_lo), "c" (v_hi)
+ : [dest_lo] "=m" (storage), [dest_hi] "=m" (reinterpret_cast< volatile aliasing_uint64_t* >(&storage)[1])
+ : "b" (reinterpret_cast< const aliasing_uint64_t* >(&v)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&v)[1])
: BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "rax", "rdx", "memory"
);
-#else // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS)
- __asm__ __volatile__
- (
- "movq 0(%[dest]), %%rax\n\t"
- "movq 8(%[dest]), %%rdx\n\t"
- ".align 16\n\t"
- "1: lock; cmpxchg16b 0(%[dest])\n\t"
- "jne 1b\n\t"
- :
- : "b" (v_lo), "c" (v_hi), [dest] "r" (&storage)
- : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "rax", "rdx", "memory"
- );
-#endif // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS)
}
static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order) BOOST_NOEXCEPT
{
#if defined(__clang__)
+
// Clang cannot allocate rax:rdx register pairs but it has sync intrinsics
storage_type value = storage_type();
return __sync_val_compare_and_swap(&storage, value, value);
-#elif defined(BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS)
- // GCC 4.4 can't allocate rax:rdx register pair either but it also doesn't support 128-bit __sync_val_compare_and_swap
- storage_type value;
+
+#elif defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
+
+ // Some compilers can't allocate rax:rdx register pair either and also don't support 128-bit __sync_val_compare_and_swap
+ uint64_t value_bits[2];
// We don't care for comparison result here; the previous value will be stored into value anyway.
// Also we don't care for rbx and rcx values, they just have to be equal to rax and rdx before cmpxchg16b.
-#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS)
__asm__ __volatile__
(
"movq %%rbx, %%rax\n\t"
"movq %%rcx, %%rdx\n\t"
"lock; cmpxchg16b %[storage]\n\t"
- "movq %%rax, %[value]\n\t"
- "movq %%rdx, 8+%[value]\n\t"
- : [value] "=o" (value)
+ : "=&a" (value_bits[0]), "=&d" (value_bits[1])
: [storage] "m" (storage)
- : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory", "rax", "rdx"
- );
-#else // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS)
- __asm__ __volatile__
- (
- "movq %%rbx, %%rax\n\t"
- "movq %%rcx, %%rdx\n\t"
- "lock; cmpxchg16b %[storage]\n\t"
- "movq %%rax, 0(%[value])\n\t"
- "movq %%rdx, 8(%[value])\n\t"
- :
- : [storage] "m" (storage), [value] "r" (&value)
- : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory", "rax", "rdx"
+ : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
);
-#endif // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS)
+ storage_type value;
+ BOOST_ATOMIC_DETAIL_MEMCPY(&value, value_bits, sizeof(value));
return value;
-#else // defined(BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS)
+
+#else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
+
storage_type value;
// We don't care for comparison result here; the previous value will be stored into value anyway.
@@ -485,7 +445,8 @@ struct gcc_dcas_x86_64
);
return value;
-#endif
+
+#endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
}
static BOOST_FORCEINLINE bool compare_exchange_strong(
@@ -498,53 +459,31 @@ struct gcc_dcas_x86_64
expected = __sync_val_compare_and_swap(&storage, old_expected, desired);
return expected == old_expected;
-#elif defined(BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS)
+#elif defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
- // GCC 4.4 can't allocate rax:rdx register pair either but it also doesn't support 128-bit __sync_val_compare_and_swap
- uint64_t const* p_desired = (uint64_t const*)&desired;
- const uint64_t desired_lo = p_desired[0], desired_hi = p_desired[1];
+ // Some compilers can't allocate rax:rdx register pair either and also don't support 128-bit __sync_val_compare_and_swap
bool success;
-#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS)
- __asm__ __volatile__
- (
- "movq %[expected], %%rax\n\t"
- "movq 8+%[expected], %%rdx\n\t"
- "lock; cmpxchg16b %[dest]\n\t"
- "sete %[success]\n\t"
- "movq %%rax, %[expected]\n\t"
- "movq %%rdx, 8+%[expected]\n\t"
- : [dest] "+m" (storage), [expected] "+o" (expected), [success] "=q" (success)
- : "b" (desired_lo), "c" (desired_hi)
- : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory", "rax", "rdx"
- );
-#else // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS)
__asm__ __volatile__
(
- "movq 0(%[expected]), %%rax\n\t"
- "movq 8(%[expected]), %%rdx\n\t"
"lock; cmpxchg16b %[dest]\n\t"
"sete %[success]\n\t"
- "movq %%rax, 0(%[expected])\n\t"
- "movq %%rdx, 8(%[expected])\n\t"
- : [dest] "+m" (storage), [success] "=q" (success)
- : "b" (desired_lo), "c" (desired_hi), [expected] "r" (&expected)
- : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory", "rax", "rdx"
+ : [dest] "+m" (storage), "+a" (reinterpret_cast< aliasing_uint64_t* >(&expected)[0]), "+d" (reinterpret_cast< aliasing_uint64_t* >(&expected)[1]), [success] "=q" (success)
+ : "b" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[1])
+ : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
);
-#endif // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS)
return success;
-#else // defined(BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS)
+#else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
- uint64_t const* p_desired = (uint64_t const*)&desired;
- const uint64_t desired_lo = p_desired[0], desired_hi = p_desired[1];
bool success;
+
#if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
__asm__ __volatile__
(
"lock; cmpxchg16b %[dest]\n\t"
- : "+A" (expected), [dest] "+m" (storage), [success] "=@ccz" (success)
- : "b" (desired_lo), "c" (desired_hi)
+ : "+A" (expected), [dest] "+m" (storage), "=@ccz" (success)
+ : "b" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[1])
: BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
);
#else // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
@@ -552,20 +491,15 @@ struct gcc_dcas_x86_64
(
"lock; cmpxchg16b %[dest]\n\t"
"sete %[success]\n\t"
-#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES)
- : "+A,A" (expected), [dest] "+m,m" (storage), [success] "=q,m" (success)
- : "b,b" (desired_lo), "c,c" (desired_hi)
-#else
- : "+A" (expected), [dest] "+m" (storage), [success] "=q" (success)
- : "b" (desired_lo), "c" (desired_hi)
-#endif
+ : "+A" (expected), [dest] "+m" (storage), [success] "=qm" (success)
+ : "b" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[1])
: BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
);
#endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS)
return success;
-#endif // defined(BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS)
+#endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
}
static BOOST_FORCEINLINE bool compare_exchange_weak(
@@ -576,83 +510,39 @@ struct gcc_dcas_x86_64
static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT
{
-#if defined(__clang__)
- // Clang cannot allocate eax:edx register pairs but it has sync intrinsics
- storage_type old_val = storage;
- while (true)
- {
- storage_type val = __sync_val_compare_and_swap(&storage, old_val, v);
- if (val == old_val)
- return val;
- old_val = val;
- }
-#elif defined(BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS)
- // GCC 4.4 can't allocate rax:rdx register pair either but it also doesn't support 128-bit __sync_val_compare_and_swap
- storage_type old_value;
- uint64_t const* p_value = (uint64_t const*)&v;
- const uint64_t v_lo = p_value[0], v_hi = p_value[1];
-#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS)
+#if defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
+ uint64_t old_bits[2];
__asm__ __volatile__
(
- "movq %[dest], %%rax\n\t"
- "movq 8+%[dest], %%rdx\n\t"
+ "movq %[dest_lo], %%rax\n\t"
+ "movq %[dest_hi], %%rdx\n\t"
".align 16\n\t"
- "1: lock; cmpxchg16b %[dest]\n\t"
+ "1: lock; cmpxchg16b %[dest_lo]\n\t"
"jne 1b\n\t"
- "movq %%rax, %[old_value]\n\t"
- "movq %%rdx, 8+%[old_value]\n\t"
- : [dest] "+o" (storage), [old_value] "=o" (old_value)
- : "b" (v_lo), "c" (v_hi)
- : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory", "rax", "rdx"
- );
-#else // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS)
- __asm__ __volatile__
- (
- "movq 0(%[dest]), %%rax\n\t"
- "movq 8(%[dest]), %%rdx\n\t"
- ".align 16\n\t"
- "1: lock; cmpxchg16b 0(%[dest])\n\t"
- "jne 1b\n\t"
- "movq %%rax, 0(%[old_value])\n\t"
- "movq %%rdx, 8(%[old_value])\n\t"
- :
- : "b" (v_lo), "c" (v_hi), [dest] "r" (&storage), [old_value] "r" (&old_value)
- : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory", "rax", "rdx"
+ : [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint64_t* >(&storage)[1]), "=&a" (old_bits[0]), "=&d" (old_bits[1])
+ : "b" (reinterpret_cast< const aliasing_uint64_t* >(&v)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&v)[1])
+ : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
);
-#endif // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS)
+ storage_type old_value;
+ BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value));
return old_value;
-#else // defined(BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS)
- uint64_t const* p_value = (uint64_t const*)&v;
- const uint64_t v_lo = p_value[0], v_hi = p_value[1];
-#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS)
- __asm__ __volatile__
- (
- "movq %[dest], %%rax\n\t"
- "movq 8+%[dest], %%rdx\n\t"
- ".align 16\n\t"
- "1: lock; cmpxchg16b %[dest]\n\t"
- "jne 1b\n\t"
- : "=&A" (v), [dest] "+o" (storage)
- : "b" (v_lo), "c" (v_hi)
- : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
- );
-#else // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS)
+#else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
+ storage_type old_value;
__asm__ __volatile__
(
- "movq 0(%[dest]), %%rax\n\t"
- "movq 8(%[dest]), %%rdx\n\t"
+ "movq %[dest_lo], %%rax\n\t"
+ "movq %[dest_hi], %%rdx\n\t"
".align 16\n\t"
- "1: lock; cmpxchg16b 0(%[dest])\n\t"
+ "1: lock; cmpxchg16b %[dest_lo]\n\t"
"jne 1b\n\t"
- : "=&A" (v)
- : "b" (v_lo), "c" (v_hi), [dest] "r" (&storage)
+ : "=&A" (old_value), [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint64_t* >(&storage)[1])
+ : "b" (reinterpret_cast< const aliasing_uint64_t* >(&v)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&v)[1])
: BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory"
);
-#endif // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS)
- return v;
-#endif
+ return old_value;
+#endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS)
}
};