diff options
Diffstat (limited to 'boost/atomic/detail/ops_gcc_x86_dcas.hpp')
-rw-r--r-- | boost/atomic/detail/ops_gcc_x86_dcas.hpp | 566 |
1 files changed, 228 insertions, 338 deletions
diff --git a/boost/atomic/detail/ops_gcc_x86_dcas.hpp b/boost/atomic/detail/ops_gcc_x86_dcas.hpp index 28cbc225e3..4dacc66fe2 100644 --- a/boost/atomic/detail/ops_gcc_x86_dcas.hpp +++ b/boost/atomic/detail/ops_gcc_x86_dcas.hpp @@ -5,7 +5,7 @@ * * Copyright (c) 2009 Helge Bahmann * Copyright (c) 2012 Tim Blechmann - * Copyright (c) 2014 Andrey Semashev + * Copyright (c) 2014 - 2018 Andrey Semashev */ /*! * \file atomic/detail/ops_gcc_x86_dcas.hpp @@ -20,6 +20,7 @@ #include <boost/memory_order.hpp> #include <boost/atomic/detail/config.hpp> #include <boost/atomic/detail/storage_type.hpp> +#include <boost/atomic/detail/string_ops.hpp> #include <boost/atomic/capabilities.hpp> #ifdef BOOST_HAS_PRAGMA_ONCE @@ -30,119 +31,91 @@ namespace boost { namespace atomics { namespace detail { +// Note: In the 32-bit PIC code guarded with BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX below we have to avoid using memory +// operand constraints because the compiler may choose to use ebx as the base register for that operand. At least, clang +// is known to do that. For this reason we have to pre-compute a pointer to storage and pass it in edi. For the same reason +// we cannot save ebx to the stack with a mov instruction, so we use esi as a scratch register and restore it afterwards. +// Alternatively, we could push/pop the register to the stack, but exchanging the registers is faster. +// The need to pass a pointer in edi is a bit wasteful because normally the memory operand would use a base pointer +// with an offset (e.g. `this` + offset). But unfortunately, there seems to be no way around it. + #if defined(BOOST_ATOMIC_DETAIL_X86_HAS_CMPXCHG8B) template< bool Signed > struct gcc_dcas_x86 { - typedef typename make_storage_type< 8u, Signed >::type storage_type; - typedef typename make_storage_type< 8u, Signed >::aligned aligned_storage_type; + typedef typename make_storage_type< 8u >::type storage_type; + typedef typename make_storage_type< 8u >::aligned aligned_storage_type; + typedef uint32_t BOOST_ATOMIC_DETAIL_MAY_ALIAS aliasing_uint32_t; + static BOOST_CONSTEXPR_OR_CONST bool full_cas_based = true; static BOOST_CONSTEXPR_OR_CONST bool is_always_lock_free = true; static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT { - if ((((uint32_t)&storage) & 0x00000007) == 0) + if (BOOST_LIKELY((((uint32_t)&storage) & 0x00000007) == 0u)) { -#if defined(__SSE2__) +#if defined(__SSE__) + typedef float xmm_t __attribute__((__vector_size__(16))); + xmm_t xmm_scratch; __asm__ __volatile__ ( #if defined(__AVX__) - "vmovq %1, %%xmm4\n\t" - "vmovq %%xmm4, %0\n\t" + "vmovq %[value], %[xmm_scratch]\n\t" + "vmovq %[xmm_scratch], %[storage]\n\t" +#elif defined(__SSE2__) + "movq %[value], %[xmm_scratch]\n\t" + "movq %[xmm_scratch], %[storage]\n\t" #else - "movq %1, %%xmm4\n\t" - "movq %%xmm4, %0\n\t" + "xorps %[xmm_scratch], %[xmm_scratch]\n\t" + "movlps %[value], %[xmm_scratch]\n\t" + "movlps %[xmm_scratch], %[storage]\n\t" #endif - : "=m" (storage) - : "m" (v) - : "memory", "xmm4" + : [storage] "=m" (storage), [xmm_scratch] "=x" (xmm_scratch) + : [value] "m" (v) + : "memory" ); #else __asm__ __volatile__ ( - "fildll %1\n\t" - "fistpll %0\n\t" - : "=m" (storage) - : "m" (v) + "fildll %[value]\n\t" + "fistpll %[storage]\n\t" + : [storage] "=m" (storage) + : [value] "m" (v) : "memory" ); #endif } else { -#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) -#if defined(__PIC__) - uint32_t v_lo = (uint32_t)v; - uint32_t scratch; - __asm__ __volatile__ - ( - "movl %%ebx, %[scratch]\n\t" - "movl %[value_lo], %%ebx\n\t" - "movl %[dest], %%eax\n\t" - "movl 4+%[dest], %%edx\n\t" - ".align 16\n\t" - "1: lock; cmpxchg8b %[dest]\n\t" - "jne 1b\n\t" - "movl %[scratch], %%ebx\n\t" - : [scratch] "=m" (scratch), [dest] "=o" (storage), [value_lo] "+a" (v_lo) - : "c" ((uint32_t)(v >> 32)) - : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "edx", "memory" - ); -#else // defined(__PIC__) - __asm__ __volatile__ - ( - "movl %[dest], %%eax\n\t" - "movl 4+%[dest], %%edx\n\t" - ".align 16\n\t" - "1: lock; cmpxchg8b %[dest]\n\t" - "jne 1b\n\t" - : [dest] "=o" (storage) - : [value_lo] "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)) - : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "eax", "edx", "memory" - ); -#endif // defined(__PIC__) -#else // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) -#if defined(__PIC__) - uint32_t v_lo = (uint32_t)v; - uint32_t scratch; +#if defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) __asm__ __volatile__ ( - "movl %%ebx, %[scratch]\n\t" - "movl %[value_lo], %%ebx\n\t" - "movl 0(%[dest]), %%eax\n\t" + "xchgl %%ebx, %%esi\n\t" + "movl %%eax, %%ebx\n\t" + "movl (%[dest]), %%eax\n\t" "movl 4(%[dest]), %%edx\n\t" ".align 16\n\t" - "1: lock; cmpxchg8b 0(%[dest])\n\t" + "1: lock; cmpxchg8b (%[dest])\n\t" "jne 1b\n\t" - "movl %[scratch], %%ebx\n\t" -#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES) - : [scratch] "=m,m" (scratch), [value_lo] "+a,a" (v_lo) - : "c,c" ((uint32_t)(v >> 32)), [dest] "D,S" (&storage) -#else - : [scratch] "=m" (scratch), [value_lo] "+a" (v_lo) - : "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage) -#endif + "xchgl %%ebx, %%esi\n\t" + : + : "a" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage) : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "edx", "memory" ); -#else // defined(__PIC__) +#else // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) __asm__ __volatile__ ( - "movl 0(%[dest]), %%eax\n\t" - "movl 4(%[dest]), %%edx\n\t" + "movl %[dest_lo], %%eax\n\t" + "movl %[dest_hi], %%edx\n\t" ".align 16\n\t" - "1: lock; cmpxchg8b 0(%[dest])\n\t" + "1: lock; cmpxchg8b %[dest_lo]\n\t" "jne 1b\n\t" - : -#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES) - : [value_lo] "b,b" ((uint32_t)v), "c,c" ((uint32_t)(v >> 32)), [dest] "D,S" (&storage) -#else - : [value_lo] "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage) -#endif + : [dest_lo] "=m" (storage), [dest_hi] "=m" (reinterpret_cast< volatile aliasing_uint32_t* >(&storage)[1]) + : [value_lo] "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)) : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "eax", "edx", "memory" ); -#endif // defined(__PIC__) -#endif // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) +#endif // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) } } @@ -150,29 +123,35 @@ struct gcc_dcas_x86 { storage_type value; - if ((((uint32_t)&storage) & 0x00000007) == 0) + if (BOOST_LIKELY((((uint32_t)&storage) & 0x00000007) == 0u)) { -#if defined(__SSE2__) +#if defined(__SSE__) + typedef float xmm_t __attribute__((__vector_size__(16))); + xmm_t xmm_scratch; __asm__ __volatile__ ( #if defined(__AVX__) - "vmovq %1, %%xmm4\n\t" - "vmovq %%xmm4, %0\n\t" + "vmovq %[storage], %[xmm_scratch]\n\t" + "vmovq %[xmm_scratch], %[value]\n\t" +#elif defined(__SSE2__) + "movq %[storage], %[xmm_scratch]\n\t" + "movq %[xmm_scratch], %[value]\n\t" #else - "movq %1, %%xmm4\n\t" - "movq %%xmm4, %0\n\t" + "xorps %[xmm_scratch], %[xmm_scratch]\n\t" + "movlps %[storage], %[xmm_scratch]\n\t" + "movlps %[xmm_scratch], %[value]\n\t" #endif - : "=m" (value) - : "m" (storage) - : "memory", "xmm4" + : [value] "=m" (value), [xmm_scratch] "=x" (xmm_scratch) + : [storage] "m" (storage) + : "memory" ); #else __asm__ __volatile__ ( - "fildll %1\n\t" - "fistpll %0\n\t" - : "=m" (value) - : "m" (storage) + "fildll %[storage]\n\t" + "fistpll %[value]\n\t" + : [value] "=m" (value) + : [storage] "m" (storage) : "memory" ); #endif @@ -182,7 +161,21 @@ struct gcc_dcas_x86 #if defined(__clang__) // Clang cannot allocate eax:edx register pairs but it has sync intrinsics value = __sync_val_compare_and_swap(&storage, (storage_type)0, (storage_type)0); -#else +#elif defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) + uint32_t value_bits[2]; + // We don't care for comparison result here; the previous value will be stored into value anyway. + // Also we don't care for ebx and ecx values, they just have to be equal to eax and edx before cmpxchg8b. + __asm__ __volatile__ + ( + "movl %%ebx, %%eax\n\t" + "movl %%ecx, %%edx\n\t" + "lock; cmpxchg8b %[storage]\n\t" + : "=&a" (value_bits[0]), "=&d" (value_bits[1]) + : [storage] "m" (storage) + : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" + ); + BOOST_ATOMIC_DETAIL_MEMCPY(&value, value_bits, sizeof(value)); +#else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) // We don't care for comparison result here; the previous value will be stored into value anyway. // Also we don't care for ebx and ecx values, they just have to be equal to eax and edx before cmpxchg8b. __asm__ __volatile__ @@ -194,7 +187,7 @@ struct gcc_dcas_x86 : [storage] "m" (storage) : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" ); -#endif +#endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) } return value; @@ -210,56 +203,39 @@ struct gcc_dcas_x86 expected = __sync_val_compare_and_swap(&storage, old_expected, desired); return expected == old_expected; -#elif defined(__PIC__) - - // Make sure ebx is saved and restored properly in case - // of position independent code. To make this work - // setup register constraints such that ebx can not be - // used by accident e.g. as base address for the variable - // to be modified. Accessing "scratch" should always be okay, - // as it can only be placed on the stack (and therefore - // accessed through ebp or esp only). - // - // In theory, could push/pop ebx onto/off the stack, but movs - // to a prepared stack slot turn out to be faster. +#elif defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) - uint32_t scratch; bool success; + #if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) __asm__ __volatile__ ( - "movl %%ebx, %[scratch]\n\t" - "movl %[desired_lo], %%ebx\n\t" + "xchgl %%ebx, %%esi\n\t" "lock; cmpxchg8b (%[dest])\n\t" - "movl %[scratch], %%ebx\n\t" - : "+A" (expected), [scratch] "=m" (scratch), [success] "=@ccz" (success) - : [desired_lo] "Sm" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)), [dest] "D" (&storage) + "xchgl %%ebx, %%esi\n\t" + : "+A" (expected), [success] "=@ccz" (success) + : "S" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)), [dest] "D" (&storage) : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" ); #else // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) __asm__ __volatile__ ( - "movl %%ebx, %[scratch]\n\t" - "movl %[desired_lo], %%ebx\n\t" + "xchgl %%ebx, %%esi\n\t" "lock; cmpxchg8b (%[dest])\n\t" - "movl %[scratch], %%ebx\n\t" + "xchgl %%ebx, %%esi\n\t" "sete %[success]\n\t" -#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES) - : "+A,A,A,A,A,A" (expected), [scratch] "=m,m,m,m,m,m" (scratch), [success] "=q,m,q,m,q,m" (success) - : [desired_lo] "S,S,D,D,m,m" ((uint32_t)desired), "c,c,c,c,c,c" ((uint32_t)(desired >> 32)), [dest] "D,D,S,S,D,D" (&storage) -#else - : "+A" (expected), [scratch] "=m" (scratch), [success] "=q" (success) - : [desired_lo] "S" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)), [dest] "D" (&storage) -#endif + : "+A" (expected), [success] "=qm" (success) + : "S" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)), [dest] "D" (&storage) : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" ); #endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) return success; -#else // defined(__PIC__) +#else // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) bool success; + #if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) __asm__ __volatile__ ( @@ -273,20 +249,15 @@ struct gcc_dcas_x86 ( "lock; cmpxchg8b %[dest]\n\t" "sete %[success]\n\t" -#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES) - : "+A,A" (expected), [dest] "+m,m" (storage), [success] "=q,m" (success) - : "b,b" ((uint32_t)desired), "c,c" ((uint32_t)(desired >> 32)) -#else - : "+A" (expected), [dest] "+m" (storage), [success] "=q" (success) + : "+A" (expected), [dest] "+m" (storage), [success] "=qm" (success) : "b" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)) -#endif : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" ); #endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) return success; -#endif // defined(__PIC__) +#endif // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) } static BOOST_FORCEINLINE bool compare_exchange_weak( @@ -297,93 +268,105 @@ struct gcc_dcas_x86 static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order order) BOOST_NOEXCEPT { -#if defined(__clang__) - // Clang cannot allocate eax:edx register pairs but it has sync intrinsics - storage_type old_val = storage; - while (true) - { - storage_type val = __sync_val_compare_and_swap(&storage, old_val, v); - if (val == old_val) - return val; - old_val = val; - } -#elif !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) -#if defined(__PIC__) - uint32_t scratch; +#if defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) +#if defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) + + uint32_t old_bits[2]; __asm__ __volatile__ ( - "movl %%ebx, %[scratch]\n\t" - "movl %%eax, %%ebx\n\t" - "movl %%edx, %%ecx\n\t" - "movl %[dest], %%eax\n\t" - "movl 4+%[dest], %%edx\n\t" + "xchgl %%ebx, %%esi\n\t" + "movl (%[dest]), %%eax\n\t" + "movl 4(%[dest]), %%edx\n\t" ".align 16\n\t" - "1: lock; cmpxchg8b %[dest]\n\t" + "1: lock; cmpxchg8b (%[dest])\n\t" "jne 1b\n\t" - "movl %[scratch], %%ebx\n\t" - : "+A" (v), [scratch] "=m" (scratch), [dest] "+o" (storage) - : - : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "ecx", "memory" + "xchgl %%ebx, %%esi\n\t" + : "=a" (old_bits[0]), "=d" (old_bits[1]) + : "S" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage) + : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" ); - return v; -#else // defined(__PIC__) + + storage_type old_value; + BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value)); + return old_value; + +#else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) + + storage_type old_value; __asm__ __volatile__ ( - "movl %[dest], %%eax\n\t" - "movl 4+%[dest], %%edx\n\t" + "xchgl %%ebx, %%esi\n\t" + "movl (%[dest]), %%eax\n\t" + "movl 4(%[dest]), %%edx\n\t" ".align 16\n\t" - "1: lock; cmpxchg8b %[dest]\n\t" + "1: lock; cmpxchg8b (%[dest])\n\t" "jne 1b\n\t" - : "=A" (v), [dest] "+o" (storage) - : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)) + "xchgl %%ebx, %%esi\n\t" + : "=A" (old_value) + : "S" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage) : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" ); - return v; -#endif // defined(__PIC__) -#else // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) -#if defined(__PIC__) - uint32_t scratch; + return old_value; + +#endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) +#else // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) +#if defined(__MINGW32__) && ((__GNUC__+0) * 100 + (__GNUC_MINOR__+0)) < 407 + + // MinGW gcc up to 4.6 has problems with allocating registers in the asm blocks below + uint32_t old_bits[2]; __asm__ __volatile__ ( - "movl %%ebx, %[scratch]\n\t" - "movl %%eax, %%ebx\n\t" - "movl %%edx, %%ecx\n\t" - "movl 0(%[dest]), %%eax\n\t" + "movl (%[dest]), %%eax\n\t" "movl 4(%[dest]), %%edx\n\t" ".align 16\n\t" - "1: lock; cmpxchg8b 0(%[dest])\n\t" + "1: lock; cmpxchg8b (%[dest])\n\t" "jne 1b\n\t" - "movl %[scratch], %%ebx\n\t" -#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES) - : "+A,A" (v), [scratch] "=m,m" (scratch) - : [dest] "D,S" (&storage) -#else - : "+A" (v), [scratch] "=m" (scratch) - : [dest] "D" (&storage) -#endif - : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "ecx", "memory" + : "=&a" (old_bits[0]), "=&d" (old_bits[1]) + : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "DS" (&storage) + : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" ); - return v; -#else // defined(__PIC__) + + storage_type old_value; + BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value)); + return old_value; + +#elif defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) + + uint32_t old_bits[2]; __asm__ __volatile__ ( - "movl 0(%[dest]), %%eax\n\t" - "movl 4(%[dest]), %%edx\n\t" + "movl %[dest_lo], %%eax\n\t" + "movl %[dest_hi], %%edx\n\t" ".align 16\n\t" - "1: lock; cmpxchg8b 0(%[dest])\n\t" + "1: lock; cmpxchg8b %[dest_lo]\n\t" "jne 1b\n\t" -#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES) - : "=A,A" (v) - : "b,b" ((uint32_t)v), "c,c" ((uint32_t)(v >> 32)), [dest] "D,S" (&storage) -#else - : "=A" (v) - : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)), [dest] "D" (&storage) -#endif + : "=&a" (old_bits[0]), "=&d" (old_bits[1]), [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint32_t* >(&storage)[1]) + : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)) : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" ); - return v; -#endif // defined(__PIC__) -#endif + + storage_type old_value; + BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value)); + return old_value; + +#else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) + + storage_type old_value; + __asm__ __volatile__ + ( + "movl %[dest_lo], %%eax\n\t" + "movl %[dest_hi], %%edx\n\t" + ".align 16\n\t" + "1: lock; cmpxchg8b %[dest_lo]\n\t" + "jne 1b\n\t" + : "=&A" (old_value), [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint32_t* >(&storage)[1]) + : "b" ((uint32_t)v), "c" ((uint32_t)(v >> 32)) + : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" + ); + return old_value; + +#endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) +#endif // defined(BOOST_ATOMIC_DETAIL_X86_ASM_PRESERVE_EBX) } }; @@ -394,82 +377,59 @@ struct gcc_dcas_x86 template< bool Signed > struct gcc_dcas_x86_64 { - typedef typename make_storage_type< 16u, Signed >::type storage_type; - typedef typename make_storage_type< 16u, Signed >::aligned aligned_storage_type; + typedef typename make_storage_type< 16u >::type storage_type; + typedef typename make_storage_type< 16u >::aligned aligned_storage_type; + typedef uint64_t BOOST_ATOMIC_DETAIL_MAY_ALIAS aliasing_uint64_t; + static BOOST_CONSTEXPR_OR_CONST bool full_cas_based = true; static BOOST_CONSTEXPR_OR_CONST bool is_always_lock_free = true; static BOOST_FORCEINLINE void store(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT { - uint64_t const* p_value = (uint64_t const*)&v; - const uint64_t v_lo = p_value[0], v_hi = p_value[1]; -#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) __asm__ __volatile__ ( - "movq %[dest], %%rax\n\t" - "movq 8+%[dest], %%rdx\n\t" + "movq %[dest_lo], %%rax\n\t" + "movq %[dest_hi], %%rdx\n\t" ".align 16\n\t" - "1: lock; cmpxchg16b %[dest]\n\t" + "1: lock; cmpxchg16b %[dest_lo]\n\t" "jne 1b\n\t" - : [dest] "=o" (storage) - : "b" (v_lo), "c" (v_hi) + : [dest_lo] "=m" (storage), [dest_hi] "=m" (reinterpret_cast< volatile aliasing_uint64_t* >(&storage)[1]) + : "b" (reinterpret_cast< const aliasing_uint64_t* >(&v)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&v)[1]) : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "rax", "rdx", "memory" ); -#else // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) - __asm__ __volatile__ - ( - "movq 0(%[dest]), %%rax\n\t" - "movq 8(%[dest]), %%rdx\n\t" - ".align 16\n\t" - "1: lock; cmpxchg16b 0(%[dest])\n\t" - "jne 1b\n\t" - : - : "b" (v_lo), "c" (v_hi), [dest] "r" (&storage) - : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "rax", "rdx", "memory" - ); -#endif // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) } static BOOST_FORCEINLINE storage_type load(storage_type const volatile& storage, memory_order) BOOST_NOEXCEPT { #if defined(__clang__) + // Clang cannot allocate rax:rdx register pairs but it has sync intrinsics storage_type value = storage_type(); return __sync_val_compare_and_swap(&storage, value, value); -#elif defined(BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS) - // GCC 4.4 can't allocate rax:rdx register pair either but it also doesn't support 128-bit __sync_val_compare_and_swap - storage_type value; + +#elif defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) + + // Some compilers can't allocate rax:rdx register pair either and also don't support 128-bit __sync_val_compare_and_swap + uint64_t value_bits[2]; // We don't care for comparison result here; the previous value will be stored into value anyway. // Also we don't care for rbx and rcx values, they just have to be equal to rax and rdx before cmpxchg16b. -#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) __asm__ __volatile__ ( "movq %%rbx, %%rax\n\t" "movq %%rcx, %%rdx\n\t" "lock; cmpxchg16b %[storage]\n\t" - "movq %%rax, %[value]\n\t" - "movq %%rdx, 8+%[value]\n\t" - : [value] "=o" (value) + : "=&a" (value_bits[0]), "=&d" (value_bits[1]) : [storage] "m" (storage) - : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory", "rax", "rdx" - ); -#else // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) - __asm__ __volatile__ - ( - "movq %%rbx, %%rax\n\t" - "movq %%rcx, %%rdx\n\t" - "lock; cmpxchg16b %[storage]\n\t" - "movq %%rax, 0(%[value])\n\t" - "movq %%rdx, 8(%[value])\n\t" - : - : [storage] "m" (storage), [value] "r" (&value) - : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory", "rax", "rdx" + : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" ); -#endif // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) + storage_type value; + BOOST_ATOMIC_DETAIL_MEMCPY(&value, value_bits, sizeof(value)); return value; -#else // defined(BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS) + +#else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) + storage_type value; // We don't care for comparison result here; the previous value will be stored into value anyway. @@ -485,7 +445,8 @@ struct gcc_dcas_x86_64 ); return value; -#endif + +#endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) } static BOOST_FORCEINLINE bool compare_exchange_strong( @@ -498,53 +459,31 @@ struct gcc_dcas_x86_64 expected = __sync_val_compare_and_swap(&storage, old_expected, desired); return expected == old_expected; -#elif defined(BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS) +#elif defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) - // GCC 4.4 can't allocate rax:rdx register pair either but it also doesn't support 128-bit __sync_val_compare_and_swap - uint64_t const* p_desired = (uint64_t const*)&desired; - const uint64_t desired_lo = p_desired[0], desired_hi = p_desired[1]; + // Some compilers can't allocate rax:rdx register pair either but also don't support 128-bit __sync_val_compare_and_swap bool success; -#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) - __asm__ __volatile__ - ( - "movq %[expected], %%rax\n\t" - "movq 8+%[expected], %%rdx\n\t" - "lock; cmpxchg16b %[dest]\n\t" - "sete %[success]\n\t" - "movq %%rax, %[expected]\n\t" - "movq %%rdx, 8+%[expected]\n\t" - : [dest] "+m" (storage), [expected] "+o" (expected), [success] "=q" (success) - : "b" (desired_lo), "c" (desired_hi) - : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory", "rax", "rdx" - ); -#else // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) __asm__ __volatile__ ( - "movq 0(%[expected]), %%rax\n\t" - "movq 8(%[expected]), %%rdx\n\t" "lock; cmpxchg16b %[dest]\n\t" "sete %[success]\n\t" - "movq %%rax, 0(%[expected])\n\t" - "movq %%rdx, 8(%[expected])\n\t" - : [dest] "+m" (storage), [success] "=q" (success) - : "b" (desired_lo), "c" (desired_hi), [expected] "r" (&expected) - : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory", "rax", "rdx" + : [dest] "+m" (storage), "+a" (reinterpret_cast< aliasing_uint64_t* >(&expected)[0]), "+d" (reinterpret_cast< aliasing_uint64_t* >(&expected)[1]), [success] "=q" (success) + : "b" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[1]) + : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" ); -#endif // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) return success; -#else // defined(BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS) +#else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) - uint64_t const* p_desired = (uint64_t const*)&desired; - const uint64_t desired_lo = p_desired[0], desired_hi = p_desired[1]; bool success; + #if defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) __asm__ __volatile__ ( "lock; cmpxchg16b %[dest]\n\t" - : "+A" (expected), [dest] "+m" (storage), [success] "=@ccz" (success) - : "b" (desired_lo), "c" (desired_hi) + : "+A" (expected), [dest] "+m" (storage), "=@ccz" (success) + : "b" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[1]) : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" ); #else // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) @@ -552,20 +491,15 @@ struct gcc_dcas_x86_64 ( "lock; cmpxchg16b %[dest]\n\t" "sete %[success]\n\t" -#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_CONSTRAINT_ALTERNATIVES) - : "+A,A" (expected), [dest] "+m,m" (storage), [success] "=q,m" (success) - : "b,b" (desired_lo), "c,c" (desired_hi) -#else - : "+A" (expected), [dest] "+m" (storage), [success] "=q" (success) - : "b" (desired_lo), "c" (desired_hi) -#endif + : "+A" (expected), [dest] "+m" (storage), [success] "=qm" (success) + : "b" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&desired)[1]) : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" ); #endif // defined(BOOST_ATOMIC_DETAIL_ASM_HAS_FLAG_OUTPUTS) return success; -#endif // defined(BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS) +#endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) } static BOOST_FORCEINLINE bool compare_exchange_weak( @@ -576,83 +510,39 @@ struct gcc_dcas_x86_64 static BOOST_FORCEINLINE storage_type exchange(storage_type volatile& storage, storage_type v, memory_order) BOOST_NOEXCEPT { -#if defined(__clang__) - // Clang cannot allocate eax:edx register pairs but it has sync intrinsics - storage_type old_val = storage; - while (true) - { - storage_type val = __sync_val_compare_and_swap(&storage, old_val, v); - if (val == old_val) - return val; - old_val = val; - } -#elif defined(BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS) - // GCC 4.4 can't allocate rax:rdx register pair either but it also doesn't support 128-bit __sync_val_compare_and_swap - storage_type old_value; - uint64_t const* p_value = (uint64_t const*)&v; - const uint64_t v_lo = p_value[0], v_hi = p_value[1]; -#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) +#if defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) + uint64_t old_bits[2]; __asm__ __volatile__ ( - "movq %[dest], %%rax\n\t" - "movq 8+%[dest], %%rdx\n\t" + "movq %[dest_lo], %%rax\n\t" + "movq %[dest_hi], %%rdx\n\t" ".align 16\n\t" - "1: lock; cmpxchg16b %[dest]\n\t" + "1: lock; cmpxchg16b %[dest_lo]\n\t" "jne 1b\n\t" - "movq %%rax, %[old_value]\n\t" - "movq %%rdx, 8+%[old_value]\n\t" - : [dest] "+o" (storage), [old_value] "=o" (old_value) - : "b" (v_lo), "c" (v_hi) - : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory", "rax", "rdx" - ); -#else // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) - __asm__ __volatile__ - ( - "movq 0(%[dest]), %%rax\n\t" - "movq 8(%[dest]), %%rdx\n\t" - ".align 16\n\t" - "1: lock; cmpxchg16b 0(%[dest])\n\t" - "jne 1b\n\t" - "movq %%rax, 0(%[old_value])\n\t" - "movq %%rdx, 8(%[old_value])\n\t" - : - : "b" (v_lo), "c" (v_hi), [dest] "r" (&storage), [old_value] "r" (&old_value) - : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory", "rax", "rdx" + : [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint64_t* >(&storage)[1]), "=&a" (old_bits[0]), "=&d" (old_bits[1]) + : "b" (reinterpret_cast< const aliasing_uint64_t* >(&v)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&v)[1]) + : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" ); -#endif // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) + storage_type old_value; + BOOST_ATOMIC_DETAIL_MEMCPY(&old_value, old_bits, sizeof(old_value)); return old_value; -#else // defined(BOOST_ATOMIC_DETAIL_NO_ASM_RAX_RDX_PAIRS) - uint64_t const* p_value = (uint64_t const*)&v; - const uint64_t v_lo = p_value[0], v_hi = p_value[1]; -#if !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) - __asm__ __volatile__ - ( - "movq %[dest], %%rax\n\t" - "movq 8+%[dest], %%rdx\n\t" - ".align 16\n\t" - "1: lock; cmpxchg16b %[dest]\n\t" - "jne 1b\n\t" - : "=&A" (v), [dest] "+o" (storage) - : "b" (v_lo), "c" (v_hi) - : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" - ); -#else // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) +#else // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) + storage_type old_value; __asm__ __volatile__ ( - "movq 0(%[dest]), %%rax\n\t" - "movq 8(%[dest]), %%rdx\n\t" + "movq %[dest_lo], %%rax\n\t" + "movq %[dest_hi], %%rdx\n\t" ".align 16\n\t" - "1: lock; cmpxchg16b 0(%[dest])\n\t" + "1: lock; cmpxchg16b %[dest_lo]\n\t" "jne 1b\n\t" - : "=&A" (v) - : "b" (v_lo), "c" (v_hi), [dest] "r" (&storage) + : "=&A" (old_value), [dest_lo] "+m" (storage), [dest_hi] "+m" (reinterpret_cast< volatile aliasing_uint64_t* >(&storage)[1]) + : "b" (reinterpret_cast< const aliasing_uint64_t* >(&v)[0]), "c" (reinterpret_cast< const aliasing_uint64_t* >(&v)[1]) : BOOST_ATOMIC_DETAIL_ASM_CLOBBER_CC_COMMA "memory" ); -#endif // !defined(BOOST_ATOMIC_DETAIL_NO_ASM_IMPLIED_ZERO_DISPLACEMENTS) - return v; -#endif + return old_value; +#endif // defined(BOOST_ATOMIC_DETAIL_X86_NO_ASM_AX_DX_PAIRS) } }; |