From d23a2bc131eae32c473e39fcb3b640af1a95d244 Mon Sep 17 00:00:00 2001
From: Steve MacLean
Date: Wed, 14 Jun 2017 02:04:01 -0400
Subject: [Arm64/Unix] Revise Volatile<T> barriers (#12156)

* [Arm64] Revise Volatile barriers

* Remove VolatileStore from CopyValueClassUnchecked

* Replace MemoryBarrier() in CopyValueClassUnchecked
---
 src/gc/env/gcenv.base.h | 29 ++++++++++++++++++++++++++++-
 src/inc/volatile.h      | 29 ++++++++++++++++++++++++++++-
 src/vm/object.cpp       | 14 ++++++++++----
 3 files changed, 66 insertions(+), 6 deletions(-)

diff --git a/src/gc/env/gcenv.base.h b/src/gc/env/gcenv.base.h
index af1a3cd9f0..1e9406c296 100644
--- a/src/gc/env/gcenv.base.h
+++ b/src/gc/env/gcenv.base.h
@@ -351,7 +351,7 @@ typedef PTR_PTR_Object PTR_UNCHECKED_OBJECTREF;
 #if defined(__clang__)
 #if defined(_ARM_) || defined(_ARM64_)
 // This is functionally equivalent to the MemoryBarrier() macro used on ARM on Windows.
-#define VOLATILE_MEMORY_BARRIER() asm volatile ("dmb sy" : : : "memory")
+#define VOLATILE_MEMORY_BARRIER() asm volatile ("dmb ish" : : : "memory")
 #else
 //
 // For Clang, we prevent reordering by the compiler by inserting the following after a volatile
@@ -392,8 +392,22 @@
 template<typename T>
 inline T VolatileLoad(T const * pt)
 {
+#if defined(_ARM64_) && defined(__clang__)
+    T val;
+    static const unsigned lockFreeAtomicSizeMask = (1 << 1) | (1 << 2) | (1 << 4) | (1 << 8);
+    if((1 << sizeof(T)) & lockFreeAtomicSizeMask)
+    {
+        __atomic_load((T volatile const *)pt, &val, __ATOMIC_ACQUIRE);
+    }
+    else
+    {
+        val = *(T volatile const *)pt;
+        asm volatile ("dmb ishld" : : : "memory");
+    }
+#else
     T val = *(T volatile const *)pt;
     VOLATILE_MEMORY_BARRIER();
+#endif
     return val;
 }
 
@@ -420,8 +434,21 @@
 template<typename T>
 inline void VolatileStore(T* pt, T val)
 {
+#if defined(_ARM64_) && defined(__clang__)
+    static const unsigned lockFreeAtomicSizeMask = (1 << 1) | (1 << 2) | (1 << 4) | (1 << 8);
+    if((1 << sizeof(T)) & lockFreeAtomicSizeMask)
+    {
+        __atomic_store((T volatile *)pt, &val, __ATOMIC_RELEASE);
+    }
+    else
+    {
+        VOLATILE_MEMORY_BARRIER();
+        *(T volatile *)pt = val;
+    }
+#else
     VOLATILE_MEMORY_BARRIER();
     *(T volatile *)pt = val;
+#endif
 }
 
 extern GCSystemInfo g_SystemInfo;
diff --git a/src/inc/volatile.h b/src/inc/volatile.h
index 9531d98085..5aa0e50866 100644
--- a/src/inc/volatile.h
+++ b/src/inc/volatile.h
@@ -76,7 +76,7 @@
 #if defined(__GNUC__)
 #if defined(_ARM_) || defined(_ARM64_)
 // This is functionally equivalent to the MemoryBarrier() macro used on ARM on Windows.
-#define VOLATILE_MEMORY_BARRIER() asm volatile ("dmb sy" : : : "memory")
+#define VOLATILE_MEMORY_BARRIER() asm volatile ("dmb ish" : : : "memory")
 #else
 //
 // For GCC, we prevent reordering by the compiler by inserting the following after a volatile
@@ -120,8 +120,22 @@ T VolatileLoad(T const * pt)
     STATIC_CONTRACT_SUPPORTS_DAC_HOST_ONLY;
 
 #ifndef DACCESS_COMPILE
+#if defined(_ARM64_) && defined(__GNUC__)
+    T val;
+    static const unsigned lockFreeAtomicSizeMask = (1 << 1) | (1 << 2) | (1 << 4) | (1 << 8);
+    if((1 << sizeof(T)) & lockFreeAtomicSizeMask)
+    {
+        __atomic_load((T volatile const *)pt, &val, __ATOMIC_ACQUIRE);
+    }
+    else
+    {
+        val = *(T volatile const *)pt;
+        asm volatile ("dmb ishld" : : : "memory");
+    }
+#else
     T val = *(T volatile const *)pt;
     VOLATILE_MEMORY_BARRIER();
+#endif
 #else
     T val = *pt;
 #endif
@@ -166,8 +180,21 @@ void VolatileStore(T* pt, T val)
     STATIC_CONTRACT_SUPPORTS_DAC_HOST_ONLY;
 
 #ifndef DACCESS_COMPILE
+#if defined(_ARM64_) && defined(__GNUC__)
+    static const unsigned lockFreeAtomicSizeMask = (1 << 1) | (1 << 2) | (1 << 4) | (1 << 8);
+    if((1 << sizeof(T)) & lockFreeAtomicSizeMask)
+    {
+        __atomic_store((T volatile *)pt, &val, __ATOMIC_RELEASE);
+    }
+    else
+    {
+        VOLATILE_MEMORY_BARRIER();
+        *(T volatile *)pt = val;
+    }
+#else
     VOLATILE_MEMORY_BARRIER();
     *(T volatile *)pt = val;
+#endif
 #else
     *pt = val;
 #endif
diff --git a/src/vm/object.cpp b/src/vm/object.cpp
index 3e3f6d120a..15c3a457a9 100644
--- a/src/vm/object.cpp
+++ b/src/vm/object.cpp
@@ -1521,11 +1521,17 @@ void STDCALL CopyValueClassUnchecked(void* dest, void* src, MethodTable *pMT)
 
     _ASSERTE(!pMT->IsArray());  // bunch of assumptions about arrays wrong.
 
+    // @todo Only call MemoryBarrier() if needed.
+    // Reflection is a known use case where this is required.
+    // Unboxing is a use case where this should not be required.
+    //
+    MemoryBarrier();
+
     // Copy the bulk of the data, and any non-GC refs.
     switch (pMT->GetNumInstanceFieldBytes())
     {
     case 1:
-        VolatileStore((UINT8*)dest, *(UINT8*)src);
+        *(UINT8*)dest = *(UINT8*)src;
         break;
 #ifndef ALIGN_ACCESS
     // we can hit an alignment fault if the value type has multiple
@@ -1533,13 +1539,13 @@
     // value class can be aligned to 4-byte boundaries, yet the
     // NumInstanceFieldBytes is 8
     case 2:
-        VolatileStore((UINT16*)dest, *(UINT16*)src);
+        *(UINT16*)dest = *(UINT16*)src;
         break;
     case 4:
-        VolatileStore((UINT32*)dest, *(UINT32*)src);
+        *(UINT32*)dest = *(UINT32*)src;
         break;
     case 8:
-        VolatileStore((UINT64*)dest, *(UINT64*)src);
+        *(UINT64*)dest = *(UINT64*)src;
         break;
 #endif // !ALIGN_ACCESS
     default:
--
cgit v1.2.3
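
The following standalone sketch illustrates the acquire/release pattern the patch adopts for lock-free sizes (1, 2, 4, and 8 bytes, per the lockFreeAtomicSizeMask check), assuming a GCC or Clang toolchain targeting AArch64. The file name, helper names (demo_acquire_load, demo_release_store) and globals (g_flag, g_payload) are hypothetical and do not exist in the CoreCLR sources; the remark that these lower to ldar/stlr reflects typical AArch64 code generation, not something guaranteed by the patch.

// volatile_arm64_sketch.cpp -- illustrative sketch only, not part of this patch.
// Possible build/inspection step (toolchain-dependent):
//   clang++ -O2 --target=aarch64-linux-gnu -S volatile_arm64_sketch.cpp
#include <cstdint>
#include <cstdio>

static int32_t          g_payload = 0;  // ordinary data published by the writer
static volatile int32_t g_flag    = 0;  // flag accessed with acquire/release

// Acquire load: the same builtin and ordering the patch uses when sizeof(T)
// is a lock-free size. On AArch64 this typically compiles to a single ldar,
// so no trailing "dmb ish"/"dmb ishld" barrier is needed.
static int32_t demo_acquire_load(int32_t const volatile* p)
{
    int32_t val;
    __atomic_load(p, &val, __ATOMIC_ACQUIRE);
    return val;
}

// Release store: counterpart of the patch's VolatileStore fast path.
// Typically a single stlr, so no leading barrier is needed.
static void demo_release_store(int32_t volatile* p, int32_t val)
{
    __atomic_store(p, &val, __ATOMIC_RELEASE);
}

int main()
{
    // Writer side: the plain store to g_payload cannot be reordered after the
    // release store that publishes g_flag.
    g_payload = 42;
    demo_release_store(&g_flag, 1);

    // Reader side: once the acquire load observes g_flag == 1, the earlier
    // store to g_payload is guaranteed to be visible.
    if (demo_acquire_load(&g_flag) == 1)
        printf("payload = %d\n", (int)g_payload);
    return 0;
}

Sizes other than 1, 2, 4, or 8 bytes keep an explicit barrier in the patch: VolatileLoad falls back to a volatile read followed by "dmb ishld", and VolatileStore to "dmb ish" (via VOLATILE_MEMORY_BARRIER) before a volatile write.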