diff options
author    Koundinya Veluri <kouvel@microsoft.com>   2017-03-09 13:12:43 -0800
committer GitHub <noreply@github.com>               2017-03-09 13:12:43 -0800
commit    a6a7bde881a404fd3e44b2909960675072b4fca1 (patch)
tree      461458153ed9001dafe537f28a1552124ca9064c /src/classlibnative
parent    515af5ef422e33f35c13a84bf3d4bd812ca1e0de (diff)
download  coreclr-a6a7bde881a404fd3e44b2909960675072b4fca1.tar.gz
          coreclr-a6a7bde881a404fd3e44b2909960675072b4fca1.tar.bz2
          coreclr-a6a7bde881a404fd3e44b2909960675072b4fca1.zip
Improve span copy of pointers and structs containing pointers (#9999)
Improve span copy of pointers and structs containing pointers
Fixes #9161
PR #9786 fixes perf of span copy of types that don't contain references
Diffstat (limited to 'src/classlibnative')
-rw-r--r-- | src/classlibnative/bcltype/arraynative.cpp | 76 | ||||
-rw-r--r-- | src/classlibnative/bcltype/arraynative.inl | 329 |
2 files changed, 338 insertions, 67 deletions
diff --git a/src/classlibnative/bcltype/arraynative.cpp b/src/classlibnative/bcltype/arraynative.cpp index 39899b80ea..d12867ef1e 100644 --- a/src/classlibnative/bcltype/arraynative.cpp +++ b/src/classlibnative/bcltype/arraynative.cpp @@ -17,6 +17,8 @@ #include "security.h" #include "invokeutil.h" +#include "arraynative.inl" + FCIMPL1(INT32, ArrayNative::GetRank, ArrayBase* array) { FCALL_CONTRACT; @@ -883,85 +885,25 @@ void memmoveGCRefs(void *dest, const void *src, size_t len) NOTHROW; GC_NOTRIGGER; MODE_COOPERATIVE; - PRECONDITION(CheckPointer(dest)); - PRECONDITION(CheckPointer(src)); - PRECONDITION(len >= 0); SO_TOLERANT; } CONTRACTL_END; + _ASSERTE(dest != nullptr); + _ASSERTE(src != nullptr); + // Make sure everything is pointer aligned _ASSERTE(IS_ALIGNED(dest, sizeof(SIZE_T))); _ASSERTE(IS_ALIGNED(src, sizeof(SIZE_T))); _ASSERTE(IS_ALIGNED(len, sizeof(SIZE_T))); - size_t size = len; - BYTE * dmem = (BYTE *)dest; - BYTE * smem = (BYTE *)src; - - GCHeapMemoryBarrier(); - - if (dmem <= smem || smem + size <= dmem) - { - // copy 16 bytes at a time - while (size >= 4 * sizeof(SIZE_T)) - { - size -= 4 * sizeof(SIZE_T); - ((SIZE_T *)dmem)[0] = ((SIZE_T *)smem)[0]; - ((SIZE_T *)dmem)[1] = ((SIZE_T *)smem)[1]; - ((SIZE_T *)dmem)[2] = ((SIZE_T *)smem)[2]; - ((SIZE_T *)dmem)[3] = ((SIZE_T *)smem)[3]; - smem += 4 * sizeof(SIZE_T); - dmem += 4 * sizeof(SIZE_T); - } - - if ((size & (2 * sizeof(SIZE_T))) != 0) - { - ((SIZE_T *)dmem)[0] = ((SIZE_T *)smem)[0]; - ((SIZE_T *)dmem)[1] = ((SIZE_T *)smem)[1]; - smem += 2 * sizeof(SIZE_T); - dmem += 2 * sizeof(SIZE_T); - } + _ASSERTE(CheckPointer(dest)); + _ASSERTE(CheckPointer(src)); - if ((size & sizeof(SIZE_T)) != 0) - { - ((SIZE_T *)dmem)[0] = ((SIZE_T *)smem)[0]; - } - } - else + if (len != 0 && dest != src) { - smem += size; - dmem += size; - - // copy 16 bytes at a time - while (size >= 4 * sizeof(SIZE_T)) - { - size -= 4 * sizeof(SIZE_T); - smem -= 4 * sizeof(SIZE_T); - dmem -= 4 * sizeof(SIZE_T); - ((SIZE_T *)dmem)[3] = 
((SIZE_T *)smem)[3]; - ((SIZE_T *)dmem)[2] = ((SIZE_T *)smem)[2]; - ((SIZE_T *)dmem)[1] = ((SIZE_T *)smem)[1]; - ((SIZE_T *)dmem)[0] = ((SIZE_T *)smem)[0]; - } - - if ((size & (2 * sizeof(SIZE_T))) != 0) - { - smem -= 2 * sizeof(SIZE_T); - dmem -= 2 * sizeof(SIZE_T); - ((SIZE_T *)dmem)[1] = ((SIZE_T *)smem)[1]; - ((SIZE_T *)dmem)[0] = ((SIZE_T *)smem)[0]; - } - - if ((size & sizeof(SIZE_T)) != 0) - { - smem -= sizeof(SIZE_T); - dmem -= sizeof(SIZE_T); - ((SIZE_T *)dmem)[0] = ((SIZE_T *)smem)[0]; - } + InlinedMemmoveGCRefsHelper(dest, src, len); } - - SetCardsAfterBulkCopy((Object**)dest, len); } void ArrayNative::ArrayCopyNoTypeCheck(BASEARRAYREF pSrc, unsigned int srcIndex, BASEARRAYREF pDest, unsigned int destIndex, unsigned int length) diff --git a/src/classlibnative/bcltype/arraynative.inl b/src/classlibnative/bcltype/arraynative.inl new file mode 100644 index 0000000000..b29e1a9b73 --- /dev/null +++ b/src/classlibnative/bcltype/arraynative.inl @@ -0,0 +1,329 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+// +// File: ArrayNative.cpp +// + +// +// This file contains the native methods that support the Array class +// + +#ifndef _ARRAYNATIVE_INL_ +#define _ARRAYNATIVE_INL_ + +#include "gchelpers.inl" + +FORCEINLINE void InlinedForwardGCSafeCopyHelper(void *dest, const void *src, size_t len) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_COOPERATIVE; + SO_TOLERANT; + } + CONTRACTL_END; + + _ASSERTE(dest != nullptr); + _ASSERTE(src != nullptr); + _ASSERTE(dest != src); + _ASSERTE(len != 0); + + // To be able to copy forwards, the destination buffer cannot start inside the source buffer + _ASSERTE((SIZE_T)dest - (SIZE_T)src >= len); + + // Make sure everything is pointer aligned + _ASSERTE(IS_ALIGNED(dest, sizeof(SIZE_T))); + _ASSERTE(IS_ALIGNED(src, sizeof(SIZE_T))); + _ASSERTE(IS_ALIGNED(len, sizeof(SIZE_T))); + + _ASSERTE(CheckPointer(dest)); + _ASSERTE(CheckPointer(src)); + + SIZE_T *dptr = (SIZE_T *)dest; + SIZE_T *sptr = (SIZE_T *)src; + + while (true) + { + if ((len & sizeof(SIZE_T)) != 0) + { + *dptr = *sptr; + + len ^= sizeof(SIZE_T); + if (len == 0) + { + return; + } + ++sptr; + ++dptr; + } + +#if defined(_AMD64_) && (defined(_MSC_VER) || defined(__clang__)) + if ((len & (2 * sizeof(SIZE_T))) != 0) + { + __m128 v = _mm_loadu_ps((float *)sptr); + _mm_storeu_ps((float *)dptr, v); + + len ^= 2 * sizeof(SIZE_T); + if (len == 0) + { + return; + } + sptr += 2; + dptr += 2; + } + + // Align the destination pointer to 16 bytes for the next set of 16-byte copies + if (((SIZE_T)dptr & sizeof(SIZE_T)) != 0) + { + *dptr = *sptr; + + ++sptr; + ++dptr; + len -= sizeof(SIZE_T); + if (len < 4 * sizeof(SIZE_T)) + { + continue; + } + } + + // Copy 32 bytes at a time + _ASSERTE(len >= 4 * sizeof(SIZE_T)); + do + { + __m128 v = _mm_loadu_ps((float *)sptr); + _mm_store_ps((float *)dptr, v); + v = _mm_loadu_ps((float *)(sptr + 2)); + _mm_store_ps((float *)(dptr + 2), v); + + sptr += 4; + dptr += 4; + len -= 4 * sizeof(SIZE_T); + } while (len >= 4 * sizeof(SIZE_T)); + if (len 
== 0) + { + return; + } +#else // !(defined(_AMD64_) && (defined(_MSC_VER) || defined(__clang__))) + if ((len & (2 * sizeof(SIZE_T))) != 0) + { + // Read two values and write two values to hint the use of wide loads and stores + SIZE_T p0 = sptr[0]; + SIZE_T p1 = sptr[1]; + dptr[0] = p0; + dptr[1] = p1; + + len ^= 2 * sizeof(SIZE_T); + if (len == 0) + { + return; + } + sptr += 2; + dptr += 2; + } + + // Copy 16 (on 32-bit systems) or 32 (on 64-bit systems) bytes at a time + _ASSERTE(len >= 4 * sizeof(SIZE_T)); + while (true) + { + // Read two values and write two values to hint the use of wide loads and stores + SIZE_T p0 = sptr[0]; + SIZE_T p1 = sptr[1]; + dptr[0] = p0; + dptr[1] = p1; + p0 = sptr[2]; + p1 = sptr[3]; + dptr[2] = p0; + dptr[3] = p1; + + len -= 4 * sizeof(SIZE_T); + if (len == 0) + { + return; + } + sptr += 4; + dptr += 4; + } +#endif // defined(_AMD64_) && (defined(_MSC_VER) || defined(__clang__)) + } +} + +FORCEINLINE void InlinedBackwardGCSafeCopyHelper(void *dest, const void *src, size_t len) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_COOPERATIVE; + SO_TOLERANT; + } + CONTRACTL_END; + + _ASSERTE(dest != nullptr); + _ASSERTE(src != nullptr); + _ASSERTE(dest != src); + _ASSERTE(len != 0); + + // To be able to copy backwards, the source buffer cannot start inside the destination buffer + _ASSERTE((SIZE_T)src - (SIZE_T)dest >= len); + + // Make sure everything is pointer aligned + _ASSERTE(IS_ALIGNED(dest, sizeof(SIZE_T))); + _ASSERTE(IS_ALIGNED(src, sizeof(SIZE_T))); + _ASSERTE(IS_ALIGNED(len, sizeof(SIZE_T))); + + _ASSERTE(CheckPointer(dest)); + _ASSERTE(CheckPointer(src)); + + SIZE_T *dptr = (SIZE_T *)((BYTE *)dest + len); + SIZE_T *sptr = (SIZE_T *)((BYTE *)src + len); + + while (true) + { + if ((len & sizeof(SIZE_T)) != 0) + { + --sptr; + --dptr; + + *dptr = *sptr; + + len ^= sizeof(SIZE_T); + if (len == 0) + { + return; + } + } + +#if defined(_AMD64_) && (defined(_MSC_VER) || defined(__clang__)) + if ((len & (2 * sizeof(SIZE_T))) != 
0) + { + sptr -= 2; + dptr -= 2; + + __m128 v = _mm_loadu_ps((float *)sptr); + _mm_storeu_ps((float *)dptr, v); + + len ^= 2 * sizeof(SIZE_T); + if (len == 0) + { + return; + } + } + + // Align the destination pointer to 16 bytes for the next set of 16-byte copies + if (((SIZE_T)dptr & sizeof(SIZE_T)) != 0) + { + --sptr; + --dptr; + + *dptr = *sptr; + + len -= sizeof(SIZE_T); + if (len < 4 * sizeof(SIZE_T)) + { + continue; + } + } + + // Copy 32 bytes at a time + _ASSERTE(len >= 4 * sizeof(SIZE_T)); + do + { + sptr -= 4; + dptr -= 4; + + __m128 v = _mm_loadu_ps((float *)(sptr + 2)); + _mm_store_ps((float *)(dptr + 2), v); + v = _mm_loadu_ps((float *)sptr); + _mm_store_ps((float *)dptr, v); + + len -= 4 * sizeof(SIZE_T); + } while (len >= 4 * sizeof(SIZE_T)); + if (len == 0) + { + return; + } +#else // !(defined(_AMD64_) && (defined(_MSC_VER) || defined(__clang__))) + if ((len & (2 * sizeof(SIZE_T))) != 0) + { + sptr -= 2; + dptr -= 2; + + // Read two values and write two values to hint the use of wide loads and stores + SIZE_T p1 = sptr[1]; + SIZE_T p0 = sptr[0]; + dptr[1] = p1; + dptr[0] = p0; + + len ^= 2 * sizeof(SIZE_T); + if (len == 0) + { + return; + } + } + + // Copy 16 (on 32-bit systems) or 32 (on 64-bit systems) bytes at a time + _ASSERTE(len >= 4 * sizeof(SIZE_T)); + do + { + sptr -= 4; + dptr -= 4; + + // Read two values and write two values to hint the use of wide loads and stores + SIZE_T p0 = sptr[2]; + SIZE_T p1 = sptr[3]; + dptr[2] = p0; + dptr[3] = p1; + p0 = sptr[0]; + p1 = sptr[1]; + dptr[0] = p0; + dptr[1] = p1; + + len -= 4 * sizeof(SIZE_T); + } while (len != 0); + return; +#endif // defined(_AMD64_) && (defined(_MSC_VER) || defined(__clang__)) + } +} + +FORCEINLINE void InlinedMemmoveGCRefsHelper(void *dest, const void *src, size_t len) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_COOPERATIVE; + SO_TOLERANT; + } + CONTRACTL_END; + + _ASSERTE(dest != nullptr); + _ASSERTE(src != nullptr); + _ASSERTE(dest != src); + _ASSERTE(len != 0); + 
+ // Make sure everything is pointer aligned + _ASSERTE(IS_ALIGNED(dest, sizeof(SIZE_T))); + _ASSERTE(IS_ALIGNED(src, sizeof(SIZE_T))); + _ASSERTE(IS_ALIGNED(len, sizeof(SIZE_T))); + + _ASSERTE(CheckPointer(dest)); + _ASSERTE(CheckPointer(src)); + + GCHeapMemoryBarrier(); + + // To be able to copy forwards, the destination buffer cannot start inside the source buffer + if ((size_t)dest - (size_t)src >= len) + { + InlinedForwardGCSafeCopyHelper(dest, src, len); + } + else + { + InlinedBackwardGCSafeCopyHelper(dest, src, len); + } + + InlinedSetCardsAfterBulkCopyHelper((Object**)dest, len); +} + +#endif // !_ARRAYNATIVE_INL_ |