Diffstat (limited to 'src/mscorlib/src/System/Buffer.cs')
-rw-r--r--  src/mscorlib/src/System/Buffer.cs  485
1 file changed, 174 insertions, 311 deletions
diff --git a/src/mscorlib/src/System/Buffer.cs b/src/mscorlib/src/System/Buffer.cs
index 662b240b34..92b938df8c 100644
--- a/src/mscorlib/src/System/Buffer.cs
+++ b/src/mscorlib/src/System/Buffer.cs
@@ -2,10 +2,14 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
-namespace System {
-
+#if AMD64 || (BIT32 && !ARM)
+#define HAS_CUSTOM_BLOCKS
+#endif
+
+namespace System
+{
// Only contains static methods. Does not require serialization
-
+
using System;
using System.Runtime.CompilerServices;
using System.Runtime.ConstrainedExecution;
@@ -57,7 +61,7 @@ namespace System {
if (count == 0)
return -1;
else if (*pByte == value)
- return (int) (pByte - src);
+ return (int)(pByte - src);
count--;
pByte++;
@@ -87,7 +91,7 @@ namespace System {
if (t1 != 0)
{
// We've found a match for value, figure out which position it's in.
- int foundIndex = (int) (pByte - src);
+ int foundIndex = (int)(pByte - src);
if (pByte[0] == value)
return foundIndex;
else if (pByte[1] == value)
@@ -100,14 +104,13 @@ namespace System {
count -= 4;
pByte += 4;
-
}
// Catch any bytes that might be left at the tail of the buffer
while (count > 0)
{
if (*pByte == value)
- return (int) (pByte - src);
+ return (int)(pByte - src);
count--;
pByte++;
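
The unrolled loop above tests four bytes per iteration: it XORs each 32-bit word with the search value replicated into every byte, so a matching byte becomes zero, and the t1 != 0 test flags the word that contains one. A minimal standalone sketch of the same word-at-a-time idea (illustrative only, not the exact CoreCLR bit pattern, and assuming unaligned 32-bit reads are acceptable on the target):

    static class ByteSearchSketch
    {
        // Returns the index of the first occurrence of value in src[0..count), or -1.
        public static unsafe int IndexOfByte(byte* src, byte value, int count)
        {
            byte* p = src;
            uint pattern = value * 0x01010101u;   // value replicated into all four bytes

            while (count >= 4)
            {
                uint t = *(uint*)p ^ pattern;     // a matching byte becomes 0x00
                // Classic zero-byte test: a high bit is set exactly where a byte is zero.
                if (((t - 0x01010101u) & ~t & 0x80808080u) != 0)
                {
                    for (int i = 0; i < 4; i++)   // the match is in these four bytes
                        if (p[i] == value)
                            return (int)(p + i - src);
                }
                p += 4;
                count -= 4;
            }

            while (count-- > 0)                   // tail bytes after the 4-wide loop
            {
                if (*p == value)
                    return (int)(p - src);
                p++;
            }
            return -1;
        }
    }
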
@@ -116,7 +119,7 @@ namespace System {
// If we don't have a match return -1;
return -1;
}
-
+
// Returns a bool to indicate if the array is of primitive data types
// or not.
[MethodImplAttribute(MethodImplOptions.InternalCall)]
@@ -139,7 +142,7 @@ namespace System {
// Is it of primitive types?
if (!IsPrimitiveTypeArray(array))
- throw new ArgumentException(Environment.GetResourceString("Arg_MustBePrimArray"), nameof(array));
+ throw new ArgumentException(SR.Arg_MustBePrimArray, nameof(array));
// Is the index in valid range of the array?
if (index < 0 || index >= _ByteLength(array))
@@ -165,7 +168,7 @@ namespace System {
// Is it of primitive types?
if (!IsPrimitiveTypeArray(array))
- throw new ArgumentException(Environment.GetResourceString("Arg_MustBePrimArray"), nameof(array));
+ throw new ArgumentException(SR.Arg_MustBePrimArray, nameof(array));
// Is the index in valid range of the array?
if (index < 0 || index >= _ByteLength(array))
@@ -175,7 +178,7 @@ namespace System {
_SetByte(array, index, value);
}
-
+
// Gets a particular byte out of the array. The array must be an
// array of primitives.
//
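
For context, GetByte and SetByte are the public Buffer accessors that treat any primitive array as a flat run of bytes and reject everything else with the Arg_MustBePrimArray ArgumentException seen above. A small usage example (byte values shown assume a little-endian machine):

    using System;

    class BufferDemo
    {
        static void Main()
        {
            int[] data = { 0x11223344, 0x55667788 };

            Console.WriteLine(Buffer.ByteLength(data));  // 8: two 4-byte ints
            Console.WriteLine(Buffer.GetByte(data, 0));  // 68 (0x44), lowest byte of data[0]
            Buffer.SetByte(data, 3, 0xFF);               // data[0] becomes 0xFF223344

            // Non-primitive element types hit the checks in this diff:
            try { Buffer.ByteLength(new string[1]); }
            catch (ArgumentException) { Console.WriteLine("Arg_MustBePrimArray"); }
        }
    }
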
@@ -193,38 +196,41 @@ namespace System {
// Is it of primitive types?
if (!IsPrimitiveTypeArray(array))
- throw new ArgumentException(Environment.GetResourceString("Arg_MustBePrimArray"), nameof(array));
+ throw new ArgumentException(SR.Arg_MustBePrimArray, nameof(array));
return _ByteLength(array);
}
internal unsafe static void ZeroMemory(byte* src, long len)
{
- while(len-- > 0)
+ while (len-- > 0)
*(src + len) = 0;
}
- internal unsafe static void Memcpy(byte[] dest, int destIndex, byte* src, int srcIndex, int len) {
- Debug.Assert( (srcIndex >= 0) && (destIndex >= 0) && (len >= 0), "Index and length must be non-negative!");
+ internal unsafe static void Memcpy(byte[] dest, int destIndex, byte* src, int srcIndex, int len)
+ {
+ Debug.Assert((srcIndex >= 0) && (destIndex >= 0) && (len >= 0), "Index and length must be non-negative!");
Debug.Assert(dest.Length - destIndex >= len, "not enough bytes in dest");
// If dest has 0 elements, the fixed statement will throw an
// IndexOutOfRangeException. Special-case 0-byte copies.
- if (len==0)
+ if (len == 0)
return;
- fixed(byte* pDest = dest) {
+ fixed (byte* pDest = dest)
+ {
Memcpy(pDest + destIndex, src + srcIndex, len);
}
}
internal unsafe static void Memcpy(byte* pDest, int destIndex, byte[] src, int srcIndex, int len)
{
- Debug.Assert( (srcIndex >= 0) && (destIndex >= 0) && (len >= 0), "Index and length must be non-negative!");
+ Debug.Assert((srcIndex >= 0) && (destIndex >= 0) && (len >= 0), "Index and length must be non-negative!");
Debug.Assert(src.Length - srcIndex >= len, "not enough bytes in src");
// If src has 0 elements, the fixed statement will throw an
// IndexOutOfRangeException. Special-case 0-byte copies.
- if (len==0)
+ if (len == 0)
return;
- fixed(byte* pSrc = src) {
+ fixed (byte* pSrc = src)
+ {
Memcpy(pDest + destIndex, pSrc + srcIndex, len);
}
}
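
Each of these helpers follows the same pin-and-copy pattern: special-case len == 0 (the comments above note that the fixed statement misbehaves on an empty array), pin the managed buffer so the GC cannot move it mid-copy, then copy through raw pointers. A hedged sketch of the pattern, using the public Buffer.MemoryCopy in place of the internal Memcpy:

    static class PinAndCopySketch
    {
        // Copies len bytes from src into dest starting at destIndex.
        public static unsafe void CopyInto(byte[] dest, int destIndex, byte* src, int len)
        {
            if (len == 0)
                return;                       // never pin an empty array
            fixed (byte* pDest = dest)        // pin dest for the duration of the copy
            {
                System.Buffer.MemoryCopy(src, pDest + destIndex,
                                         dest.Length - destIndex,  // bytes available in dest
                                         len);                     // bytes to copy
            }
        }
    }
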
@@ -244,7 +250,8 @@ namespace System {
internal unsafe static extern void Memcpy(byte* dest, byte* src, int len);
#else // ARM
[MethodImplAttribute(MethodImplOptions.AggressiveInlining)]
- internal unsafe static void Memcpy(byte* dest, byte* src, int len) {
+ internal unsafe static void Memcpy(byte* dest, byte* src, int len)
+ {
Debug.Assert(len >= 0, "Negative length in memcopy!");
Memmove(dest, src, (uint)len);
}
@@ -253,327 +260,175 @@ namespace System {
// This method has a different signature for x64 and other platforms; this is done for performance reasons.
internal unsafe static void Memmove(byte* dest, byte* src, nuint len)
{
- // P/Invoke into the native version when the buffers are overlapping and the copy needs to be performed backwards
- // This check can produce false positives for lengths greater than Int32.MaxInt. It is fine because we want to use PInvoke path for the large lengths anyway.
+#if AMD64 || (BIT32 && !ARM)
+ const nuint CopyThreshold = 2048;
+#else
+ const nuint CopyThreshold = 512;
+#endif // AMD64 || (BIT32 && !ARM)
- if ((nuint)dest - (nuint)src < len) goto PInvoke;
+ // P/Invoke into the native version when the buffers are overlapping.
- // This is portable version of memcpy. It mirrors what the hand optimized assembly versions of memcpy typically do.
- //
- // Ideally, we would just use the cpblk IL instruction here. Unfortunately, cpblk IL instruction is not as efficient as
- // possible yet and so we have this implementation here for now.
+ if (((nuint)dest - (nuint)src < len) || ((nuint)src - (nuint)dest < len)) goto PInvoke;
- // Note: It's important that this switch handles lengths at least up to 22.
- // See notes below near the main loop for why.
+ byte* srcEnd = src + len;
+ byte* destEnd = dest + len;
- // The switch will be very fast since it can be implemented using a jump
- // table in assembly. See http://stackoverflow.com/a/449297/4077294 for more info.
+ if (len <= 16) goto MCPY02;
+ if (len > 64) goto MCPY05;
- switch (len)
- {
- case 0:
- return;
- case 1:
- *dest = *src;
- return;
- case 2:
- *(short*)dest = *(short*)src;
- return;
- case 3:
- *(short*)dest = *(short*)src;
- *(dest + 2) = *(src + 2);
- return;
- case 4:
- *(int*)dest = *(int*)src;
- return;
- case 5:
- *(int*)dest = *(int*)src;
- *(dest + 4) = *(src + 4);
- return;
- case 6:
- *(int*)dest = *(int*)src;
- *(short*)(dest + 4) = *(short*)(src + 4);
- return;
- case 7:
- *(int*)dest = *(int*)src;
- *(short*)(dest + 4) = *(short*)(src + 4);
- *(dest + 6) = *(src + 6);
- return;
- case 8:
-#if BIT64
- *(long*)dest = *(long*)src;
-#else
- *(int*)dest = *(int*)src;
- *(int*)(dest + 4) = *(int*)(src + 4);
-#endif
- return;
- case 9:
-#if BIT64
- *(long*)dest = *(long*)src;
-#else
- *(int*)dest = *(int*)src;
- *(int*)(dest + 4) = *(int*)(src + 4);
-#endif
- *(dest + 8) = *(src + 8);
- return;
- case 10:
-#if BIT64
- *(long*)dest = *(long*)src;
-#else
- *(int*)dest = *(int*)src;
- *(int*)(dest + 4) = *(int*)(src + 4);
-#endif
- *(short*)(dest + 8) = *(short*)(src + 8);
- return;
- case 11:
-#if BIT64
- *(long*)dest = *(long*)src;
-#else
- *(int*)dest = *(int*)src;
- *(int*)(dest + 4) = *(int*)(src + 4);
-#endif
- *(short*)(dest + 8) = *(short*)(src + 8);
- *(dest + 10) = *(src + 10);
- return;
- case 12:
-#if BIT64
- *(long*)dest = *(long*)src;
-#else
- *(int*)dest = *(int*)src;
- *(int*)(dest + 4) = *(int*)(src + 4);
-#endif
- *(int*)(dest + 8) = *(int*)(src + 8);
- return;
- case 13:
-#if BIT64
- *(long*)dest = *(long*)src;
-#else
- *(int*)dest = *(int*)src;
- *(int*)(dest + 4) = *(int*)(src + 4);
-#endif
- *(int*)(dest + 8) = *(int*)(src + 8);
- *(dest + 12) = *(src + 12);
- return;
- case 14:
-#if BIT64
- *(long*)dest = *(long*)src;
-#else
- *(int*)dest = *(int*)src;
- *(int*)(dest + 4) = *(int*)(src + 4);
-#endif
- *(int*)(dest + 8) = *(int*)(src + 8);
- *(short*)(dest + 12) = *(short*)(src + 12);
- return;
- case 15:
-#if BIT64
- *(long*)dest = *(long*)src;
-#else
- *(int*)dest = *(int*)src;
- *(int*)(dest + 4) = *(int*)(src + 4);
-#endif
- *(int*)(dest + 8) = *(int*)(src + 8);
- *(short*)(dest + 12) = *(short*)(src + 12);
- *(dest + 14) = *(src + 14);
- return;
- case 16:
-#if BIT64
- *(long*)dest = *(long*)src;
- *(long*)(dest + 8) = *(long*)(src + 8);
-#else
- *(int*)dest = *(int*)src;
- *(int*)(dest + 4) = *(int*)(src + 4);
- *(int*)(dest + 8) = *(int*)(src + 8);
- *(int*)(dest + 12) = *(int*)(src + 12);
-#endif
- return;
- case 17:
-#if BIT64
- *(long*)dest = *(long*)src;
- *(long*)(dest + 8) = *(long*)(src + 8);
-#else
- *(int*)dest = *(int*)src;
- *(int*)(dest + 4) = *(int*)(src + 4);
- *(int*)(dest + 8) = *(int*)(src + 8);
- *(int*)(dest + 12) = *(int*)(src + 12);
-#endif
- *(dest + 16) = *(src + 16);
- return;
- case 18:
-#if BIT64
- *(long*)dest = *(long*)src;
- *(long*)(dest + 8) = *(long*)(src + 8);
+ MCPY00:
+ // Copy bytes which are multiples of 16 and leave the remainder for MCPY01 to handle.
+ Debug.Assert(len > 16 && len <= 64);
+#if HAS_CUSTOM_BLOCKS
+ *(Block16*)dest = *(Block16*)src; // [0,16]
+#elif BIT64
+ *(long*)dest = *(long*)src;
+ *(long*)(dest + 8) = *(long*)(src + 8); // [0,16]
#else
- *(int*)dest = *(int*)src;
- *(int*)(dest + 4) = *(int*)(src + 4);
- *(int*)(dest + 8) = *(int*)(src + 8);
- *(int*)(dest + 12) = *(int*)(src + 12);
+ *(int*)dest = *(int*)src;
+ *(int*)(dest + 4) = *(int*)(src + 4);
+ *(int*)(dest + 8) = *(int*)(src + 8);
+ *(int*)(dest + 12) = *(int*)(src + 12); // [0,16]
#endif
- *(short*)(dest + 16) = *(short*)(src + 16);
- return;
- case 19:
-#if BIT64
- *(long*)dest = *(long*)src;
- *(long*)(dest + 8) = *(long*)(src + 8);
+ if (len <= 32) goto MCPY01;
+#if HAS_CUSTOM_BLOCKS
+ *(Block16*)(dest + 16) = *(Block16*)(src + 16); // [0,32]
+#elif BIT64
+ *(long*)(dest + 16) = *(long*)(src + 16);
+ *(long*)(dest + 24) = *(long*)(src + 24); // [0,32]
#else
- *(int*)dest = *(int*)src;
- *(int*)(dest + 4) = *(int*)(src + 4);
- *(int*)(dest + 8) = *(int*)(src + 8);
- *(int*)(dest + 12) = *(int*)(src + 12);
+ *(int*)(dest + 16) = *(int*)(src + 16);
+ *(int*)(dest + 20) = *(int*)(src + 20);
+ *(int*)(dest + 24) = *(int*)(src + 24);
+ *(int*)(dest + 28) = *(int*)(src + 28); // [0,32]
#endif
- *(short*)(dest + 16) = *(short*)(src + 16);
- *(dest + 18) = *(src + 18);
- return;
- case 20:
-#if BIT64
- *(long*)dest = *(long*)src;
- *(long*)(dest + 8) = *(long*)(src + 8);
+ if (len <= 48) goto MCPY01;
+#if HAS_CUSTOM_BLOCKS
+ *(Block16*)(dest + 32) = *(Block16*)(src + 32); // [0,48]
+#elif BIT64
+ *(long*)(dest + 32) = *(long*)(src + 32);
+ *(long*)(dest + 40) = *(long*)(src + 40); // [0,48]
#else
- *(int*)dest = *(int*)src;
- *(int*)(dest + 4) = *(int*)(src + 4);
- *(int*)(dest + 8) = *(int*)(src + 8);
- *(int*)(dest + 12) = *(int*)(src + 12);
+ *(int*)(dest + 32) = *(int*)(src + 32);
+ *(int*)(dest + 36) = *(int*)(src + 36);
+ *(int*)(dest + 40) = *(int*)(src + 40);
+ *(int*)(dest + 44) = *(int*)(src + 44); // [0,48]
#endif
- *(int*)(dest + 16) = *(int*)(src + 16);
- return;
- case 21:
-#if BIT64
- *(long*)dest = *(long*)src;
- *(long*)(dest + 8) = *(long*)(src + 8);
+
+ MCPY01:
+ // Unconditionally copy the last 16 bytes using destEnd and srcEnd and return.
+ Debug.Assert(len > 16 && len <= 64);
+#if HAS_CUSTOM_BLOCKS
+ *(Block16*)(destEnd - 16) = *(Block16*)(srcEnd - 16);
+#elif BIT64
+ *(long*)(destEnd - 16) = *(long*)(srcEnd - 16);
+ *(long*)(destEnd - 8) = *(long*)(srcEnd - 8);
#else
- *(int*)dest = *(int*)src;
- *(int*)(dest + 4) = *(int*)(src + 4);
- *(int*)(dest + 8) = *(int*)(src + 8);
- *(int*)(dest + 12) = *(int*)(src + 12);
+ *(int*)(destEnd - 16) = *(int*)(srcEnd - 16);
+ *(int*)(destEnd - 12) = *(int*)(srcEnd - 12);
+ *(int*)(destEnd - 8) = *(int*)(srcEnd - 8);
+ *(int*)(destEnd - 4) = *(int*)(srcEnd - 4);
#endif
- *(int*)(dest + 16) = *(int*)(src + 16);
- *(dest + 20) = *(src + 20);
- return;
- case 22:
+ return;
+
+ MCPY02:
+ // Copy the first 8 bytes and then unconditionally copy the last 8 bytes and return.
+ if ((len & 24) == 0) goto MCPY03;
+ Debug.Assert(len >= 8 && len <= 16);
#if BIT64
- *(long*)dest = *(long*)src;
- *(long*)(dest + 8) = *(long*)(src + 8);
+ *(long*)dest = *(long*)src;
+ *(long*)(destEnd - 8) = *(long*)(srcEnd - 8);
#else
- *(int*)dest = *(int*)src;
- *(int*)(dest + 4) = *(int*)(src + 4);
- *(int*)(dest + 8) = *(int*)(src + 8);
- *(int*)(dest + 12) = *(int*)(src + 12);
+ *(int*)dest = *(int*)src;
+ *(int*)(dest + 4) = *(int*)(src + 4);
+ *(int*)(destEnd - 8) = *(int*)(srcEnd - 8);
+ *(int*)(destEnd - 4) = *(int*)(srcEnd - 4);
#endif
- *(int*)(dest + 16) = *(int*)(src + 16);
- *(short*)(dest + 20) = *(short*)(src + 20);
- return;
- }
-
- // P/Invoke into the native version for large lengths
- if (len >= 512) goto PInvoke;
-
- nuint i = 0; // byte offset at which we're copying
+ return;
- if (((int)dest & 3) != 0)
- {
- if (((int)dest & 1) != 0)
- {
- *(dest + i) = *(src + i);
- i += 1;
- if (((int)dest & 2) != 0)
- goto IntAligned;
- }
- *(short*)(dest + i) = *(short*)(src + i);
- i += 2;
- }
+ MCPY03:
+ // Copy the first 4 bytes and then unconditionally copy the last 4 bytes and return.
+ if ((len & 4) == 0) goto MCPY04;
+ Debug.Assert(len >= 4 && len < 8);
+ *(int*)dest = *(int*)src;
+ *(int*)(destEnd - 4) = *(int*)(srcEnd - 4);
+ return;
- IntAligned:
+ MCPY04:
+ // Copy the first byte. For pending bytes, unconditionally copy the last 2 bytes and return.
+ Debug.Assert(len < 4);
+ if (len == 0) return;
+ *dest = *src;
+ if ((len & 2) == 0) return;
+ *(short*)(destEnd - 2) = *(short*)(srcEnd - 2);
+ return;
-#if BIT64
- // On 64-bit IntPtr.Size == 8, so we want to advance to the next 8-aligned address. If
- // (int)dest % 8 is 0, 5, 6, or 7, we will already have advanced by 0, 3, 2, or 1
- // bytes to the next aligned address (respectively), so do nothing. On the other hand,
- // if it is 1, 2, 3, or 4 we will want to copy-and-advance another 4 bytes until
- // we're aligned.
- // The thing 1, 2, 3, and 4 have in common that the others don't is that if you
- // subtract one from them, their 3rd lsb will not be set. Hence, the below check.
-
- if ((((int)dest - 1) & 4) == 0)
+ MCPY05:
+ // PInvoke to the native version when the copy length exceeds the threshold.
+ if (len > CopyThreshold)
{
- *(int*)(dest + i) = *(int*)(src + i);
- i += 4;
+ goto PInvoke;
}
-#endif // BIT64
-
- nuint end = len - 16;
- len -= i; // lower 4 bits of len represent how many bytes are left *after* the unrolled loop
-
- // We know due to the above switch-case that this loop will always run 1 iteration; max
- // bytes we copy before checking is 23 (7 to align the pointers, 16 for 1 iteration) so
- // the switch handles lengths 0-22.
- Debug.Assert(end >= 7 && i <= end);
-
- // This is separated out into a different variable, so the i + 16 addition can be
- // performed at the start of the pipeline and the loop condition does not have
- // a dependency on the writes.
- nuint counter;
-
- do
- {
- counter = i + 16;
- // This loop looks very costly since there appear to be a bunch of temporary values
- // being created with the adds, but the jit (for x86 anyways) will convert each of
- // these to use memory addressing operands.
-
- // So the only cost is a bit of code size, which is made up for by the fact that
- // we save on writes to dest/src.
-
-#if BIT64
- *(long*)(dest + i) = *(long*)(src + i);
- *(long*)(dest + i + 8) = *(long*)(src + i + 8);
+ // Copy 64 bytes at a time until the remainder is less than 64.
+ // If remainder is greater than 16 bytes, then jump to MCPY00. Otherwise, unconditionally copy the last 16 bytes and return.
+ Debug.Assert(len > 64 && len <= CopyThreshold);
+ nuint n = len >> 6;
+
+ MCPY06:
+#if HAS_CUSTOM_BLOCKS
+ *(Block64*)dest = *(Block64*)src;
+#elif BIT64
+ *(long*)dest = *(long*)src;
+ *(long*)(dest + 8) = *(long*)(src + 8);
+ *(long*)(dest + 16) = *(long*)(src + 16);
+ *(long*)(dest + 24) = *(long*)(src + 24);
+ *(long*)(dest + 32) = *(long*)(src + 32);
+ *(long*)(dest + 40) = *(long*)(src + 40);
+ *(long*)(dest + 48) = *(long*)(src + 48);
+ *(long*)(dest + 56) = *(long*)(src + 56);
#else
- *(int*)(dest + i) = *(int*)(src + i);
- *(int*)(dest + i + 4) = *(int*)(src + i + 4);
- *(int*)(dest + i + 8) = *(int*)(src + i + 8);
- *(int*)(dest + i + 12) = *(int*)(src + i + 12);
+ *(int*)dest = *(int*)src;
+ *(int*)(dest + 4) = *(int*)(src + 4);
+ *(int*)(dest + 8) = *(int*)(src + 8);
+ *(int*)(dest + 12) = *(int*)(src + 12);
+ *(int*)(dest + 16) = *(int*)(src + 16);
+ *(int*)(dest + 20) = *(int*)(src + 20);
+ *(int*)(dest + 24) = *(int*)(src + 24);
+ *(int*)(dest + 28) = *(int*)(src + 28);
+ *(int*)(dest + 32) = *(int*)(src + 32);
+ *(int*)(dest + 36) = *(int*)(src + 36);
+ *(int*)(dest + 40) = *(int*)(src + 40);
+ *(int*)(dest + 44) = *(int*)(src + 44);
+ *(int*)(dest + 48) = *(int*)(src + 48);
+ *(int*)(dest + 52) = *(int*)(src + 52);
+ *(int*)(dest + 56) = *(int*)(src + 56);
+ *(int*)(dest + 60) = *(int*)(src + 60);
#endif
-
- i = counter;
-
- // See notes above for why this wasn't used instead
- // i += 16;
- }
- while (counter <= end);
-
- if ((len & 8) != 0)
- {
-#if BIT64
- *(long*)(dest + i) = *(long*)(src + i);
+ dest += 64;
+ src += 64;
+ n--;
+ if (n != 0) goto MCPY06;
+
+ len %= 64;
+ if (len > 16) goto MCPY00;
+#if HAS_CUSTOM_BLOCKS
+ *(Block16*)(destEnd - 16) = *(Block16*)(srcEnd - 16);
+#elif BIT64
+ *(long*)(destEnd - 16) = *(long*)(srcEnd - 16);
+ *(long*)(destEnd - 8) = *(long*)(srcEnd - 8);
#else
- *(int*)(dest + i) = *(int*)(src + i);
- *(int*)(dest + i + 4) = *(int*)(src + i + 4);
+ *(int*)(destEnd - 16) = *(int*)(srcEnd - 16);
+ *(int*)(destEnd - 12) = *(int*)(srcEnd - 12);
+ *(int*)(destEnd - 8) = *(int*)(srcEnd - 8);
+ *(int*)(destEnd - 4) = *(int*)(srcEnd - 4);
#endif
- i += 8;
- }
- if ((len & 4) != 0)
- {
- *(int*)(dest + i) = *(int*)(src + i);
- i += 4;
- }
- if ((len & 2) != 0)
- {
- *(short*)(dest + i) = *(short*)(src + i);
- i += 2;
- }
- if ((len & 1) != 0)
- {
- *(dest + i) = *(src + i);
- // We're not using i after this, so not needed
- // i += 1;
- }
-
return;
PInvoke:
_Memmove(dest, src, len);
-
}
-
+
// Non-inlinable wrapper around the QCall that avoids polluting the fast path
// with P/Invoke prolog/epilog.
[MethodImplAttribute(MethodImplOptions.NoInlining)]
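
Two ideas in the rewritten Memmove deserve a note. First, the overlap test: regions of length len overlap exactly when the unsigned distance between the pointers is below len, and computing the difference in both directions covers either pointer ordering, because the "negative" direction wraps around to a huge unsigned value and fails the comparison. As a standalone sketch (written with the C# 9 nuint keyword rather than this file's nuint alias):

    // True iff [dest, dest+len) and [src, src+len) share at least one byte.
    static unsafe bool Overlaps(byte* dest, byte* src, nuint len)
        => ((nuint)dest - (nuint)src < len) || ((nuint)src - (nuint)dest < len);

Second, the head/tail trick used by MCPY01 through MCPY04: rather than looping over leftover bytes, the code writes one fixed-size chunk anchored at the start and another anchored at the end; the two writes overlap for in-between lengths but together cover the whole range. (The (len & 24) == 0 test in MCPY02 is a branch-free way of asking len < 8, given len <= 16.) For example, any length from 8 to 16 needs only two 8-byte moves, which is safe here because overlapping buffers were already routed to the native path:

    // Copies len bytes (8 <= len <= 16) with exactly two 8-byte moves.
    static unsafe void Copy8To16(byte* dest, byte* src, int len)
    {
        *(ulong*)dest = *(ulong*)src;                          // bytes [0, 8)
        *(ulong*)(dest + len - 8) = *(ulong*)(src + len - 8);  // bytes [len-8, len)
    }
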
@@ -616,5 +471,13 @@ namespace System {
Memmove((byte*)destination, (byte*)source, checked((uint)sourceBytesToCopy));
#endif // BIT64
}
+
+#if HAS_CUSTOM_BLOCKS
+ [StructLayout(LayoutKind.Sequential, Size = 16)]
+ private struct Block16 { }
+
+ [StructLayout(LayoutKind.Sequential, Size = 64)]
+ private struct Block64 { }
+#endif // HAS_CUSTOM_BLOCKS
}
}
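
The Block16 and Block64 structs added at the end exist purely for their Size: assigning through a pointer to an empty struct with an explicit layout size lets the JIT emit one wide block copy (SIMD moves where available) instead of a run of 8-byte loads and stores, which is what HAS_CUSTOM_BLOCKS gates above. A sketch of the same idiom in isolation:

    using System.Runtime.InteropServices;

    static class BlockCopyIdiomSketch
    {
        [StructLayout(LayoutKind.Sequential, Size = 16)]
        private struct Block16 { }

        // One 16-byte struct assignment; the JIT can lower this to a
        // single SIMD load/store pair on hardware that supports it.
        public static unsafe void Copy16(byte* dest, byte* src)
            => *(Block16*)dest = *(Block16*)src;
    }
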