summaryrefslogtreecommitdiff
path: root/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf8Utility.Transcoding.cs
diff options
context:
space:
mode:
Diffstat (limited to 'src/System.Private.CoreLib/shared/System/Text/Unicode/Utf8Utility.Transcoding.cs')
-rw-r--r--src/System.Private.CoreLib/shared/System/Text/Unicode/Utf8Utility.Transcoding.cs106
1 files changed, 56 insertions, 50 deletions
diff --git a/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf8Utility.Transcoding.cs b/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf8Utility.Transcoding.cs
index 126974c892..f050248601 100644
--- a/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf8Utility.Transcoding.cs
+++ b/src/System.Private.CoreLib/shared/System/Text/Unicode/Utf8Utility.Transcoding.cs
@@ -7,6 +7,7 @@ using System.Buffers.Binary;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using Internal.Runtime.CompilerServices;
@@ -78,7 +79,8 @@ namespace System.Text.Unicode
byte* pLastBufferPosProcessed = null; // used for invariant checking in debug builds
#endif
- while (pInputBuffer <= pFinalPosWhereCanReadDWordFromInputBuffer)
+ Debug.Assert(pInputBuffer <= pFinalPosWhereCanReadDWordFromInputBuffer);
+ do
{
// Read 32 bits at a time. This is enough to hold any possible UTF8-encoded scalar.
@@ -101,7 +103,7 @@ namespace System.Text.Unicode
goto ProcessRemainingBytesSlow; // running out of space, but may be able to write some data
}
- Widen4AsciiBytesToCharsAndWrite(ref *pOutputBuffer, thisDWord);
+ ASCIIUtility.WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref *pOutputBuffer, thisDWord);
pInputBuffer += 4;
pOutputBuffer += 4;
outputCharsRemaining -= 4;
@@ -127,8 +129,8 @@ namespace System.Text.Unicode
pInputBuffer += 8;
- Widen4AsciiBytesToCharsAndWrite(ref pOutputBuffer[0], thisDWord);
- Widen4AsciiBytesToCharsAndWrite(ref pOutputBuffer[4], secondDWord);
+ ASCIIUtility.WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref pOutputBuffer[0], thisDWord);
+ ASCIIUtility.WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref pOutputBuffer[4], secondDWord);
pOutputBuffer += 8;
}
@@ -143,7 +145,7 @@ namespace System.Text.Unicode
{
// The first DWORD contained all-ASCII bytes, so expand it.
- Widen4AsciiBytesToCharsAndWrite(ref *pOutputBuffer, thisDWord);
+ ASCIIUtility.WidenFourAsciiBytesToUtf16AndWriteToBuffer(ref *pOutputBuffer, thisDWord);
// continue the outer loop from the second DWORD
@@ -487,12 +489,10 @@ namespace System.Text.Unicode
}
// As an optimization, on compatible platforms check if a second three-byte sequence immediately
- // follows the one we just read, and if so use BSWAP and BMI2 to extract them together.
+ // follows the one we just read, and if so extract them together.
- if (Bmi2.X64.IsSupported)
+ if (BitConverter.IsLittleEndian)
{
- Debug.Assert(BitConverter.IsLittleEndian, "BMI2 requires little-endian.");
-
// First, check that the leftover byte from the original DWORD is in the range [ E0..EF ], which
// would indicate the potential start of a second three-byte sequence.
@@ -504,7 +504,7 @@ namespace System.Text.Unicode
if (outputCharsRemaining > 1 && (nint)(void*)Unsafe.ByteOffset(ref *pInputBuffer, ref *pFinalPosWhereCanReadDWordFromInputBuffer) >= 3)
{
- // We're going to attempt to read a second 3-byte sequence and write them both out simultaneously using PEXT.
+ // We're going to attempt to read a second 3-byte sequence and write them both out one after the other.
// We need to check the continuation bit mask on the remaining two bytes (and we may as well check the leading
// byte mask again since it's free), then perform overlong + surrogate checks. If the overlong or surrogate
// checks fail, we'll fall through to the remainder of the logic which will transcode the original valid
@@ -517,14 +517,8 @@ namespace System.Text.Unicode
&& ((secondDWord & 0x0000_200Fu) != 0)
&& (((secondDWord - 0x0000_200Du) & 0x0000_200Fu) != 0))
{
- // combinedQWord = [ 1110ZZZZ 10YYYYYY 10XXXXXX ######## | 1110zzzz 10yyyyyy 10xxxxxx ######## ], where xyz are from first DWORD, XYZ are from second DWORD
- ulong combinedQWord = ((ulong)BinaryPrimitives.ReverseEndianness(secondDWord) << 32) | BinaryPrimitives.ReverseEndianness(thisDWord);
- thisDWord = secondDWord; // store this value in the correct local for the ASCII drain logic
-
- // extractedQWord = [ 00000000 00000000 00000000 00000000 | ZZZZYYYYYYXXXXXX zzzzyyyyyyxxxxxx ]
- ulong extractedQWord = Bmi2.X64.ParallelBitExtract(combinedQWord, 0x0F3F3F00_0F3F3F00ul);
-
- Unsafe.WriteUnaligned<uint>(pOutputBuffer, (uint)extractedQWord);
+ pOutputBuffer[0] = (char)ExtractCharFromFirstThreeByteSequence(thisDWord);
+ pOutputBuffer[1] = (char)ExtractCharFromFirstThreeByteSequence(secondDWord);
pInputBuffer += 6;
pOutputBuffer += 2;
outputCharsRemaining -= 2;
@@ -658,7 +652,7 @@ namespace System.Text.Unicode
continue; // go back to beginning of loop for processing
}
- }
+ } while (pInputBuffer <= pFinalPosWhereCanReadDWordFromInputBuffer);
ProcessRemainingBytesSlow:
inputLength = (int)(void*)Unsafe.ByteOffset(ref *pInputBuffer, ref *pFinalPosWhereCanReadDWordFromInputBuffer) + 4;
@@ -900,6 +894,16 @@ namespace System.Text.Unicode
char* pFinalPosWhereCanReadDWordFromInputBuffer = pInputBuffer + (uint)inputLength - CharsPerDWord;
+ // We have paths for SSE4.1 vectorization inside the inner loop. Since the below
+ // vector is only used in those code paths, we leave it uninitialized if SSE4.1
+ // is not enabled.
+
+ Vector128<short> nonAsciiUtf16DataMask = default;
+ if (Sse41.X64.IsSupported)
+ {
+ nonAsciiUtf16DataMask = Vector128.Create(unchecked((short)0xFF80)); // mask of non-ASCII bits in a UTF-16 char
+ }
+
// Begin the main loop.
#if DEBUG
@@ -908,7 +912,8 @@ namespace System.Text.Unicode
uint thisDWord;
- while (pInputBuffer <= pFinalPosWhereCanReadDWordFromInputBuffer)
+ Debug.Assert(pInputBuffer <= pFinalPosWhereCanReadDWordFromInputBuffer);
+ do
{
// Read 32 bits at a time. This is enough to hold any possible UTF16-encoded scalar.
@@ -952,27 +957,26 @@ namespace System.Text.Unicode
uint inputCharsRemaining = (uint)(pFinalPosWhereCanReadDWordFromInputBuffer - pInputBuffer) + 2;
uint minElementsRemaining = (uint)Math.Min(inputCharsRemaining, outputBytesRemaining);
- if (Bmi2.X64.IsSupported)
+ if (Sse41.X64.IsSupported)
{
- Debug.Assert(BitConverter.IsLittleEndian, "BMI2 requires little-endian.");
- const ulong PEXT_MASK = 0x00FF00FF_00FF00FFul;
+ Debug.Assert(BitConverter.IsLittleEndian, "SSE41 requires little-endian.");
// Try reading and writing 8 elements per iteration.
uint maxIters = minElementsRemaining / 8;
- ulong firstQWord, secondQWord;
+ ulong possibleNonAsciiQWord;
int i;
+ Vector128<short> utf16Data;
for (i = 0; (uint)i < maxIters; i++)
{
- firstQWord = Unsafe.ReadUnaligned<ulong>(pInputBuffer);
- secondQWord = Unsafe.ReadUnaligned<ulong>(pInputBuffer + 4);
-
- if (!Utf16Utility.AllCharsInUInt64AreAscii(firstQWord | secondQWord))
+ utf16Data = Unsafe.ReadUnaligned<Vector128<short>>(pInputBuffer);
+ if (!Sse41.TestZ(utf16Data, nonAsciiUtf16DataMask))
{
- goto LoopTerminatedDueToNonAsciiData;
+ goto LoopTerminatedDueToNonAsciiDataInVectorLocal;
}
- Unsafe.WriteUnaligned<uint>(pOutputBuffer, (uint)Bmi2.X64.ParallelBitExtract(firstQWord, PEXT_MASK));
- Unsafe.WriteUnaligned<uint>(pOutputBuffer + 4, (uint)Bmi2.X64.ParallelBitExtract(secondQWord, PEXT_MASK));
+ // narrow and write
+
+ Sse2.StoreScalar((ulong*)pOutputBuffer /* unaligned */, Sse2.PackUnsignedSaturate(utf16Data, utf16Data).AsUInt64());
pInputBuffer += 8;
pOutputBuffer += 8;
@@ -984,14 +988,14 @@ namespace System.Text.Unicode
if ((minElementsRemaining & 4) != 0)
{
- secondQWord = Unsafe.ReadUnaligned<ulong>(pInputBuffer);
-
- if (!Utf16Utility.AllCharsInUInt64AreAscii(secondQWord))
+ possibleNonAsciiQWord = Unsafe.ReadUnaligned<ulong>(pInputBuffer);
+ if (!Utf16Utility.AllCharsInUInt64AreAscii(possibleNonAsciiQWord))
{
- goto LoopTerminatedDueToNonAsciiDataInSecondQWord;
+ goto LoopTerminatedDueToNonAsciiDataInPossibleNonAsciiQWordLocal;
}
- Unsafe.WriteUnaligned<uint>(pOutputBuffer, (uint)Bmi2.X64.ParallelBitExtract(secondQWord, PEXT_MASK));
+ utf16Data = Vector128.CreateScalarUnsafe(possibleNonAsciiQWord).AsInt16();
+ Unsafe.WriteUnaligned<uint>(pOutputBuffer, Sse2.ConvertToUInt32(Sse2.PackUnsignedSaturate(utf16Data, utf16Data).AsUInt32()));
pInputBuffer += 4;
pOutputBuffer += 4;
@@ -1000,29 +1004,31 @@ namespace System.Text.Unicode
continue; // Go back to beginning of main loop, read data, check for ASCII
- LoopTerminatedDueToNonAsciiData:
+ LoopTerminatedDueToNonAsciiDataInVectorLocal:
outputBytesRemaining -= 8 * i;
+ possibleNonAsciiQWord = Sse2.X64.ConvertToUInt64(utf16Data.AsUInt64());
- // First, see if we can drain any ASCII data from the first QWORD.
+ // Temporarily set 'possibleNonAsciiQWord' to be the low 64 bits of the vector,
+ // then check whether it's all-ASCII. If so, narrow and write to the destination
+ // buffer. Since we know that either the high 64 bits or the low 64 bits of the
+ // vector contains non-ASCII data, by the end of the following block the
+ // 'possibleNonAsciiQWord' local is guaranteed to contain the non-ASCII segment.
- if (Utf16Utility.AllCharsInUInt64AreAscii(firstQWord))
+ if (Utf16Utility.AllCharsInUInt64AreAscii(possibleNonAsciiQWord)) // all chars in first QWORD are ASCII
{
- Unsafe.WriteUnaligned<uint>(pOutputBuffer, (uint)Bmi2.X64.ParallelBitExtract(firstQWord, PEXT_MASK));
+ Unsafe.WriteUnaligned<uint>(pOutputBuffer, Sse2.ConvertToUInt32(Sse2.PackUnsignedSaturate(utf16Data, utf16Data).AsUInt32()));
pInputBuffer += 4;
pOutputBuffer += 4;
outputBytesRemaining -= 4;
- }
- else
- {
- secondQWord = firstQWord;
+ possibleNonAsciiQWord = utf16Data.AsUInt64().GetElement(1);
}
- LoopTerminatedDueToNonAsciiDataInSecondQWord:
+ LoopTerminatedDueToNonAsciiDataInPossibleNonAsciiQWordLocal:
- Debug.Assert(!Utf16Utility.AllCharsInUInt64AreAscii(secondQWord)); // this condition should've been checked earlier
+ Debug.Assert(!Utf16Utility.AllCharsInUInt64AreAscii(possibleNonAsciiQWord)); // this condition should've been checked earlier
- thisDWord = (uint)secondQWord;
+ thisDWord = (uint)possibleNonAsciiQWord;
if (Utf16Utility.AllCharsInUInt32AreAscii(thisDWord))
{
// [ 00000000 0bbbbbbb | 00000000 0aaaaaaa ] -> [ 00000000 0bbbbbbb | 0bbbbbbb 0aaaaaaa ]
@@ -1030,14 +1036,14 @@ namespace System.Text.Unicode
pInputBuffer += 2;
pOutputBuffer += 2;
outputBytesRemaining -= 2;
- thisDWord = (uint)(secondQWord >> 32);
+ thisDWord = (uint)(possibleNonAsciiQWord >> 32);
}
goto AfterReadDWordSkipAllCharsAsciiCheck;
}
else
{
- // Can't use BMI2 x64, so we'll only read and write 4 elements per iteration.
+ // Can't use SSE41 x64, so we'll only read and write 4 elements per iteration.
uint maxIters = minElementsRemaining / 4;
uint secondDWord;
int i;
@@ -1358,7 +1364,7 @@ namespace System.Text.Unicode
}
goto Error; // an ill-formed surrogate sequence: high not followed by low, or low not preceded by high
- }
+ } while (pInputBuffer <= pFinalPosWhereCanReadDWordFromInputBuffer);
ProcessNextCharAndFinish:
inputLength = (int)(pFinalPosWhereCanReadDWordFromInputBuffer - pInputBuffer) + CharsPerDWord;