summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: Levi Broderick <GrabYourPitchforks@users.noreply.github.com> 2019-03-10 21:40:27 -0700
committer: GitHub <noreply@github.com> 2019-03-10 21:40:27 -0700
commit: 43a5159d39bd52195c5095da4006183f791c696b (patch)
tree: a7a4161a8e280fc68927bee1511002ec7a7ea550 /src
parent: 60773aa089840f5e944d3f72969cde0561b9135b (diff)
download: coreclr-43a5159d39bd52195c5095da4006183f791c696b.tar.gz
          coreclr-43a5159d39bd52195c5095da4006183f791c696b.tar.bz2
          coreclr-43a5159d39bd52195c5095da4006183f791c696b.zip
Refactor Encoding to split fast-path and fallback logic (#23098)
This refactoring is limited to ASCIIEncoding at the moment, but it can easily be applied to UTF-8 / UTF-16 / UTF-32.

High-level changes:
- Fallback logic has been split from the fast-path, improving performance of GetBytes and similar routines.
- All of the plumbing of when to invoke the fallback logic and how to manage leftover data has been moved into the base class.
- Almost all of the logic except for the fast-path is now written in terms of verifiable code (Span and ReadOnlySpan).
- Minor bug fixes in EncoderNLS.Convert (see https://github.com/dotnet/coreclr/issues/23020).
Diffstat (limited to 'src')
-rw-r--r-- src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems | 2
-rw-r--r-- src/System.Private.CoreLib/shared/System/String.cs | 4
-rw-r--r-- src/System.Private.CoreLib/shared/System/Text/ASCIIEncoding.cs | 1128
-rw-r--r-- src/System.Private.CoreLib/shared/System/Text/ASCIIUtility.cs | 76
-rw-r--r-- src/System.Private.CoreLib/shared/System/Text/DecoderFallback.cs | 104
-rw-r--r-- src/System.Private.CoreLib/shared/System/Text/DecoderNLS.cs | 198
-rw-r--r-- src/System.Private.CoreLib/shared/System/Text/EncoderFallback.cs | 185
-rw-r--r-- src/System.Private.CoreLib/shared/System/Text/EncoderNLS.cs | 167
-rw-r--r-- src/System.Private.CoreLib/shared/System/Text/Encoding.Internal.cs | 1277
-rw-r--r-- src/System.Private.CoreLib/shared/System/Text/Encoding.cs | 50
-rw-r--r-- src/System.Private.CoreLib/shared/System/Text/EncodingNLS.cs | 2
-rw-r--r-- src/System.Private.CoreLib/shared/System/ThrowHelper.cs | 27
12 files changed, 2562 insertions, 658 deletions
diff --git a/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems b/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems
index 71459faf79..ce0f102587 100644
--- a/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems
+++ b/src/System.Private.CoreLib/shared/System.Private.CoreLib.Shared.projitems
@@ -761,6 +761,7 @@
<Compile Include="$(MSBuildThisFileDirectory)System\StringSplitOptions.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SystemException.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\ASCIIEncoding.cs" />
+ <Compile Include="$(MSBuildThisFileDirectory)System\Text\ASCIIUtility.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\StringBuilderCache.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\CodePageDataItem.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Decoder.cs" />
@@ -776,6 +777,7 @@
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncoderFallback.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncoderReplacementFallback.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\Encoding.cs" />
+ <Compile Include="$(MSBuildThisFileDirectory)System\Text\Encoding.Internal.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncodingData.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncodingInfo.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Text\EncodingNLS.cs" />
diff --git a/src/System.Private.CoreLib/shared/System/String.cs b/src/System.Private.CoreLib/shared/System/String.cs
index 22f830a0e4..49afbc8c8c 100644
--- a/src/System.Private.CoreLib/shared/System/String.cs
+++ b/src/System.Private.CoreLib/shared/System/String.cs
@@ -480,7 +480,7 @@ namespace System
Debug.Assert(byteLength >= 0);
// Get our string length
- int stringLength = encoding.GetCharCount(bytes, byteLength, null);
+ int stringLength = encoding.GetCharCount(bytes, byteLength);
Debug.Assert(stringLength >= 0, "stringLength >= 0");
// They gave us an empty string if they needed one
@@ -491,7 +491,7 @@ namespace System
string s = FastAllocateString(stringLength);
fixed (char* pTempChars = &s._firstChar)
{
- int doubleCheck = encoding.GetChars(bytes, byteLength, pTempChars, stringLength, null);
+ int doubleCheck = encoding.GetChars(bytes, byteLength, pTempChars, stringLength);
Debug.Assert(stringLength == doubleCheck,
"Expected encoding.GetChars to return same length as encoding.GetCharCount");
}
diff --git a/src/System.Private.CoreLib/shared/System/Text/ASCIIEncoding.cs b/src/System.Private.CoreLib/shared/System/Text/ASCIIEncoding.cs
index 217d934677..8cf1f57ccb 100644
--- a/src/System.Private.CoreLib/shared/System/Text/ASCIIEncoding.cs
+++ b/src/System.Private.CoreLib/shared/System/Text/ASCIIEncoding.cs
@@ -2,8 +2,9 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
-using System;
+using System.Buffers;
using System.Diagnostics;
+using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace System.Text
@@ -18,10 +19,30 @@ namespace System.Text
// Note: IsAlwaysNormalized remains false because 1/2 the code points are unassigned, so they'd
// use fallbacks, and we cannot guarantee that fallbacks are normalized.
- public class ASCIIEncoding : Encoding
+ public partial class ASCIIEncoding : Encoding
{
- // Allow for devirtualization (see https://github.com/dotnet/coreclr/pull/9230)
- internal sealed class ASCIIEncodingSealed : ASCIIEncoding { }
+ // This specialized sealed type has two benefits:
+ // 1) it allows for devirtualization (see https://github.com/dotnet/coreclr/pull/9230), and
+ // 2) it allows us to provide highly optimized implementations of certain routines because
+ // we can make assumptions about the fallback mechanisms in use (in particular, always
+ // replace with "?").
+ //
+ // (We don't take advantage of #2 yet, but we can do so in the future because the implementation
+ // of cloning below allows us to make assumptions about the behaviors of the sealed type.)
+ internal sealed class ASCIIEncodingSealed : ASCIIEncoding
+ {
+ public override object Clone()
+ {
+ // The base implementation of Encoding.Clone calls object.MemberwiseClone and marks the new object mutable.
+ // We don't want to do this because it violates the invariants we have set for the sealed type.
+ // Instead, we'll create a new instance of the base ASCIIEncoding type and mark it mutable.
+
+ return new ASCIIEncoding()
+ {
+ IsReadOnly = false
+ };
+ }
+ }
// Used by Encoding.ASCII for lazy initialization
// The initialization code will not be run until a static member of the class is referenced
@@ -58,22 +79,26 @@ namespace System.Text
public override unsafe int GetByteCount(char[] chars, int index, int count)
{
// Validate input parameters
- if (chars == null)
- throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
- if (index < 0 || count < 0)
- throw new ArgumentOutOfRangeException((index < 0 ? nameof(index) : nameof(count)), SR.ArgumentOutOfRange_NeedNonNegNum);
+ if (chars is null)
+ {
+ ThrowHelper.ThrowArgumentNullException(ExceptionArgument.chars, ExceptionResource.ArgumentNull_Array);
+ }
- if (chars.Length - index < count)
- throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
+ if ((index | count) < 0)
+ {
+ ThrowHelper.ThrowArgumentOutOfRangeException((index < 0) ? ExceptionArgument.index : ExceptionArgument.count, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
+ }
- // If no input, return 0, avoid fixed empty array problem
- if (count == 0)
- return 0;
+ if (chars.Length - index < count)
+ {
+ ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.chars, ExceptionResource.ArgumentOutOfRange_IndexCountBuffer);
+ }
- // Just call the pointer version
fixed (char* pChars = chars)
- return GetByteCount(pChars + index, count, null);
+ {
+ return GetByteCountCommon(pChars + index, count);
+ }
}
// All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
@@ -83,12 +108,17 @@ namespace System.Text
public override unsafe int GetByteCount(string chars)
{
- // Validate input
- if (chars==null)
- throw new ArgumentNullException(nameof(chars));
+ // Validate input parameters
+
+ if (chars is null)
+ {
+ ThrowHelper.ThrowArgumentNullException(ExceptionArgument.chars);
+ }
fixed (char* pChars = chars)
- return GetByteCount(pChars, chars.Length, null);
+ {
+ return GetByteCountCommon(pChars, chars.Length);
+ }
}
// All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
@@ -99,22 +129,81 @@ namespace System.Text
public override unsafe int GetByteCount(char* chars, int count)
{
// Validate Parameters
+
if (chars == null)
- throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
+ {
+ ThrowHelper.ThrowArgumentNullException(ExceptionArgument.chars);
+ }
if (count < 0)
- throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
+ {
+ ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.count, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
+ }
- // Call it with empty encoder
- return GetByteCount(chars, count, null);
+ return GetByteCountCommon(chars, count);
}
public override unsafe int GetByteCount(ReadOnlySpan<char> chars)
{
- fixed (char* charsPtr = &MemoryMarshal.GetNonNullPinnableReference(chars))
+ // It's ok for us to pass null pointers down to the workhorse below.
+
+ fixed (char* charsPtr = &MemoryMarshal.GetReference(chars))
+ {
+ return GetByteCountCommon(charsPtr, chars.Length);
+ }
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private unsafe int GetByteCountCommon(char* pChars, int charCount)
+ {
+ // Common helper method for all non-EncoderNLS entry points to GetByteCount.
+ // A modification of this method should be copied in to each of the supported encodings: ASCII, UTF8, UTF16, UTF32.
+
+ Debug.Assert(charCount >= 0, "Caller should't specify negative length buffer.");
+ Debug.Assert(pChars != null || charCount == 0, "Input pointer shouldn't be null if non-zero length specified.");
+
+ // First call into the fast path.
+
+ int totalByteCount = GetByteCountFast(pChars, charCount, EncoderFallback, out int charsConsumed);
+
+ if (charsConsumed != charCount)
+ {
+ // If there's still data remaining in the source buffer, go down the fallback path.
+ // We need to check for integer overflow since the fallback could change the required
+ // output count in unexpected ways.
+
+ totalByteCount += GetByteCountWithFallback(pChars, charCount, charsConsumed);
+ if (totalByteCount < 0)
+ {
+ ThrowConversionOverflow();
+ }
+ }
+
+ return totalByteCount;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] // called directly by GetByteCountCommon
+ private protected sealed override unsafe int GetByteCountFast(char* pChars, int charsLength, EncoderFallback fallback, out int charsConsumed)
+ {
+ // First: Can we short-circuit the entire calculation?
+ // If an EncoderReplacementFallback is in use, all non-ASCII chars
+ // (including surrogate halves) are replaced with the default string.
+ // If the default string consists of a single ASCII value, then we
+ // know there's a 1:1 char->byte transcoding in all cases.
+
+ int byteCount = charsLength;
+
+ if (!(fallback is EncoderReplacementFallback replacementFallback
+ && replacementFallback.MaxCharCount == 1
+ && replacementFallback.DefaultString[0] <= 0x7F))
{
- return GetByteCount(charsPtr, chars.Length, encoder: null);
+ // Unrecognized fallback mechanism - count chars manually.
+
+ byteCount = (int)ASCIIUtility.GetIndexOfFirstNonAsciiChar(pChars, (uint)charsLength);
}
+
+ charsConsumed = byteCount;
+ return byteCount;
}
// Parent method is safe.
@@ -125,22 +214,37 @@ namespace System.Text
public override unsafe int GetBytes(string chars, int charIndex, int charCount,
byte[] bytes, int byteIndex)
{
- if (chars == null || bytes == null)
- throw new ArgumentNullException((chars == null ? nameof(chars) : nameof(bytes)), SR.ArgumentNull_Array);
+ // Validate Parameters
- if (charIndex < 0 || charCount < 0)
- throw new ArgumentOutOfRangeException((charIndex < 0 ? nameof(charIndex) : nameof(charCount)), SR.ArgumentOutOfRange_NeedNonNegNum);
+ if (chars is null || bytes is null)
+ {
+ ThrowHelper.ThrowArgumentNullException(
+ argument: (chars is null) ? ExceptionArgument.chars : ExceptionArgument.bytes,
+ resource: ExceptionResource.ArgumentNull_Array);
+ }
- if (chars.Length - charIndex < charCount)
- throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCount);
+ if ((charIndex | charCount) < 0)
+ {
+ ThrowHelper.ThrowArgumentOutOfRangeException(
+ argument: (charIndex < 0) ? ExceptionArgument.charIndex : ExceptionArgument.charCount,
+ resource: ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
+ }
- if (byteIndex < 0 || byteIndex > bytes.Length)
- throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);
+ if (chars.Length - charIndex < charCount)
+ {
+ ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.chars, ExceptionResource.ArgumentOutOfRange_IndexCount);
+ }
- int byteCount = bytes.Length - byteIndex;
+ if ((uint)byteIndex > bytes.Length)
+ {
+ ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.byteIndex, ExceptionResource.ArgumentOutOfRange_Index);
+ }
- fixed (char* pChars = chars) fixed (byte* pBytes = &MemoryMarshal.GetReference((Span<byte>)bytes))
- return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
+ fixed (char* pChars = chars)
+ fixed (byte* pBytes = bytes)
+ {
+ return GetBytesCommon(pChars + charIndex, charCount, pBytes + byteIndex, bytes.Length - byteIndex);
+ }
}
// Encodes a range of characters in a character array into a range of bytes
@@ -161,28 +265,36 @@ namespace System.Text
byte[] bytes, int byteIndex)
{
// Validate parameters
- if (chars == null || bytes == null)
- throw new ArgumentNullException((chars == null ? nameof(chars) : nameof(bytes)), SR.ArgumentNull_Array);
-
- if (charIndex < 0 || charCount < 0)
- throw new ArgumentOutOfRangeException((charIndex < 0 ? nameof(charIndex) : nameof(charCount)), SR.ArgumentOutOfRange_NeedNonNegNum);
- if (chars.Length - charIndex < charCount)
- throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);
+ if (chars is null || bytes is null)
+ {
+ ThrowHelper.ThrowArgumentNullException(
+ argument: (chars is null) ? ExceptionArgument.chars : ExceptionArgument.bytes,
+ resource: ExceptionResource.ArgumentNull_Array);
+ }
- if (byteIndex < 0 || byteIndex > bytes.Length)
- throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);
+ if ((charIndex | charCount) < 0)
+ {
+ ThrowHelper.ThrowArgumentOutOfRangeException(
+ argument: (charIndex < 0) ? ExceptionArgument.charIndex : ExceptionArgument.charCount,
+ resource: ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
+ }
- // If nothing to encode return 0
- if (charCount == 0)
- return 0;
+ if (chars.Length - charIndex < charCount)
+ {
+ ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.chars, ExceptionResource.ArgumentOutOfRange_IndexCount);
+ }
- // Just call pointer version
- int byteCount = bytes.Length - byteIndex;
+ if ((uint)byteIndex > bytes.Length)
+ {
+ ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.byteIndex, ExceptionResource.ArgumentOutOfRange_Index);
+ }
- fixed (char* pChars = chars) fixed (byte* pBytes = &MemoryMarshal.GetReference((Span<byte>)bytes))
- // Remember that byteCount is # to decode, not size of array.
- return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
+ fixed (char* pChars = chars)
+ fixed (byte* pBytes = bytes)
+ {
+ return GetBytesCommon(pChars + charIndex, charCount, pBytes + byteIndex, bytes.Length - byteIndex);
+ }
}
// All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
@@ -193,21 +305,123 @@ namespace System.Text
public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
{
// Validate Parameters
- if (bytes == null || chars == null)
- throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars), SR.ArgumentNull_Array);
- if (charCount < 0 || byteCount < 0)
- throw new ArgumentOutOfRangeException((charCount < 0 ? nameof(charCount) : nameof(byteCount)), SR.ArgumentOutOfRange_NeedNonNegNum);
+ if (chars == null || bytes == null)
+ {
+ ThrowHelper.ThrowArgumentNullException(
+ argument: (chars is null) ? ExceptionArgument.chars : ExceptionArgument.bytes,
+ resource: ExceptionResource.ArgumentNull_Array);
+ }
- return GetBytes(chars, charCount, bytes, byteCount, null);
+ if ((charCount | byteCount) < 0)
+ {
+ ThrowHelper.ThrowArgumentOutOfRangeException(
+ argument: (charCount < 0) ? ExceptionArgument.charCount : ExceptionArgument.byteCount,
+ resource: ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
+ }
+
+ return GetBytesCommon(chars, charCount, bytes, byteCount);
}
public override unsafe int GetBytes(ReadOnlySpan<char> chars, Span<byte> bytes)
{
- fixed (char* charsPtr = &MemoryMarshal.GetNonNullPinnableReference(chars))
- fixed (byte* bytesPtr = &MemoryMarshal.GetNonNullPinnableReference(bytes))
+ // It's ok for us to operate on null / empty spans.
+
+ fixed (char* charsPtr = &MemoryMarshal.GetReference(chars))
+ fixed (byte* bytesPtr = &MemoryMarshal.GetReference(bytes))
{
- return GetBytes(charsPtr, chars.Length, bytesPtr, bytes.Length, encoder: null);
+ return GetBytesCommon(charsPtr, chars.Length, bytesPtr, bytes.Length);
+ }
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private unsafe int GetBytesCommon(char* pChars, int charCount, byte* pBytes, int byteCount)
+ {
+ // Common helper method for all non-EncoderNLS entry points to GetBytes.
+ // A modification of this method should be copied in to each of the supported encodings: ASCII, UTF8, UTF16, UTF32.
+
+ Debug.Assert(charCount >= 0, "Caller should't specify negative length buffer.");
+ Debug.Assert(pChars != null || charCount == 0, "Input pointer shouldn't be null if non-zero length specified.");
+ Debug.Assert(byteCount >= 0, "Caller should't specify negative length buffer.");
+ Debug.Assert(pBytes != null || byteCount == 0, "Input pointer shouldn't be null if non-zero length specified.");
+
+ // First call into the fast path.
+
+ int bytesWritten = GetBytesFast(pChars, charCount, pBytes, byteCount, out int charsConsumed);
+
+ if (charsConsumed == charCount)
+ {
+ // All elements converted - return immediately.
+
+ return bytesWritten;
+ }
+ else
+ {
+ // Simple narrowing conversion couldn't operate on entire buffer - invoke fallback.
+
+ return GetBytesWithFallback(pChars, charCount, pBytes, byteCount, charsConsumed, bytesWritten);
+ }
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] // called directly by GetBytesCommon
+ private protected sealed override unsafe int GetBytesFast(char* pChars, int charsLength, byte* pBytes, int bytesLength, out int charsConsumed)
+ {
+ int bytesWritten = (int)ASCIIUtility.NarrowUtf16ToAscii(pChars, pBytes, (uint)Math.Min(charsLength, bytesLength));
+
+ charsConsumed = bytesWritten;
+ return bytesWritten;
+ }
+
+ private protected sealed override unsafe int GetBytesWithFallback(ReadOnlySpan<char> chars, int originalCharsLength, Span<byte> bytes, int originalBytesLength, EncoderNLS encoder)
+ {
+ // We special-case EncoderReplacementFallback if it's telling us to write a single ASCII char,
+ // since we believe this to be relatively common and we can handle it more efficiently than
+ // the base implementation.
+
+ if (((encoder is null) ? this.EncoderFallback : encoder.Fallback) is EncoderReplacementFallback replacementFallback
+ && replacementFallback.MaxCharCount == 1
+ && replacementFallback.DefaultString[0] <= 0x7F)
+ {
+ byte replacementByte = (byte)replacementFallback.DefaultString[0];
+
+ int numElementsToConvert = Math.Min(chars.Length, bytes.Length);
+ int idx = 0;
+
+ fixed (char* pChars = &MemoryMarshal.GetReference(chars))
+ fixed (byte* pBytes = &MemoryMarshal.GetReference(bytes))
+ {
+ // In a loop, replace the non-convertible data, then bulk-convert as much as we can.
+
+ while (idx < numElementsToConvert)
+ {
+ pBytes[idx++] = replacementByte;
+
+ if (idx < numElementsToConvert)
+ {
+ idx += (int)ASCIIUtility.NarrowUtf16ToAscii(&pChars[idx], &pBytes[idx], (uint)(numElementsToConvert - idx));
+ }
+
+ Debug.Assert(idx <= numElementsToConvert, "Somehow went beyond bounds of source or destination buffer?");
+ }
+ }
+
+ // Slice off how much we consumed / wrote.
+
+ chars = chars.Slice(numElementsToConvert);
+ bytes = bytes.Slice(numElementsToConvert);
+ }
+
+ // If we couldn't go through our fast fallback mechanism, or if we still have leftover
+ // data because we couldn't consume everything in the loop above, we need to go down the
+ // slow fallback path.
+
+ if (chars.IsEmpty)
+ {
+ return originalBytesLength - bytes.Length; // total number of bytes written
+ }
+ else
+ {
+ return base.GetBytesWithFallback(chars, originalCharsLength, bytes, originalBytesLength, encoder);
}
}
@@ -222,22 +436,26 @@ namespace System.Text
public override unsafe int GetCharCount(byte[] bytes, int index, int count)
{
// Validate Parameters
- if (bytes == null)
- throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
- if (index < 0 || count < 0)
- throw new ArgumentOutOfRangeException((index < 0 ? nameof(index) : nameof(count)), SR.ArgumentOutOfRange_NeedNonNegNum);
+ if (bytes is null)
+ {
+ ThrowHelper.ThrowArgumentNullException(ExceptionArgument.bytes, ExceptionResource.ArgumentNull_Array);
+ }
- if (bytes.Length - index < count)
- throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
+ if ((index | count) < 0)
+ {
+ ThrowHelper.ThrowArgumentOutOfRangeException((index < 0) ? ExceptionArgument.index : ExceptionArgument.count, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
+ }
- // If no input just return 0, fixed doesn't like 0 length arrays
- if (count == 0)
- return 0;
+ if (bytes.Length - index < count)
+ {
+ ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.bytes, ExceptionResource.ArgumentOutOfRange_IndexCountBuffer);
+ }
- // Just call pointer version
fixed (byte* pBytes = bytes)
- return GetCharCount(pBytes + index, count, null);
+ {
+ return GetCharCountCommon(pBytes + index, count);
+ }
}
// All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
@@ -248,673 +466,367 @@ namespace System.Text
public override unsafe int GetCharCount(byte* bytes, int count)
{
// Validate Parameters
+
if (bytes == null)
- throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
+ {
+ ThrowHelper.ThrowArgumentNullException(ExceptionArgument.bytes, ExceptionResource.ArgumentNull_Array);
+ }
if (count < 0)
- throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
+ {
+ ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.count, ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
+ }
- return GetCharCount(bytes, count, null);
+ return GetCharCountCommon(bytes, count);
}
public override unsafe int GetCharCount(ReadOnlySpan<byte> bytes)
{
- fixed (byte* bytesPtr = &MemoryMarshal.GetNonNullPinnableReference(bytes))
+ // It's ok for us to pass null pointers down to the workhorse routine.
+
+ fixed (byte* bytesPtr = &MemoryMarshal.GetReference(bytes))
{
- return GetCharCount(bytesPtr, bytes.Length, decoder: null);
+ return GetCharCountCommon(bytesPtr, bytes.Length);
}
}
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- // parent method is safe
-
- public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
- char[] chars, int charIndex)
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private unsafe int GetCharCountCommon(byte* pBytes, int byteCount)
{
- // Validate Parameters
- if (bytes == null || chars == null)
- throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars), SR.ArgumentNull_Array);
+ // Common helper method for all non-DecoderNLS entry points to GetCharCount.
+ // A modification of this method should be copied in to each of the supported encodings: ASCII, UTF8, UTF16, UTF32.
- if (byteIndex < 0 || byteCount < 0)
- throw new ArgumentOutOfRangeException((byteIndex < 0 ? nameof(byteIndex) : nameof(byteCount)), SR.ArgumentOutOfRange_NeedNonNegNum);
+ Debug.Assert(byteCount >= 0, "Caller should't specify negative length buffer.");
+ Debug.Assert(pBytes != null || byteCount == 0, "Input pointer shouldn't be null if non-zero length specified.");
- if ( bytes.Length - byteIndex < byteCount)
- throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
+ // First call into the fast path.
- if (charIndex < 0 || charIndex > chars.Length)
- throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_Index);
+ int totalCharCount = GetCharCountFast(pBytes, byteCount, DecoderFallback, out int bytesConsumed);
- // If no input, return 0 & avoid fixed problem
- if (byteCount == 0)
- return 0;
+ if (bytesConsumed != byteCount)
+ {
+ // If there's still data remaining in the source buffer, go down the fallback path.
+ // We need to check for integer overflow since the fallback could change the required
+ // output count in unexpected ways.
- // Just call pointer version
- int charCount = chars.Length - charIndex;
+ totalCharCount += GetCharCountWithFallback(pBytes, byteCount, bytesConsumed);
+ if (totalCharCount < 0)
+ {
+ ThrowConversionOverflow();
+ }
+ }
- fixed (byte* pBytes = bytes) fixed (char* pChars = &MemoryMarshal.GetReference((Span<char>)chars))
- // Remember that charCount is # to decode, not size of array
- return GetChars(pBytes + byteIndex, byteCount, pChars + charIndex, charCount, null);
+ return totalCharCount;
}
- // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
- // So if you fix this, fix the others. Currently those include:
- // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
-
- [CLSCompliant(false)]
- public unsafe override int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] // called directly by GetCharCountCommon
+ private protected sealed override unsafe int GetCharCountFast(byte* pBytes, int bytesLength, DecoderFallback fallback, out int bytesConsumed)
{
- // Validate Parameters
- if (bytes == null || chars == null)
- throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars), SR.ArgumentNull_Array);
+ // First: Can we short-circuit the entire calculation?
+ // If a DecoderReplacementFallback is in use, all non-ASCII bytes are replaced with
+ // the default string. If the default string consists of a single BMP value, then we
+ // know there's a 1:1 byte->char transcoding in all cases.
- if (charCount < 0 || byteCount < 0)
- throw new ArgumentOutOfRangeException((charCount < 0 ? nameof(charCount) : nameof(byteCount)), SR.ArgumentOutOfRange_NeedNonNegNum);
+ int charCount = bytesLength;
- return GetChars(bytes, byteCount, chars, charCount, null);
- }
-
- public override unsafe int GetChars(ReadOnlySpan<byte> bytes, Span<char> chars)
- {
- fixed (byte* bytesPtr = &MemoryMarshal.GetNonNullPinnableReference(bytes))
- fixed (char* charsPtr = &MemoryMarshal.GetNonNullPinnableReference(chars))
+ if (!(fallback is DecoderReplacementFallback replacementFallback) || replacementFallback.MaxCharCount != 1)
{
- return GetChars(bytesPtr, bytes.Length, charsPtr, chars.Length, decoder: null);
+ // Unrecognized fallback mechanism - count bytes manually.
+
+ charCount = (int)ASCIIUtility.GetIndexOfFirstNonAsciiByte(pBytes, (uint)bytesLength);
}
+
+ bytesConsumed = charCount;
+ return charCount;
}
- // Returns a string containing the decoded representation of a range of
- // bytes in a byte array.
- //
// All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
// So if you fix this, fix the others. Currently those include:
// EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
// parent method is safe
- public override unsafe string GetString(byte[] bytes, int byteIndex, int byteCount)
+ public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount, // Array overload: validates the arguments, pins both arrays and forwards to GetCharsCommon.
+ char[] chars, int charIndex)
{
// Validate Parameters
- if (bytes == null)
- throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
- if (byteIndex < 0 || byteCount < 0)
- throw new ArgumentOutOfRangeException((byteIndex < 0 ? nameof(byteIndex) : nameof(byteCount)), SR.ArgumentOutOfRange_NeedNonNegNum);
+ if (bytes is null || chars is null)
+ {
+ ThrowHelper.ThrowArgumentNullException(
+ argument: (bytes is null) ? ExceptionArgument.bytes : ExceptionArgument.chars, // report 'bytes' first when both are null
+ resource: ExceptionResource.ArgumentNull_Array);
+ }
+ if ((byteIndex | byteCount) < 0) // the OR of two ints is negative exactly when at least one of them is negative
+ {
+ ThrowHelper.ThrowArgumentOutOfRangeException(
+ argument: (byteIndex < 0) ? ExceptionArgument.byteIndex : ExceptionArgument.byteCount,
+ resource: ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
+ }
if (bytes.Length - byteIndex < byteCount)
- throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);
+ {
+ ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.bytes, ExceptionResource.ArgumentOutOfRange_IndexCountBuffer);
+ }
- // Avoid problems with empty input buffer
- if (byteCount == 0) return string.Empty;
+ if ((uint)charIndex > (uint)chars.Length) // unsigned compare rejects both a negative charIndex and one past the end of chars
+ {
+ ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.charIndex, ExceptionResource.ArgumentOutOfRange_Index);
+ }
fixed (byte* pBytes = bytes)
- return string.CreateStringFromEncoding(
- pBytes + byteIndex, byteCount, this);
+ fixed (char* pChars = chars)
+ {
+ return GetCharsCommon(pBytes + byteIndex, byteCount, pChars + charIndex, chars.Length - charIndex); // output capacity is whatever remains in chars after charIndex
+ }
}
- //
- // End of standard methods copied from EncodingNLS.cs
- //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
- // GetByteCount
- // Note: We start by assuming that the output will be the same as count. Having
- // an encoder or fallback may change that assumption
- internal sealed override unsafe int GetByteCount(char* chars, int charCount, EncoderNLS encoder)
+ [CLSCompliant(false)]
+ public unsafe override int GetChars(byte* bytes, int byteCount, char* chars, int charCount) // Pointer overload: validates the arguments and forwards to GetCharsCommon.
{
- // Just need to ASSERT, this is called by something else internal that checked parameters already
- Debug.Assert(charCount >= 0, "[ASCIIEncoding.GetByteCount]count is negative");
- Debug.Assert(chars != null, "[ASCIIEncoding.GetByteCount]chars is null");
-
- // Assert because we shouldn't be able to have a null encoder.
- Debug.Assert(encoderFallback != null, "[ASCIIEncoding.GetByteCount]Attempting to use null fallback encoder");
-
- char charLeftOver = (char)0;
- EncoderReplacementFallback fallback = null;
-
- // Start by assuming default count, then +/- for fallback characters
- char* charEnd = chars + charCount;
-
- // For fallback we may need a fallback buffer, we know we aren't default fallback.
- EncoderFallbackBuffer fallbackBuffer = null;
- char* charsForFallback;
-
- if (encoder != null)
- {
- charLeftOver = encoder._charLeftOver;
- Debug.Assert(charLeftOver == 0 || char.IsHighSurrogate(charLeftOver),
- "[ASCIIEncoding.GetByteCount]leftover character should be high surrogate");
-
- fallback = encoder.Fallback as EncoderReplacementFallback;
-
- // We mustn't have left over fallback data when counting
- if (encoder.InternalHasFallbackBuffer)
- {
- // We always need the fallback buffer in get bytes so we can flush any remaining ones if necessary
- fallbackBuffer = encoder.FallbackBuffer;
- if (fallbackBuffer.Remaining > 0 && encoder._throwOnOverflow)
- throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
-
- // Set our internal fallback interesting things.
- fallbackBuffer.InternalInitialize(chars, charEnd, encoder, false);
- }
+ // Validate Parameters
- // Verify that we have no fallbackbuffer, for ASCII its always empty, so just assert
- Debug.Assert(!encoder._throwOnOverflow || !encoder.InternalHasFallbackBuffer ||
- encoder.FallbackBuffer.Remaining == 0,
- "[ASCIICodePageEncoding.GetByteCount]Expected empty fallback buffer");
- }
- else
+ if (bytes is null || chars is null) // unlike the span overload, null pointers are rejected here even when the matching count is zero
{
- fallback = this.EncoderFallback as EncoderReplacementFallback;
+ ThrowHelper.ThrowArgumentNullException(
+ argument: (bytes is null) ? ExceptionArgument.bytes : ExceptionArgument.chars, // report 'bytes' first when both are null
+ resource: ExceptionResource.ArgumentNull_Array);
}
- // If we have an encoder AND we aren't using default fallback,
- // then we may have a complicated count.
- if (fallback != null && fallback.MaxCharCount == 1)
+ if ((byteCount | charCount) < 0) // the OR of two ints is negative exactly when at least one of them is negative
{
- // Replacement fallback encodes surrogate pairs as two ?? (or two whatever), so return size is always
- // same as input size.
- // Note that no existing SBCS code pages map code points to supplimentary characters, so this is easy.
-
- // We could however have 1 extra byte if the last call had an encoder and a funky fallback and
- // if we don't use the funky fallback this time.
-
- // Do we have an extra char left over from last time?
- if (charLeftOver > 0)
- charCount++;
-
- return (charCount);
+ ThrowHelper.ThrowArgumentOutOfRangeException(
+ argument: (byteCount < 0) ? ExceptionArgument.byteCount : ExceptionArgument.charCount,
+ resource: ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
}
- // Count is more complicated if you have a funky fallback
- // For fallback we may need a fallback buffer, we know we're not default fallback
- int byteCount = 0;
-
- // We may have a left over character from last time, try and process it.
- if (charLeftOver > 0)
- {
- Debug.Assert(char.IsHighSurrogate(charLeftOver), "[ASCIIEncoding.GetByteCount]leftover character should be high surrogate");
- Debug.Assert(encoder != null, "[ASCIIEncoding.GetByteCount]Expected encoder");
-
- // Since left over char was a surrogate, it'll have to be fallen back.
- // Get Fallback
- fallbackBuffer = encoder.FallbackBuffer;
- fallbackBuffer.InternalInitialize(chars, charEnd, encoder, false);
-
- // This will fallback a pair if *chars is a low surrogate
- charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
- fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
- chars = charsForFallback;
- }
+ return GetCharsCommon(bytes, byteCount, chars, charCount); // all decoding work (fast path plus any fallback) happens in the shared helper
+ }
- // Now we may have fallback char[] already from the encoder
+ public override unsafe int GetChars(ReadOnlySpan<byte> bytes, Span<char> chars) // Span overload: pins both buffers and forwards pointers and lengths to GetCharsCommon.
+ {
+ // It's ok for us to pass null pointers down to the workhorse below: GetCharsCommon only asserts a non-null pointer when the matching length is non-zero.
- // Go ahead and do it, including the fallback.
- char ch;
- while ((ch = (fallbackBuffer == null) ? '\0' : fallbackBuffer.InternalGetNextChar()) != 0 ||
- chars < charEnd)
+ fixed (byte* bytesPtr = &MemoryMarshal.GetReference(bytes))
+ fixed (char* charsPtr = &MemoryMarshal.GetReference(chars))
{
- // First unwind any fallback
- if (ch == 0)
- {
- // No fallback, just get next char
- ch = *chars;
- chars++;
- }
-
- // Check for fallback, this'll catch surrogate pairs too.
- // no chars >= 0x80 are allowed.
- if (ch > 0x7f)
- {
- if (fallbackBuffer == null)
- {
- // Initialize the buffer
- if (encoder == null)
- fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
- else
- fallbackBuffer = encoder.FallbackBuffer;
- fallbackBuffer.InternalInitialize(charEnd - charCount, charEnd, encoder, false);
- }
-
- // Get Fallback
- charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
- fallbackBuffer.InternalFallback(ch, ref charsForFallback);
- chars = charsForFallback;
- continue;
- }
-
- // We'll use this one
- byteCount++;
+ return GetCharsCommon(bytesPtr, bytes.Length, charsPtr, chars.Length); // span lengths are never negative, so no range validation is needed first
}
-
- Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
- "[ASCIIEncoding.GetByteCount]Expected Empty fallback buffer");
-
- return byteCount;
}
- internal sealed override unsafe int GetBytes(
- char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS encoder)
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private unsafe int GetCharsCommon(byte* pBytes, int byteCount, char* pChars, int charCount) // Runs the fast path first and only calls the fallback overload if some input is left unconsumed.
{
- // Just need to ASSERT, this is called by something else internal that checked parameters already
- Debug.Assert(bytes != null, "[ASCIIEncoding.GetBytes]bytes is null");
- Debug.Assert(byteCount >= 0, "[ASCIIEncoding.GetBytes]byteCount is negative");
- Debug.Assert(chars != null, "[ASCIIEncoding.GetBytes]chars is null");
- Debug.Assert(charCount >= 0, "[ASCIIEncoding.GetBytes]charCount is negative");
+ // Common helper method for all non-DecoderNLS entry points to GetChars.
+ // A modification of this method should be copied in to each of the supported encodings: ASCII, UTF8, UTF16, UTF32.
- // Assert because we shouldn't be able to have a null encoder.
- Debug.Assert(encoderFallback != null, "[ASCIIEncoding.GetBytes]Attempting to use null encoder fallback");
+ Debug.Assert(byteCount >= 0, "Caller should't specify negative length buffer.");
+ Debug.Assert(pBytes != null || byteCount == 0, "Input pointer shouldn't be null if non-zero length specified.");
+ Debug.Assert(charCount >= 0, "Caller should't specify negative length buffer.");
+ Debug.Assert(pChars != null || charCount == 0, "Input pointer shouldn't be null if non-zero length specified.");
- // Get any left over characters
- char charLeftOver = (char)0;
- EncoderReplacementFallback fallback = null;
+ // First call into the fast path.
- // For fallback we may need a fallback buffer, we know we aren't default fallback.
- EncoderFallbackBuffer fallbackBuffer = null;
- char* charsForFallback;
+ int charsWritten = GetCharsFast(pBytes, byteCount, pChars, charCount, out int bytesConsumed); // bytesConsumed reports how far the fast path got before it stopped
- // prepare our end
- char* charEnd = chars + charCount;
- byte* byteStart = bytes;
- char* charStart = chars;
-
- if (encoder != null)
+ if (bytesConsumed == byteCount)
{
- charLeftOver = encoder._charLeftOver;
- fallback = encoder.Fallback as EncoderReplacementFallback;
-
- // We mustn't have left over fallback data when counting
- if (encoder.InternalHasFallbackBuffer)
- {
- // We always need the fallback buffer in get bytes so we can flush any remaining ones if necessary
- fallbackBuffer = encoder.FallbackBuffer;
- if (fallbackBuffer.Remaining > 0 && encoder._throwOnOverflow)
- throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));
-
- // Set our internal fallback interesting things.
- fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
- }
+ // All elements converted - return immediately.
- Debug.Assert(charLeftOver == 0 || char.IsHighSurrogate(charLeftOver),
- "[ASCIIEncoding.GetBytes]leftover character should be high surrogate");
-
- // Verify that we have no fallbackbuffer, for ASCII its always empty, so just assert
- Debug.Assert(!encoder._throwOnOverflow || !encoder.InternalHasFallbackBuffer ||
- encoder.FallbackBuffer.Remaining == 0,
- "[ASCIICodePageEncoding.GetBytes]Expected empty fallback buffer");
+ return charsWritten;
}
else
{
- fallback = this.EncoderFallback as EncoderReplacementFallback;
+ // Simple narrowing conversion couldn't operate on entire buffer - invoke fallback.
+ // The fast path's progress (bytesConsumed, charsWritten) is handed over along with the original buffers.
+ return GetCharsWithFallback(pBytes, byteCount, pChars, charCount, bytesConsumed, charsWritten);
}
+ }
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] // called directly by GetCharsCommon
+ private protected sealed override unsafe int GetCharsFast(byte* pBytes, int bytesLength, char* pChars, int charsLength, out int bytesConsumed) // Widens the leading ASCII run of the input into pChars and returns the number of chars written.
+ {
+ int charsWritten = (int)ASCIIUtility.WidenAsciiToUtf16(pBytes, pChars, (uint)Math.Min(bytesLength, charsLength)); // capped at the smaller buffer; widening stops early at the first byte above 0x7F
+ // Each char written corresponds to exactly one input byte.
+ bytesConsumed = charsWritten;
+ return charsWritten;
+ }
- // See if we do the fast default or slightly slower fallback
- if (fallback != null && fallback.MaxCharCount == 1)
+ private protected sealed override unsafe int GetCharsWithFallback(ReadOnlySpan<byte> bytes, int originalBytesLength, Span<char> chars, int originalCharsLength, DecoderNLS decoder) // Handles single-char replacement fallbacks inline; anything else (or any leftover input) goes to the base implementation.
+ {
+ // We special-case DecoderReplacementFallback if it's telling us to write a single BMP char,
+ // since we believe this to be relatively common and we can handle it more efficiently than
+ // the base implementation.
+ // The fallback comes from the decoder when one is supplied, otherwise from this encoding instance.
+ if (((decoder is null) ? this.DecoderFallback: decoder.Fallback) is DecoderReplacementFallback replacementFallback
+ && replacementFallback.MaxCharCount == 1)
{
- // Fast version
- char cReplacement = fallback.DefaultString[0];
+ char replacementChar = replacementFallback.DefaultString[0]; // the first char of the default string is used as the replacement for every invalid byte
+ // Every input byte produces exactly one output char on this path, so the smaller buffer bounds the work.
+ int numElementsToConvert = Math.Min( bytes.Length, chars.Length);
+ int idx = 0;
- // Check for replacements in range, otherwise fall back to slow version.
- if (cReplacement <= (char)0x7f)
+ fixed (byte* pBytes = &MemoryMarshal.GetReference(bytes))
+ fixed (char* pChars = &MemoryMarshal.GetReference(chars))
{
- // We should have exactly as many output bytes as input bytes, unless there's a left
- // over character, in which case we may need one more.
- // If we had a left over character will have to add a ? (This happens if they had a funky
- // fallback last time, but not this time.) (We can't spit any out though
- // because with fallback encoder each surrogate is treated as a seperate code point)
- if (charLeftOver > 0)
- {
- // Have to have room
- // Throw even if doing no throw version because this is just 1 char,
- // so buffer will never be big enough
- if (byteCount == 0)
- ThrowBytesOverflow(encoder, true);
-
- // This'll make sure we still have more room and also make sure our return value is correct.
- *(bytes++) = (byte)cReplacement;
- byteCount--; // We used one of the ones we were counting.
- }
+ // In a loop, replace the non-convertible data, then bulk-convert as much as we can.
- // This keeps us from overrunning our output buffer
- if (byteCount < charCount)
+ while (idx < numElementsToConvert)
{
- // Throw or make buffer smaller?
- ThrowBytesOverflow(encoder, byteCount < 1);
+ pChars[idx++] = replacementChar; // NOTE(review): the byte at idx is replaced without being read; on the first iteration this assumes the input starts at an invalid byte - confirm against the caller
- // Just use what we can
- charEnd = chars + byteCount;
- }
-
- // We just do a quick copy
- while (chars < charEnd)
- {
- char ch2 = *(chars++);
- if (ch2 >= 0x0080) *(bytes++) = (byte)cReplacement;
- else *(bytes++) = unchecked((byte)(ch2));
- }
+ if (idx < numElementsToConvert)
+ {
+ idx += (int)ASCIIUtility.WidenAsciiToUtf16(&pBytes[idx], &pChars[idx], (uint)(numElementsToConvert - idx)); // widen the following ASCII run; stops at the next byte above 0x7F or at the limit
+ }
- // Clear encoder
- if (encoder != null)
- {
- encoder._charLeftOver = (char)0;
- encoder._charsUsed = (int)(chars - charStart);
+ Debug.Assert(idx <= numElementsToConvert, "Somehow went beyond bounds of source or destination buffer?");
}
-
- return (int)(bytes - byteStart);
}
- }
-
- // Slower version, have to do real fallback.
- // prepare our end
- byte* byteEnd = bytes + byteCount;
+ // Slice off how much we consumed / wrote. The loop above only exits once idx has reached numElementsToConvert.
- // We may have a left over character from last time, try and process it.
- if (charLeftOver > 0)
- {
- // Initialize the buffer
- Debug.Assert(encoder != null,
- "[ASCIIEncoding.GetBytes]Expected non null encoder if we have surrogate left over");
- fallbackBuffer = encoder.FallbackBuffer;
- fallbackBuffer.InternalInitialize(chars, charEnd, encoder, true);
-
- // Since left over char was a surrogate, it'll have to be fallen back.
- // Get Fallback
- // This will fallback a pair if *chars is a low surrogate
- charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
- fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
- chars = charsForFallback;
+ bytes = bytes.Slice(numElementsToConvert);
+ chars = chars.Slice(numElementsToConvert);
}
- // Now we may have fallback char[] already from the encoder
+ // If we couldn't go through our fast fallback mechanism, or if we still have leftover
+ // data because we couldn't consume everything in the loop above, we need to go down the
+ // slow fallback path.
- // Go ahead and do it, including the fallback.
- char ch;
- while ((ch = (fallbackBuffer == null) ? '\0' : fallbackBuffer.InternalGetNextChar()) != 0 ||
- chars < charEnd)
+ if (bytes.IsEmpty) // everything was consumed by the inline replacement loop
{
- // First unwind any fallback
- if (ch == 0)
- {
- // No fallback, just get next char
- ch = *chars;
- chars++;
- }
-
- // Check for fallback, this'll catch surrogate pairs too.
- // All characters >= 0x80 must fall back.
- if (ch > 0x7f)
- {
- // Initialize the buffer
- if (fallbackBuffer == null)
- {
- if (encoder == null)
- fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
- else
- fallbackBuffer = encoder.FallbackBuffer;
- fallbackBuffer.InternalInitialize(charEnd - charCount, charEnd, encoder, true);
- }
-
- // Get Fallback
- charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
- fallbackBuffer.InternalFallback(ch, ref charsForFallback);
- chars = charsForFallback;
-
- // Go ahead & continue (& do the fallback)
- continue;
- }
-
- // We'll use this one
- // Bounds check
- if (bytes >= byteEnd)
- {
- // didn't use this char, we'll throw or use buffer
- if (fallbackBuffer == null || fallbackBuffer.bFallingBack == false)
- {
- Debug.Assert(chars > charStart || bytes == byteStart,
- "[ASCIIEncoding.GetBytes]Expected chars to have advanced already.");
- chars--; // don't use last char
- }
- else
- fallbackBuffer.MovePrevious();
-
- // Are we throwing or using buffer?
- ThrowBytesOverflow(encoder, bytes == byteStart); // throw?
- break; // don't throw, stop
- }
-
- // Go ahead and add it
- *bytes = unchecked((byte)ch);
- bytes++;
+ return originalCharsLength - chars.Length; // total number of chars written
}
-
- // Need to do encoder stuff
- if (encoder != null)
+ else
{
- // Fallback stuck it in encoder if necessary, but we have to clear MustFlush cases
- if (fallbackBuffer != null && !fallbackBuffer.bUsedEncoder)
- // Clear it in case of MustFlush
- encoder._charLeftOver = (char)0;
-
- // Set our chars used count
- encoder._charsUsed = (int)(chars - charStart);
+ return base.GetCharsWithFallback(bytes, originalBytesLength, chars, originalCharsLength, decoder); // the base class receives the (possibly sliced) spans plus the original lengths
}
-
- Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0 ||
- (encoder != null && !encoder._throwOnOverflow),
- "[ASCIIEncoding.GetBytes]Expected Empty fallback buffer at end");
-
- return (int)(bytes - byteStart);
}
- // This is internal and called by something else,
- internal sealed override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
- {
- // Just assert, we're called internally so these should be safe, checked already
- Debug.Assert(bytes != null, "[ASCIIEncoding.GetCharCount]bytes is null");
- Debug.Assert(count >= 0, "[ASCIIEncoding.GetCharCount]byteCount is negative");
+ // Returns a string containing the decoded representation of a range of
+ // bytes in a byte array.
+ //
+ // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
+ // So if you fix this, fix the others. Currently those include:
+ // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
+ // parent method is safe
- // ASCII doesn't do best fit, so don't have to check for it, find out which decoder fallback we're using
- DecoderReplacementFallback fallback = null;
+ public override unsafe string GetString(byte[] bytes, int byteIndex, int byteCount) // Validates the range, then decodes bytes[byteIndex .. byteIndex + byteCount) into a new string.
+ {
+ // Validate Parameters
- if (decoder == null)
- fallback = this.DecoderFallback as DecoderReplacementFallback;
- else
+ if (bytes is null)
{
- fallback = decoder.Fallback as DecoderReplacementFallback;
- Debug.Assert(!decoder._throwOnOverflow || !decoder.InternalHasFallbackBuffer ||
- decoder.FallbackBuffer.Remaining == 0,
- "[ASCIICodePageEncoding.GetCharCount]Expected empty fallback buffer");
+ ThrowHelper.ThrowArgumentNullException(ExceptionArgument.bytes, ExceptionResource.ArgumentNull_Array);
}
- if (fallback != null && fallback.MaxCharCount == 1)
+ if ((byteIndex | byteCount) < 0) // the OR of two ints is negative exactly when at least one of them is negative
{
- // Just return length, SBCS stay the same length because they don't map to surrogate
- // pairs and we don't have a decoder fallback.
-
- return count;
+ ThrowHelper.ThrowArgumentOutOfRangeException(
+ argument: (byteIndex < 0) ? ExceptionArgument.byteIndex : ExceptionArgument.byteCount,
+ resource: ExceptionResource.ArgumentOutOfRange_NeedNonNegNum);
}
- // Only need decoder fallback buffer if not using default replacement fallback, no best fit for ASCII
- DecoderFallbackBuffer fallbackBuffer = null;
-
- // Have to do it the hard way.
- // Assume charCount will be == count
- int charCount = count;
- byte[] byteBuffer = new byte[1];
-
- // Do it our fast way
- byte* byteEnd = bytes + count;
-
- // Quick loop
- while (bytes < byteEnd)
+ if (bytes.Length - byteIndex < byteCount) // the requested range must fit in the array; written as a subtraction so byteIndex + byteCount cannot overflow
{
- // Faster if don't use *bytes++;
- byte b = *bytes;
- bytes++;
-
- // If unknown we have to do fallback count
- if (b >= 0x80)
- {
- if (fallbackBuffer == null)
- {
- if (decoder == null)
- fallbackBuffer = this.DecoderFallback.CreateFallbackBuffer();
- else
- fallbackBuffer = decoder.FallbackBuffer;
- fallbackBuffer.InternalInitialize(byteEnd - count, null);
- }
-
- // Use fallback buffer
- byteBuffer[0] = b;
- charCount--; // Have to unreserve the one we already allocated for b
- charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
- }
+ ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.bytes, ExceptionResource.ArgumentOutOfRange_IndexCountBuffer);
}
- // Fallback buffer must be empty
- Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
- "[ASCIIEncoding.GetCharCount]Expected Empty fallback buffer");
+ // Avoid problems with empty input buffer
+ if (byteCount == 0)
+ return string.Empty;
- // Converted sequence is same length as input
- return charCount;
+ fixed (byte* pBytes = bytes)
+ {
+ return string.CreateStringFromEncoding(pBytes + byteIndex, byteCount, this); // NOTE(review): helper is defined outside this view; presumably it sizes and fills the string using this encoding - confirm
+ }
}
- internal sealed override unsafe int GetChars(
- byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS decoder)
- {
- // Just need to ASSERT, this is called by something else internal that checked parameters already
- Debug.Assert(bytes != null, "[ASCIIEncoding.GetChars]bytes is null");
- Debug.Assert(byteCount >= 0, "[ASCIIEncoding.GetChars]byteCount is negative");
- Debug.Assert(chars != null, "[ASCIIEncoding.GetChars]chars is null");
- Debug.Assert(charCount >= 0, "[ASCIIEncoding.GetChars]charCount is negative");
-
- // Do it fast way if using ? replacement fallback
- byte* byteEnd = bytes + byteCount;
- byte* byteStart = bytes;
- char* charStart = chars;
+ //
+ // End of standard methods copied from EncodingNLS.cs
+ //
- // Note: ASCII doesn't do best fit, but we have to fallback if they use something > 0x7f
- // Only need decoder fallback buffer if not using ? fallback.
- // ASCII doesn't do best fit, so don't have to check for it, find out which decoder fallback we're using
- DecoderReplacementFallback fallback = null;
- char* charsForFallback;
+ //
+ // Beginning of methods used by shared fallback logic.
+ //
- if (decoder == null)
- fallback = this.DecoderFallback as DecoderReplacementFallback;
+ internal sealed override bool TryGetByteCount(Rune value, out int byteCount) // Reports 1 byte for an ASCII rune; returns false for any other rune.
+ {
+ if (value.IsAscii)
+ {
+ byteCount = 1; // every ASCII rune is reported as exactly one byte
+ return true;
+ }
else
{
- fallback = decoder.Fallback as DecoderReplacementFallback;
- Debug.Assert(!decoder._throwOnOverflow || !decoder.InternalHasFallbackBuffer ||
- decoder.FallbackBuffer.Remaining == 0,
- "[ASCIICodePageEncoding.GetChars]Expected empty fallback buffer");
+ byteCount = default; // byteCount is left at 0 for a non-ASCII rune
+ return false;
}
+ }
- if (fallback != null && fallback.MaxCharCount == 1)
+ internal sealed override OperationStatus EncodeRune(Rune value, Span<byte> bytes, out int bytesWritten) // Writes one ASCII rune as a single byte; non-ASCII runes yield InvalidData, an empty destination yields DestinationTooSmall.
+ {
+ if (value.IsAscii)
{
- // Try it the fast way
- char replacementChar = fallback.DefaultString[0];
-
- // Need byteCount chars, otherwise too small buffer
- if (charCount < byteCount)
+ if (!bytes.IsEmpty) // one byte of room is all an ASCII rune needs
{
- // Need at least 1 output byte, throw if must throw
- ThrowCharsOverflow(decoder, charCount < 1);
-
- // Not throwing, use what we can
- byteEnd = bytes + charCount;
+ bytes[0] = (byte)value.Value; // no truncation: this branch is only reached when value.IsAscii is true
+ bytesWritten = 1;
+ return OperationStatus.Done;
}
-
- // Quick loop, just do '?' replacement because we don't have fallbacks for decodings.
- while (bytes < byteEnd)
+ else
{
- byte b = *(bytes++);
- if (b >= 0x80)
- // This is an invalid byte in the ASCII encoding.
- *(chars++) = replacementChar;
- else
- *(chars++) = unchecked((char)b);
+ bytesWritten = 0; // encodable rune, but there is nowhere to put it
+ return OperationStatus.DestinationTooSmall;
}
-
- // bytes & chars used are the same
- if (decoder != null)
- decoder._bytesUsed = (int)(bytes - byteStart);
- return (int)(chars - charStart);
}
-
- // Slower way's going to need a fallback buffer
- DecoderFallbackBuffer fallbackBuffer = null;
- byte[] byteBuffer = new byte[1];
- char* charEnd = chars + charCount;
-
- // Not quite so fast loop
- while (bytes < byteEnd)
+ else
{
- // Faster if don't use *bytes++;
- byte b = *(bytes);
- bytes++;
+ bytesWritten = 0; // non-ASCII rune: nothing is written, regardless of destination size
+ return OperationStatus.InvalidData;
+ }
+ }
- if (b >= 0x80)
+ internal sealed override OperationStatus DecodeFirstRune(ReadOnlySpan<byte> bytes, out Rune value, out int bytesConsumed) // Decodes only the first byte: Done for ASCII, InvalidData for a byte above 0x7F, NeedMoreData for empty input.
+ {
+ if (!bytes.IsEmpty)
+ {
+ byte b = bytes[0]; // only the first byte is ever examined
+ if (b <= 0x7F)
{
- // This is an invalid byte in the ASCII encoding.
- if (fallbackBuffer == null)
- {
- if (decoder == null)
- fallbackBuffer = this.DecoderFallback.CreateFallbackBuffer();
- else
- fallbackBuffer = decoder.FallbackBuffer;
- fallbackBuffer.InternalInitialize(byteEnd - byteCount, charEnd);
- }
-
- // Use fallback buffer
- byteBuffer[0] = b;
-
- // Note that chars won't get updated unless this succeeds
- charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
- bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
- chars = charsForFallback;
+ // ASCII byte
- if (!fallbackResult)
- {
- // May or may not throw, but we didn't get this byte
- Debug.Assert(bytes > byteStart || chars == charStart,
- "[ASCIIEncoding.GetChars]Expected bytes to have advanced already (fallback case)");
- bytes--; // unused byte
- fallbackBuffer.InternalReset(); // Didn't fall this back
- ThrowCharsOverflow(decoder, chars == charStart); // throw?
- break; // don't throw, but stop loop
- }
+ value = new Rune(b); // the byte value is used directly as the rune's scalar value
+ bytesConsumed = 1;
+ return OperationStatus.Done;
}
else
{
- // Make sure we have buffer space
- if (chars >= charEnd)
- {
- Debug.Assert(bytes > byteStart || chars == charStart,
- "[ASCIIEncoding.GetChars]Expected bytes to have advanced already (normal case)");
- bytes--; // unused byte
- ThrowCharsOverflow(decoder, chars == charStart); // throw?
- break; // don't throw, but stop loop
- }
+ // Non-ASCII byte
- *(chars) = unchecked((char)b);
- chars++;
+ value = Rune.ReplacementChar;
+ bytesConsumed = 1; // the single offending byte is still reported as consumed
+ return OperationStatus.InvalidData;
}
}
+ else
+ {
+ // No data to decode
- // Might have had decoder fallback stuff.
- if (decoder != null)
- decoder._bytesUsed = (int)(bytes - byteStart);
-
- // Expect Empty fallback buffer for GetChars
- Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
- "[ASCIIEncoding.GetChars]Expected Empty fallback buffer");
-
- return (int)(chars - charStart);
+ value = Rune.ReplacementChar; // value is still assigned (it is an out parameter) even though nothing was decoded
+ bytesConsumed = 0;
+ return OperationStatus.NeedMoreData;
+ }
}
+ //
+ // End of methods used by shared fallback logic.
+ //
public override int GetMaxByteCount(int charCount)
{
diff --git a/src/System.Private.CoreLib/shared/System/Text/ASCIIUtility.cs b/src/System.Private.CoreLib/shared/System/Text/ASCIIUtility.cs
new file mode 100644
index 0000000000..5bc80c35f5
--- /dev/null
+++ b/src/System.Private.CoreLib/shared/System/Text/ASCIIUtility.cs
@@ -0,0 +1,76 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Runtime.CompilerServices;
+
+namespace System.Text
+{
+ /*
+ * Contains naive unoptimized (non-SIMD) implementations of ASCII transcoding
+ * operations. Vectorized methods can be substituted here as a drop-in replacement.
+ */
+
+ internal unsafe static class ASCIIUtility // Scalar loops over raw pointers; none of the methods validate their pointer or count arguments.
+ {
+ [MethodImpl(MethodImplOptions.NoInlining)] // the actual implementation won't be inlined, so this shouldn't be either, lest it throw off benchmarks
+ public static uint GetIndexOfFirstNonAsciiByte(byte* pBytes, uint byteCount) // Returns the index of the first byte above 0x7F, or byteCount if there is none.
+ {
+ uint idx = 0;
+ for (; idx < byteCount; idx++)
+ {
+ if ((sbyte)pBytes[idx] < 0) // a byte with its high bit set (0x80..0xFF) is negative when reinterpreted as sbyte
+ {
+ break;
+ }
+ }
+ return idx;
+ }
+ // Returns the index of the first char above 0x7F, or charCount if there is none.
+ [MethodImpl(MethodImplOptions.NoInlining)] // the actual implementation won't be inlined, so this shouldn't be either, lest it throw off benchmarks
+ public static uint GetIndexOfFirstNonAsciiChar(char* pChars, uint charCount)
+ {
+ uint idx = 0;
+ for (; idx < charCount; idx++)
+ {
+ if (pChars[idx] > 0x7Fu)
+ {
+ break;
+ }
+ }
+ return idx;
+ }
+ // Copies chars to bytes until a char above 0x7F is found; returns the number of elements copied.
+ [MethodImpl(MethodImplOptions.NoInlining)] // the actual implementation won't be inlined, so this shouldn't be either, lest it throw off benchmarks
+ public static uint NarrowUtf16ToAscii(char* pChars, byte* pBytes, uint elementCount)
+ {
+ uint idx = 0;
+ for (; idx < elementCount; idx++)
+ {
+ uint ch = pChars[idx];
+ if (ch > 0x7Fu)
+ {
+ break; // the non-ASCII char is not written to the destination
+ }
+ pBytes[idx] = (byte)ch; // lossless: ch is at most 0x7F here
+ }
+ return idx;
+ }
+ // Copies bytes to chars until a byte above 0x7F is found; returns the number of elements copied.
+ [MethodImpl(MethodImplOptions.NoInlining)] // the actual implementation won't be inlined, so this shouldn't be either, lest it throw off benchmarks
+ public static uint WidenAsciiToUtf16(byte* pBytes, char* pChars, uint elementCount)
+ {
+ uint idx = 0;
+ for (; idx < elementCount; idx++)
+ {
+ byte b = pBytes[idx];
+ if (b > 0x7F)
+ {
+ break; // the non-ASCII byte is not written to the destination
+ }
+ pChars[idx] = (char)b;
+ }
+ return idx;
+ }
+ }
+}
diff --git a/src/System.Private.CoreLib/shared/System/Text/DecoderFallback.cs b/src/System.Private.CoreLib/shared/System/Text/DecoderFallback.cs
index fff8ad1d7b..2eb03d8089 100644
--- a/src/System.Private.CoreLib/shared/System/Text/DecoderFallback.cs
+++ b/src/System.Private.CoreLib/shared/System/Text/DecoderFallback.cs
@@ -67,6 +67,10 @@ namespace System.Text
internal unsafe byte* byteStart;
internal unsafe char* charEnd;
+ internal Encoding _encoding;
+ internal DecoderNLS _decoder;
+ private int _originalByteCount;
+
// Internal Reset
internal unsafe void InternalReset()
{
@@ -82,6 +86,22 @@ namespace System.Text
this.charEnd = charEnd;
}
+ internal static DecoderFallbackBuffer CreateAndInitialize(Encoding encoding, DecoderNLS decoder, int originalByteCount) // Obtains a fallback buffer and records the encoding, decoder and original input length on it.
+ {
+ // The original byte count is only used for keeping track of what 'index' value needs
+ // to be passed to the abstract Fallback method. The index value is calculated by subtracting
+ // 'bytes.Length' (where bytes is expected to be the entire remaining input buffer)
+ // from the 'originalByteCount' value specified here.
+ // With no decoder, a new buffer is created from the encoding's fallback; otherwise the decoder's own FallbackBuffer is used.
+ DecoderFallbackBuffer fallbackBuffer = (decoder is null) ? encoding.DecoderFallback.CreateFallbackBuffer() : decoder.FallbackBuffer;
+
+ fallbackBuffer._encoding = encoding;
+ fallbackBuffer._decoder = decoder; // may be null
+ fallbackBuffer._originalByteCount = originalByteCount;
+
+ return fallbackBuffer;
+ }
+
// Fallback the current byte by sticking it into the remaining char buffer.
// This can only be called by our encodings (other have to use the public fallback methods), so
// we can use our DecoderNLS here too (except we don't).
@@ -191,6 +211,90 @@ namespace System.Text
return 0;
}
+ internal int InternalFallbackGetCharCount(ReadOnlySpan<byte> remainingBytes, int fallbackLength) // falls back the first fallbackLength bytes of remainingBytes and returns the resulting char count
+ {
+ return (Fallback(remainingBytes.Slice(0, fallbackLength).ToArray(), index: _originalByteCount - remainingBytes.Length)) // index = offset of the bad bytes relative to the original input
+ ? DrainRemainingDataForGetCharCount() // fallback produced data: count it
+ : 0; // Fallback returned false: nothing to count
+ }
+
+ internal bool TryInternalFallbackGetChars(ReadOnlySpan<byte> remainingBytes, int fallbackLength, Span<char> chars, out int charsWritten) // falls back the first fallbackLength bytes and writes the replacement chars to 'chars'; false means 'chars' was too small
+ {
+ if (Fallback(remainingBytes.Slice(0, fallbackLength).ToArray(), index: _originalByteCount - remainingBytes.Length)) // index = offset of the bad bytes relative to the original input
+ {
+ return TryDrainRemainingDataForGetChars(chars, out charsWritten);
+ }
+ else
+ {
+ // Return true because we weren't asked to write anything, so this is a "success" in the sense that
+ // the output buffer was large enough to hold the desired 0 chars of output.
+
+ charsWritten = 0;
+ return true;
+ }
+ }
+
+ private Rune GetNextRune() // reads the next scalar value (one char, or a surrogate pair) from this fallback buffer
+ {
+ // Call GetNextChar() and try treating it as a non-surrogate character.
+ // If that fails, call GetNextChar() again and attempt to treat the two chars
+ // as a surrogate pair. If that still fails, throw an exception since the fallback
+ // mechanism is giving us a bad replacement character.
+
+ Rune rune;
+ char ch = GetNextChar();
+ if (!Rune.TryCreate(ch, out rune) && !Rune.TryCreate(ch, GetNextChar(), out rune)) // the second GetNextChar() is only invoked when 'ch' alone is not a valid scalar
+ {
+ throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex);
+ }
+
+ return rune;
+ }
+
+ internal int DrainRemainingDataForGetCharCount() // consumes runes from this buffer and returns their total UTF-16 length
+ {
+ int totalCharCount = 0;
+
+ Rune thisRune;
+ while ((thisRune = GetNextRune()).Value != 0) // a rune with value 0 terminates the loop
+ {
+ // We need to check for overflow while tallying the fallback char count.
+
+ totalCharCount += thisRune.Utf16SequenceLength;
+ if (totalCharCount < 0) // the running total wrapped past int.MaxValue
+ {
+ InternalReset();
+ Encoding.ThrowConversionOverflow();
+ }
+ }
+
+ return totalCharCount;
+ }
+
+ internal bool TryDrainRemainingDataForGetChars(Span<char> chars, out int charsWritten) // writes runes from this buffer into 'chars'; returns false (after resetting the buffer) if 'chars' is too small
+ {
+ int originalCharCount = chars.Length;
+
+ Rune thisRune;
+ while ((thisRune = GetNextRune()).Value != 0) // a rune with value 0 terminates the loop
+ {
+ if (thisRune.TryEncode(chars, out int charsWrittenJustNow))
+ {
+ chars = chars.Slice(charsWrittenJustNow); // advance past what was just written
+ continue;
+ }
+ else
+ {
+ InternalReset(); // destination too small: reset the buffer and report failure with no chars written
+ charsWritten = default;
+ return false;
+ }
+ }
+
+ charsWritten = originalCharCount - chars.Length; // total written = how far the destination span advanced
+ return true;
+ }
+
// private helper methods
internal void ThrowLastBytesRecursive(byte[] bytesUnknown)
{
diff --git a/src/System.Private.CoreLib/shared/System/Text/DecoderNLS.cs b/src/System.Private.CoreLib/shared/System/Text/DecoderNLS.cs
index 8af4dc3a55..597d362bf7 100644
--- a/src/System.Private.CoreLib/shared/System/Text/DecoderNLS.cs
+++ b/src/System.Private.CoreLib/shared/System/Text/DecoderNLS.cs
@@ -2,9 +2,8 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
-using System.Runtime.Serialization;
-using System.Text;
-using System;
+using System.Buffers;
+using System.Diagnostics;
using System.Runtime.InteropServices;
namespace System.Text
@@ -27,6 +26,8 @@ namespace System.Text
private bool _mustFlush;
internal bool _throwOnOverflow;
internal int _bytesUsed;
+ private int _leftoverBytes; // leftover data from a previous invocation of GetChars (up to 4 bytes)
+ private int _leftoverByteCount; // number of bytes of actual data in _leftoverBytes
internal DecoderNLS(Encoding encoding)
{
@@ -44,6 +45,7 @@ namespace System.Text
public override void Reset() // discards leftover partial-sequence bytes and resets the fallback buffer, if one exists
{
+ ClearLeftoverData();
_fallbackBuffer?.Reset();
}
@@ -238,5 +240,195 @@ namespace System.Text
{
_mustFlush = false;
}
+
+ internal ReadOnlySpan<byte> GetLeftoverData() // returns a byte view over the first _leftoverByteCount bytes stored in _leftoverBytes
+ {
+ return MemoryMarshal.AsBytes(new ReadOnlySpan<int>(ref _leftoverBytes, 1)).Slice(0, _leftoverByteCount); // reinterpret the int field as bytes, then trim to the valid count
+ }
+
+ internal void SetLeftoverData(ReadOnlySpan<byte> bytes) // stores 'bytes' as the leftover data; 'bytes' must fit in the int-sized _leftoverBytes field
+ {
+ bytes.CopyTo(MemoryMarshal.AsBytes(new Span<int>(ref _leftoverBytes, 1))); // write into the int field viewed as bytes
+ _leftoverByteCount = bytes.Length; // overwrites the previous leftover count
+ }
+
+ internal bool HasLeftoverData => _leftoverByteCount != 0; // true when leftover bytes are currently stored
+
+ internal void ClearLeftoverData() // marks the leftover storage as empty
+ {
+ _leftoverByteCount = 0; // only the count is reset; _leftoverBytes itself is left untouched
+ }
+
+ internal int DrainLeftoverDataForGetCharCount(ReadOnlySpan<byte> bytes, out int bytesConsumed) // counts the chars produced by decoding the leftover bytes combined with the start of 'bytes'; does not modify the leftover fields
+ {
+ // Quick check: we _should not_ have leftover fallback data from a previous invocation,
+ // as we'd end up consuming any such data and would corrupt whatever Convert call happens
+ // to be in progress. Unlike EncoderNLS, this is simply a Debug.Assert. No exception is thrown.
+
+ Debug.Assert(_fallbackBuffer is null || _fallbackBuffer.Remaining == 0, "Should have no data remaining in the fallback buffer.");
+
+ // Copy the existing leftover data plus as many bytes as possible of the new incoming data
+ // into a temporary concatenated buffer, then get its char count by decoding it.
+
+ Span<byte> combinedBuffer = stackalloc byte[4];
+ combinedBuffer = combinedBuffer.Slice(0, ConcatInto(GetLeftoverData(), bytes, combinedBuffer)); // trim to the number of bytes actually copied
+ int charCount = 0;
+
+ switch (_encoding.DecodeFirstRune(combinedBuffer, out Rune value, out int combinedBufferBytesConsumed))
+ {
+ case OperationStatus.Done:
+ charCount = value.Utf16SequenceLength;
+ goto Finish; // successfully transcoded bytes -> chars
+
+ case OperationStatus.NeedMoreData:
+ if (MustFlush)
+ {
+ goto case OperationStatus.InvalidData; // treat as equivalent to bad data
+ }
+ else
+ {
+ goto Finish; // consumed some bytes, output 0 chars
+ }
+
+ case OperationStatus.InvalidData:
+ break;
+
+ default:
+ Debug.Fail("Unexpected OperationStatus return value.");
+ break;
+ }
+
+ // Couldn't decode the buffer. Fallback the buffer instead.
+
+ if (FallbackBuffer.Fallback(combinedBuffer.Slice(0, combinedBufferBytesConsumed).ToArray(), index: 0)) // only the bytes DecodeFirstRune reported as consumed are fallen back
+ {
+ charCount = _fallbackBuffer.DrainRemainingDataForGetCharCount();
+ Debug.Assert(charCount >= 0, "Fallback buffer shouldn't have returned a negative char count.");
+ }
+
+ Finish:
+
+ bytesConsumed = combinedBufferBytesConsumed - _leftoverByteCount; // amount of 'bytes' buffer consumed just now (_leftoverByteCount is unchanged in this method)
+ return charCount;
+ }
+
+ internal int DrainLeftoverDataForGetChars(ReadOnlySpan<byte> bytes, Span<char> chars, out int bytesConsumed) // decodes the leftover bytes combined with the start of 'bytes' into 'chars'; returns chars written and updates the leftover state
+ {
+ // Quick check: we _should not_ have leftover fallback data from a previous invocation,
+ // as we'd end up consuming any such data and would corrupt whatever Convert call happens
+ // to be in progress. Unlike EncoderNLS, this is simply a Debug.Assert. No exception is thrown.
+
+ Debug.Assert(_fallbackBuffer is null || _fallbackBuffer.Remaining == 0, "Should have no data remaining in the fallback buffer.");
+
+ // Copy the existing leftover data plus as many bytes as possible of the new incoming data
+ // into a temporary concatenated buffer, then transcode it from bytes to chars.
+
+ Span<byte> combinedBuffer = stackalloc byte[4];
+ combinedBuffer = combinedBuffer.Slice(0, ConcatInto(GetLeftoverData(), bytes, combinedBuffer)); // trim to the number of bytes actually copied
+ int charsWritten = 0;
+
+ bool persistNewCombinedBuffer = false;
+
+ switch (_encoding.DecodeFirstRune(combinedBuffer, out Rune value, out int combinedBufferBytesConsumed))
+ {
+ case OperationStatus.Done:
+ if (value.TryEncode(chars, out charsWritten))
+ {
+ goto Finish; // successfully transcoded bytes -> chars
+ }
+ else
+ {
+ goto DestinationTooSmall;
+ }
+
+ case OperationStatus.NeedMoreData:
+ if (MustFlush)
+ {
+ goto case OperationStatus.InvalidData; // treat as equivalent to bad data
+ }
+ else
+ {
+ persistNewCombinedBuffer = true;
+ goto Finish; // successfully consumed some bytes, output no chars
+ }
+
+ case OperationStatus.InvalidData:
+ break;
+
+ default:
+ Debug.Fail("Unexpected OperationStatus return value.");
+ break;
+ }
+
+ // Couldn't decode the buffer. Fallback the buffer instead.
+
+ if (FallbackBuffer.Fallback(combinedBuffer.Slice(0, combinedBufferBytesConsumed).ToArray(), index: 0) // only the bytes DecodeFirstRune reported as consumed are fallen back
+ && !_fallbackBuffer.TryDrainRemainingDataForGetChars(chars, out charsWritten))
+ {
+ goto DestinationTooSmall;
+ }
+
+ Finish:
+
+ bytesConsumed = combinedBufferBytesConsumed - _leftoverByteCount; // amount of 'bytes' buffer consumed just now; must be computed BEFORE SetLeftoverData / ClearLeftoverData below, since both overwrite _leftoverByteCount
+
+ if (persistNewCombinedBuffer)
+ {
+ Debug.Assert(combinedBufferBytesConsumed == combinedBuffer.Length, "We should be asked to persist the entire combined buffer.");
+ SetLeftoverData(combinedBuffer); // the buffer still only contains partial data; a future call to Convert will need it
+ }
+ else
+ {
+ ClearLeftoverData(); // the buffer contains no partial data; we'll go down the normal paths
+ }
+ return charsWritten;
+
+ DestinationTooSmall:
+
+ // If we got to this point, we're trying to write chars to the output buffer, but we're unable to do
+ // so. Unlike EncoderNLS, this type does not allow partial writes to the output buffer. Since we know
+ // draining leftover data is the first operation performed by any DecoderNLS API, there was no
+ // opportunity for any code before us to make forward progress, so we must fail immediately.
+
+ _encoding.ThrowCharsOverflow(this, nothingDecoded: true);
+ throw null; // will never reach this point
+ }
+
+ /// <summary>
+ /// Given a byte buffer <paramref name="dest"/>, concatenates as much of <paramref name="srcLeft"/> followed
+ /// by <paramref name="srcRight"/> into it as will fit, then returns the total number of bytes copied.
+ /// </summary>
+ private static int ConcatInto(ReadOnlySpan<byte> srcLeft, ReadOnlySpan<byte> srcRight, Span<byte> dest)
+ {
+ int total = 0; // number of bytes written to 'dest' so far
+
+ for (int i = 0; i < srcLeft.Length; i++)
+ {
+ if ((uint)total >= (uint)dest.Length) // destination full: stop copying
+ {
+ goto Finish;
+ }
+ else
+ {
+ dest[total++] = srcLeft[i];
+ }
+ }
+
+ for (int i = 0; i < srcRight.Length; i++)
+ {
+ if ((uint)total >= (uint)dest.Length) // destination full: stop copying
+ {
+ goto Finish;
+ }
+ else
+ {
+ dest[total++] = srcRight[i];
+ }
+ }
+
+ Finish:
+
+ return total;
+ }
}
}
diff --git a/src/System.Private.CoreLib/shared/System/Text/EncoderFallback.cs b/src/System.Private.CoreLib/shared/System/Text/EncoderFallback.cs
index f98b15e078..ff895d6788 100644
--- a/src/System.Private.CoreLib/shared/System/Text/EncoderFallback.cs
+++ b/src/System.Private.CoreLib/shared/System/Text/EncoderFallback.cs
@@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
+using System.Buffers;
using System.Diagnostics;
using System.Threading;
@@ -86,12 +87,14 @@ namespace System.Text
// These help us with our performance and messages internally
internal unsafe char* charStart;
internal unsafe char* charEnd;
- internal EncoderNLS encoder;
+ internal EncoderNLS encoder; // TODO: MAKE ME PRIVATE
internal bool setEncoder;
internal bool bUsedEncoder;
internal bool bFallingBack = false;
internal int iRecursionCount = 0;
private const int iMaxRecursion = 250;
+ private Encoding encoding;
+ private int originalCharCount;
// Internal Reset
// For example, what if someone fails a conversion and wants to reset one of our fallback buffers?
@@ -116,6 +119,22 @@ namespace System.Text
this.iRecursionCount = 0;
}
+ internal static EncoderFallbackBuffer CreateAndInitialize(Encoding encoding, EncoderNLS encoder, int originalCharCount) // obtains a fallback buffer and stamps it with the encoding, encoder and original char count
+ {
+ // The original char count is only used for keeping track of what 'index' value needs
+ // to be passed to the abstract Fallback method. The index value is calculated by subtracting
+ // 'chars.Length' (where chars is expected to be the entire remaining input buffer)
+ // from the 'originalCharCount' value specified here.
+
+ EncoderFallbackBuffer fallbackBuffer = (encoder is null) ? encoding.EncoderFallback.CreateFallbackBuffer() : encoder.FallbackBuffer; // no encoder: create a fresh buffer from the encoding's fallback; otherwise use the encoder's buffer
+
+ fallbackBuffer.encoding = encoding;
+ fallbackBuffer.encoder = encoder; // may be null when there is no EncoderNLS
+ fallbackBuffer.originalCharCount = originalCharCount;
+
+ return fallbackBuffer;
+ }
+
internal char InternalGetNextChar()
{
char ch = GetNextChar();
@@ -124,6 +143,170 @@ namespace System.Text
return ch;
}
+ private bool InternalFallback(ReadOnlySpan<char> chars, out int charsConsumed) // invokes Fallback on the first char (or surrogate pair) of 'chars'; charsConsumed is 1 or 2
+ {
+ Debug.Assert(!chars.IsEmpty, "Caller shouldn't invoke this if there's no data to fall back.");
+
+ // First, try falling back a single BMP character or a standalone low surrogate.
+ // If the first char is a high surrogate, we'll try to combine it with the next
+ // char in the input sequence.
+
+ char firstChar = chars[0]; // this indexer already requires a non-empty span
+ char secondChar = default;
+
+ if (!chars.IsEmpty) // NOTE(review): redundant with the chars[0] read above, which cannot succeed on an empty span
+ {
+ firstChar = chars[0];
+
+ if (1 < (uint)chars.Length)
+ {
+ secondChar = chars[1];
+ }
+ }
+
+ // Ask the subclassed type to initiate fallback logic.
+
+ int index = originalCharCount - chars.Length; // offset of the bad char relative to the original input
+
+ if (!char.IsSurrogatePair(firstChar, secondChar))
+ {
+ // This code path is also used when 'firstChar' is a standalone surrogate or
+ // if it's a high surrogate at the end of the input buffer.
+
+ charsConsumed = 1;
+ return Fallback(firstChar, index);
+ }
+ else
+ {
+ charsConsumed = 2;
+ return Fallback(firstChar, secondChar, index);
+ }
+ }
+
+ internal int InternalFallbackGetByteCount(ReadOnlySpan<char> chars, out int charsConsumed) // falls back the first char(s) of 'chars' and returns the byte count of the replacement data
+ {
+ int bytesWritten = 0; // stays 0 when the fallback produced no data
+
+ if (InternalFallback(chars, out charsConsumed))
+ {
+ // There's data in the fallback buffer - pull it out now.
+
+ bytesWritten = DrainRemainingDataForGetByteCount();
+ }
+
+ return bytesWritten;
+ }
+
+ internal bool TryInternalFallbackGetBytes(ReadOnlySpan<char> chars, Span<byte> bytes, out int charsConsumed, out int bytesWritten) // falls back the first char(s) of 'chars' and encodes the replacement data into 'bytes'; false means 'bytes' ran out of space
+ {
+ if (InternalFallback(chars, out charsConsumed))
+ {
+ // There's data in the fallback buffer - pull it out now.
+
+ return TryDrainRemainingDataForGetBytes(bytes, out bytesWritten);
+ }
+ else
+ {
+ // There's no data in the fallback buffer.
+
+ bytesWritten = 0;
+ return true; // true = didn't run out of space in destination buffer
+ }
+ }
+
+ internal bool TryDrainRemainingDataForGetBytes(Span<byte> bytes, out int bytesWritten) // encodes runes from this buffer into 'bytes'; returns false when the destination fills up
+ {
+ int originalBytesLength = bytes.Length;
+
+ Rune thisRune;
+ while ((thisRune = GetNextRune()).Value != 0) // a rune with value 0 terminates the loop
+ {
+ switch (encoding.EncodeRune(thisRune, bytes, out int bytesWrittenJustNow))
+ {
+ case OperationStatus.Done:
+
+ bytes = bytes.Slice(bytesWrittenJustNow); // advance past what was just written
+ continue;
+
+ case OperationStatus.DestinationTooSmall:
+
+ // Since we're not consuming the Rune we just read, back up as many chars as necessary
+ // to undo the read we just performed, then report to our caller that we ran out of space.
+
+ for (int i = 0; i < thisRune.Utf16SequenceLength; i++)
+ {
+ MovePrevious();
+ }
+
+ bytesWritten = originalBytesLength - bytes.Length; // bytes written before running out of space
+ return false; // ran out of destination buffer
+
+ case OperationStatus.InvalidData:
+
+ // We can't fallback the fallback. We can't make forward progress, so report to our caller
+ // that something went terribly wrong. The error message contains the fallback char that
+ // couldn't be converted. (Ideally we'd provide the first char that originally triggered
+ // the fallback, but it's complicated to keep this state around, and a fallback producing
+ // invalid data should be a very rare occurrence.)
+
+ ThrowLastCharRecursive(thisRune.Value);
+ break; // will never be hit; call above throws
+
+ default:
+
+ Debug.Fail("Unexpected return value.");
+ break;
+ }
+ }
+
+ bytesWritten = originalBytesLength - bytes.Length; // total written = how far the destination span advanced
+ return true; // finished successfully
+ }
+
+ internal int DrainRemainingDataForGetByteCount() // consumes runes from this buffer and returns their total encoded byte count
+ {
+ int totalByteCount = 0;
+
+ Rune thisRune;
+ while ((thisRune = GetNextRune()).Value != 0) // a rune with value 0 terminates the loop
+ {
+ if (!encoding.TryGetByteCount(thisRune, out int byteCountThisIteration))
+ {
+ // We can't fallback the fallback. We can't make forward progress, so report to our caller
+ // that something went terribly wrong. The error message contains the fallback char that
+ // couldn't be converted. (Ideally we'd provide the first char that originally triggered
+ // the fallback, but it's complicated to keep this state around, and a fallback producing
+ // invalid data should be a very rare occurrence.)
+
+ ThrowLastCharRecursive(thisRune.Value);
+ }
+
+ Debug.Assert(byteCountThisIteration >= 0, "Encoding shouldn't have returned a negative byte count.");
+
+ // We need to check for overflow while tallying the fallback byte count.
+
+ totalByteCount += byteCountThisIteration;
+ if (totalByteCount < 0) // the running total wrapped past int.MaxValue
+ {
+ InternalReset();
+ Encoding.ThrowConversionOverflow();
+ }
+ }
+
+ return totalByteCount;
+ }
+
+ private Rune GetNextRune() // reads the next scalar value (one char, or a surrogate pair) from this fallback buffer
+ {
+ char firstChar = GetNextChar();
+ if (Rune.TryCreate(firstChar, out Rune value) || Rune.TryCreate(firstChar, GetNextChar(), out value)) // the second GetNextChar() is only invoked when firstChar alone is not a valid scalar
+ {
+ return value;
+ }
+
+ throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex); // the chars read do not form a valid scalar value
+ }
+
// Fallback the current character using the remaining buffer and encoder if necessary
// This can only be called by our encodings (other have to use the public fallback methods), so
// we can use our EncoderNLS here too.
diff --git a/src/System.Private.CoreLib/shared/System/Text/EncoderNLS.cs b/src/System.Private.CoreLib/shared/System/Text/EncoderNLS.cs
index e83666f7a3..2901fc37b9 100644
--- a/src/System.Private.CoreLib/shared/System/Text/EncoderNLS.cs
+++ b/src/System.Private.CoreLib/shared/System/Text/EncoderNLS.cs
@@ -2,8 +2,8 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
-using System.Text;
-using System;
+using System.Buffers;
+using System.Diagnostics;
using System.Runtime.InteropServices;
namespace System.Text
@@ -197,9 +197,13 @@ namespace System.Text
bytesUsed = _encoding.GetBytes(chars, charCount, bytes, byteCount, this);
charsUsed = _charsUsed;
- // Its completed if they've used what they wanted AND if they didn't want flush or if we are flushed
- completed = (charsUsed == charCount) && (!flush || !this.HasState) &&
- (_fallbackBuffer == null || _fallbackBuffer.Remaining == 0);
+ // Per MSDN, "The completed output parameter indicates whether all the data in the input
+ // buffer was converted and stored in the output buffer." That means we've successfully
+ // consumed all the input _and_ there's no pending state or fallback data remaining to be output.
+
+ completed = (charsUsed == charCount)
+ && !this.HasState
+ && (_fallbackBuffer is null || _fallbackBuffer.Remaining == 0);
// Our data thingys are now full, we can return
}
@@ -220,6 +224,10 @@ namespace System.Text
}
}
+ /// <summary>
+ /// States whether a call to <see cref="Encoding.GetBytes(char*, int, byte*, int, EncoderNLS)"/> must first drain data on this <see cref="EncoderNLS"/> instance.
+ /// </summary>
+ internal bool HasLeftoverData => _charLeftOver != default || (_fallbackBuffer != null && _fallbackBuffer.Remaining > 0); // true if a leftover char is stored or the fallback buffer still holds data
// Anything left in our encoder?
internal virtual bool HasState
@@ -235,5 +243,154 @@ namespace System.Text
{
_mustFlush = false;
}
+
+ internal int DrainLeftoverDataForGetByteCount(ReadOnlySpan<char> chars, out int charsConsumed) // counts the bytes produced by the leftover char (paired with chars[0] when possible); leaves _charLeftOver unchanged
+ {
+ // Quick check: we _should not_ have leftover fallback data from a previous invocation,
+ // as we'd end up consuming any such data and would corrupt whatever Convert call happens
+ // to be in progress.
+
+ if (_fallbackBuffer != null && _fallbackBuffer.Remaining > 0)
+ {
+ throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, Encoding.EncodingName, _fallbackBuffer.GetType()));
+ }
+
+ // If we have a leftover high surrogate from a previous operation, consume it now.
+ // We won't clear the _charLeftOver field since GetByteCount is supposed to be
+ // a non-mutating operation, and we need the field to retain its value for the
+ // next call to Convert.
+
+ charsConsumed = 0; // could be incorrect, will fix up later in the method
+
+ if (_charLeftOver == default)
+ {
+ return 0; // no leftover high surrogate char - short-circuit and finish
+ }
+ else
+ {
+ char secondChar = default;
+
+ if (chars.IsEmpty)
+ {
+ // If the input buffer is empty and we're not being asked to flush, no-op and return
+ // success to our caller. If we're being asked to flush, the leftover high surrogate from
+ // the previous operation will go through the fallback mechanism by itself.
+
+ if (!MustFlush)
+ {
+ return 0; // no-op = success
+ }
+ }
+ else
+ {
+ secondChar = chars[0];
+ }
+
+ // If we have to fallback the chars we're reading immediately below, populate the
+ // fallback buffer with the invalid data. We'll just fall through to the "consume
+ // fallback buffer" logic at the end of the method.
+
+ bool didFallback; // NOTE(review): assigned below but never read in this method
+
+ if (Rune.TryCreate(_charLeftOver, secondChar, out Rune rune))
+ {
+ charsConsumed = 1; // consumed the leftover high surrogate + the first char in the input buffer
+
+ if (_encoding.TryGetByteCount(rune, out int byteCount))
+ {
+ Debug.Assert(byteCount >= 0, "Encoding shouldn't have returned a negative byte count.");
+ return byteCount;
+ }
+ else
+ {
+ didFallback = FallbackBuffer.Fallback(_charLeftOver, secondChar, index: 0);
+ }
+ }
+ else
+ {
+ didFallback = FallbackBuffer.Fallback(_charLeftOver, index: 0); // leftover char did not pair with secondChar: fall it back by itself
+ }
+
+ // Now tally the number of bytes that would've been emitted as part of fallback.
+
+ return _fallbackBuffer.DrainRemainingDataForGetByteCount(); // runs regardless of what Fallback returned
+ }
+ }
+
+ internal bool TryDrainLeftoverDataForGetBytes(ReadOnlySpan<char> chars, Span<byte> bytes, out int charsConsumed, out int bytesWritten) // encodes the leftover char (paired with chars[0] when possible) and any pending fallback data into 'bytes'
+ {
+ // We may have a leftover high surrogate data from a previous invocation, or we may have leftover
+ // data in the fallback buffer, or we may have neither, but we will never have both. Check for these
+ // conditions and handle them now.
+
+ charsConsumed = 0; // could be incorrect, will fix up later in the method
+ bytesWritten = 0; // could be incorrect, will fix up later in the method
+
+ if (_charLeftOver != default)
+ {
+ char secondChar = default;
+
+ if (chars.IsEmpty)
+ {
+ // If the input buffer is empty and we're not being asked to flush, no-op and return
+ // success to our caller. If we're being asked to flush, the leftover high surrogate from
+ // the previous operation will go through the fallback mechanism by itself.
+
+ if (!MustFlush)
+ {
+ charsConsumed = 0;
+ bytesWritten = 0;
+ return true; // no-op = success
+ }
+ }
+ else
+ {
+ secondChar = chars[0];
+ }
+
+ // If we have to fallback the chars we're reading immediately below, populate the
+ // fallback buffer with the invalid data. We'll just fall through to the "consume
+ // fallback buffer" logic at the end of the method.
+
+ if (Rune.TryCreate(_charLeftOver, secondChar, out Rune rune))
+ {
+ charsConsumed = 1; // at the very least, we consumed 1 char from the input
+ switch (_encoding.EncodeRune(rune, bytes, out bytesWritten))
+ {
+ case OperationStatus.Done:
+ _charLeftOver = default; // we just consumed this char
+ return true; // that's all - we've handled the leftover data
+
+ case OperationStatus.DestinationTooSmall:
+ _charLeftOver = default; // we just consumed this char
+ _encoding.ThrowBytesOverflow(this, nothingEncoded: true); // will throw
+ break;
+
+ case OperationStatus.InvalidData:
+ FallbackBuffer.Fallback(_charLeftOver, secondChar, index: 0); // NOTE(review): _charLeftOver is not cleared on this path within this method - confirm the caller handles it
+ break;
+
+ default:
+ Debug.Fail("Unknown return value.");
+ break;
+ }
+ }
+ else
+ {
+ FallbackBuffer.Fallback(_charLeftOver, index: 0); // leftover char did not pair with secondChar: fall it back by itself; charsConsumed stays 0
+ }
+ }
+
+ // Now check the fallback buffer for any remaining data.
+
+ if (_fallbackBuffer != null && _fallbackBuffer.Remaining > 0)
+ {
+ return _fallbackBuffer.TryDrainRemainingDataForGetBytes(bytes, out bytesWritten); // false when 'bytes' is too small for the fallback data
+ }
+
+ // And we're done!
+
+ return true; // success
+ }
}
}
diff --git a/src/System.Private.CoreLib/shared/System/Text/Encoding.Internal.cs b/src/System.Private.CoreLib/shared/System/Text/Encoding.Internal.cs
new file mode 100644
index 0000000000..09044afefe
--- /dev/null
+++ b/src/System.Private.CoreLib/shared/System/Text/Encoding.Internal.cs
@@ -0,0 +1,1277 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Buffers;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using Internal.Runtime.CompilerServices;
+
+namespace System.Text
+{
+ public partial class Encoding
+ {
+ /*
+ * This file contains infrastructure code that supports a simplified way of writing
+ * internally-implemented Encoding types. In this system, the individual Encoding types
+ * are no longer responsible for handling anything related to the EncoderNLS / DecoderNLS
+ * infrastructure, nor are they responsible for implementing anything related to fallback
+ * buffers logic.
+ *
+ * Instead, subclassed types are responsible only for transcoding of individual scalar values
+ * to and from the encoding's byte representation (see the two methods immediately below).
+ * They can optionally implement fast-path logic to perform bulk transcoding up until the
+ * first segment of data that cannot be transcoded. They can special-case certain fallback
+ * mechanisms if desired.
+ *
+ * Most of the fast-path code is written using raw pointers as the exchange types, just as
+ * in the standard Encoding infrastructure. Since the fallback logic is more complex, most
+ * of it is written using type-safe constructs like Span<T>, with some amount of glue to
+ * allow it to work correctly with pointer-based fast-path code.
+ *
+ * A typical call graph for GetBytes is represented below, using ASCIIEncoding as an example.
+ *
+ * ASCIIEncoding.GetBytes(...) [non-EncoderNLS path, public virtual override]
+ * `- <parameter validation>
+ * - ASCIIEncoding.GetBytesCommon [private helper method per derived type, inlined]
+ * `- ASCIIEncoding.GetBytesFast [overridden fast-path implementation, inlined]
+ * - <if all data transcoded, return immediately>
+ * - <if all data not transcoded...>
+ * `- Encoding.GetBytesWithFallback [non-virtual stub method to call main GetBytesWithFallback worker]
+ * `- Encoding.GetBytesWithFallback [virtual method whose base implementation contains slow fallback logic]
+ * `- <may be overridden to provide optimized fallback logic>
+ * - <create EncoderFallbackBuffer instance>
+ * - <perform the following in a loop:>
+ * `- <invoke fast-path logic via virtual method dispatch on derived type>
+ * - <read next "bad" scalar value from source>
+ * - <run this bad value through the fallback buffer>
+ * - <drain the fallback buffer to the destination>
+ * - <loop until source is fully consumed or destination is full>
+ * - <signal full or partial success to EncoderNLS instance / throw if necessary>
+ *
+ * The call graph for GetBytes(..., EncoderNLS) is similar:
+ *
+ * Encoding.GetBytes(..., EncoderNLS) [base implementation]
+ * `- <if no leftover data from previous invocation, invoke fast-path>
+ * - <if fast-path invocation above completed, return immediately>
+ * - <if not all data transcoded, or if there was leftover data from previous invocation...>
+ * `- Encoding.GetBytesWithFallback [non-virtual stub method]
+ * `- <drain any leftover data from previous invocation>
+ * - <invoke fast-path again>
+ * - <if all data transcoded, return immediately>
+ * - <if all data not transcoded...>
+ * `- Encoding.GetBytesWithFallback [virtual method as described above]
+ *
+ * There are different considerations in each call graph for things like error handling,
+ * since the error conditions will be different depending on whether or not an EncoderNLS
+ * instance is available and what values its properties have.
+ */
+
+ /*
+ * THESE TWO METHODS MUST BE OVERRIDDEN BY A SUBCLASSED TYPE
+ */
+
+ internal virtual OperationStatus DecodeFirstRune(ReadOnlySpan<byte> bytes, out Rune value, out int bytesConsumed) // base stub: subclasses decode the first scalar value from 'bytes'
+ {
+ Debug.Fail("This should be overridden by a subclassed type.");
+ throw NotImplemented.ByDesign; // the base implementation always throws
+ }
+
+ internal virtual OperationStatus EncodeRune(Rune value, Span<byte> bytes, out int bytesWritten) // base stub: subclasses encode 'value' into 'bytes'
+ {
+ Debug.Fail("This should be overridden by a subclassed type.");
+ throw NotImplemented.ByDesign; // the base implementation always throws
+ }
+
+ /*
+ * ALL OTHER LOGIC CAN BE IMPLEMENTED IN TERMS OF THE TWO METHODS ABOVE.
+ * FOR IMPROVED PERFORMANCE, SUBCLASSED TYPES MAY WANT TO OVERRIDE ONE OR MORE VIRTUAL METHODS BELOW.
+ */
+
+ /*
+ * GETBYTECOUNT FAMILY OF FUNCTIONS
+ */
+
+ /// <summary>
+ /// Given a <see cref="Rune"/>, determines its byte count under the current <see cref="Encoding"/>.
+ /// Returns <see langword="false"/> if the <see cref="Rune"/> cannot be represented in the
+ /// current <see cref="Encoding"/>. The base implementation always throws; subclasses must override it.
+ /// </summary>
+ internal virtual bool TryGetByteCount(Rune value, out int byteCount)
+ {
+ // Any production-quality type would override this method and provide a real
+ // implementation, so we won't provide a base implementation. However, a
+ // non-shipping slow reference implementation is provided below for convenience.
+ // (The reference implementation is compiled out via '#if false'.)
+#if false
+ Span<byte> bytes = stackalloc byte[4]; // max 4 bytes per input scalar
+
+ OperationStatus opStatus = EncodeRune(value, bytes, out byteCount);
+ Debug.Assert(opStatus == OperationStatus.Done || opStatus == OperationStatus.InvalidData, "Unexpected return value.");
+
+ return (opStatus == OperationStatus.Done);
+#else
+ Debug.Fail("This should be overridden by a subclassed type.");
+ throw NotImplemented.ByDesign;
+#endif
+ }
+
+ /// <summary>
+ /// Entry point from <see cref="EncoderNLS.GetByteCount"/>. Tries the fast path first when the
+ /// encoder has no leftover data, and otherwise defers to the draining and fallback path.
+ /// </summary>
+ internal virtual unsafe int GetByteCount(char* pChars, int charCount, EncoderNLS encoder)
+ {
+ Debug.Assert(encoder != null, "This code path should only be called from EncoderNLS.");
+ Debug.Assert(charCount >= 0, "Caller should've checked this condition.");
+ Debug.Assert(pChars != null || charCount == 0, "Cannot provide a null pointer and a non-zero count.");
+
+ // We're going to try to stay on the fast-path as much as we can. That means that we have
+ // no leftover data to drain and the entire source buffer can be consumed in a single
+ // fast-path invocation. If either of these doesn't hold, we'll go down the slow path of
+ // creating spans, draining the EncoderNLS instance, and falling back.
+
+ int totalByteCount = 0;
+ int charsConsumed = 0;
+
+ if (!encoder.HasLeftoverData)
+ {
+ totalByteCount = GetByteCountFast(pChars, charCount, encoder.Fallback, out charsConsumed);
+ if (charsConsumed == charCount)
+ {
+ return totalByteCount; // the fast path consumed the entire input
+ }
+ }
+
+ // We had leftover data, or we couldn't consume the entire input buffer.
+ // Let's go down the draining + fallback mechanisms.
+
+ totalByteCount += GetByteCountWithFallback(pChars, charCount, charsConsumed, encoder); // adds the count for the input not handled by the fast path
+ if (totalByteCount < 0) // the sum wrapped past int.MaxValue
+ {
+ ThrowConversionOverflow();
+ }
+
+ return totalByteCount;
+ }
+
+ /// <summary>
+ /// Counts the number of <see langword="byte"/>s that would result from transcoding the source
+ /// data, exiting when the source buffer is consumed or when the first unreadable data is encountered.
+ /// The implementation may inspect <paramref name="fallback"/> to short-circuit any counting
+ /// operation, but it should not attempt to call <see cref="EncoderFallback.CreateFallbackBuffer"/>.
+ /// </summary>
+ /// <returns>
+ /// Via <paramref name="charsConsumed"/>, the number of elements from <paramref name="pChars"/> which
+ /// were consumed; and returns the transcoded byte count up to this point.
+ /// </returns>
+ /// <exception cref="ArgumentException">
+ /// If the byte count would be greater than <see cref="int.MaxValue"/>.
+ /// (Implementation should call <see cref="ThrowConversionOverflow"/>.)
+ /// </exception>
+ /// <remarks>
+ /// The implementation should not attempt to perform any sort of fallback behavior.
+ /// If custom fallback behavior is necessary, override <see cref="GetByteCountWithFallback"/>.
+ /// The base implementation always throws; subclasses must override it.
+ /// </remarks>
+ private protected virtual unsafe int GetByteCountFast(char* pChars, int charsLength, EncoderFallback fallback, out int charsConsumed)
+ {
+ // Any production-quality type would override this method and provide a real
+ // implementation, so we won't provide a base implementation. However, a
+ // non-shipping slow reference implementation is provided below for convenience.
+ // (The reference implementation is compiled out via '#if false'.)
+#if false
+ ReadOnlySpan<char> chars = new ReadOnlySpan<char>(pChars, charsLength);
+ int totalByteCount = 0;
+
+ while (!chars.IsEmpty)
+ {
+ if (Rune.DecodeUtf16(chars, out Rune scalarValue, out int charsConsumedThisIteration) != OperationStatus.Done
+ || !TryGetByteCount(scalarValue, out int byteCountThisIteration))
+ {
+ // Invalid UTF-16 data, or not convertible to target encoding
+
+ break;
+ }
+
+ chars = chars.Slice(charsConsumedThisIteration);
+
+ totalByteCount += byteCountThisIteration;
+ if (totalByteCount < 0)
+ {
+ ThrowConversionOverflow();
+ }
+ }
+
+ charsConsumed = charsLength - chars.Length; // number of chars consumed across all loop iterations above
+ return totalByteCount;
+#else
+ Debug.Fail("This should be overridden by a subclassed type.");
+ throw NotImplemented.ByDesign;
+#endif
+ }
+
+ /// <summary>
+ /// Encoder-less entry into the fallback-aware byte counting loop. The pointer and length describe
+ /// the caller's original input, and <paramref name="charsConsumedSoFar"/> marks the offset within
+ /// that input at which the fallback loop should start working.
+ /// </summary>
+ /// <returns>
+ /// The byte count resulting from transcoding the input data.
+ /// </returns>
+ /// <exception cref="ArgumentException">
+ /// If the resulting byte count is greater than <see cref="int.MaxValue"/>.
+ /// (Implementation should call <see cref="ThrowConversionOverflow"/>.)
+ /// </exception>
+ [MethodImpl(MethodImplOptions.NoInlining)] // don't stack spill spans into our caller
+ private protected unsafe int GetByteCountWithFallback(char* pCharsOriginal, int originalCharCount, int charsConsumedSoFar)
+ {
+     // This stub is marked "no-inlining" so that we don't stack-spill spans into our
+     // immediate caller; doing so would grow the prolog of what is meant to be a very fast path.
+
+     Debug.Assert(0 <= charsConsumedSoFar && charsConsumedSoFar < originalCharCount, "Invalid arguments provided to method.");
+
+     // Wrap the raw buffer and skip past whatever the caller has already accounted for.
+     ReadOnlySpan<char> unconsumedChars = new ReadOnlySpan<char>(pCharsOriginal, originalCharCount).Slice(charsConsumedSoFar);
+
+     // No EncoderNLS is associated with this call, hence the null encoder.
+     return GetByteCountWithFallback(unconsumedChars, originalCharCount, encoder: null);
+ }
+
+ /// <summary>
+ /// Gets the number of <see langword="byte"/>s that would result from transcoding the provided
+ /// input data, with an associated <see cref="EncoderNLS"/>. The first two arguments are
+ /// based on the original input before invoking this method; and <paramref name="charsConsumedSoFar"/>
+ /// signals where in the provided source buffer the fallback loop should begin operating.
+ /// The behavior of this method is to consume (non-destructively) any leftover data in the
+ /// <see cref="EncoderNLS"/> instance, then to invoke the <see cref="GetByteCountFast"/> virtual method
+ /// after data has been drained, then to call <see cref="GetByteCountWithFallback(ReadOnlySpan{char}, int, EncoderNLS)"/>.
+ /// </summary>
+ /// <param name="pOriginalChars">Pointer to the start of the caller's original source buffer.</param>
+ /// <param name="originalCharCount">Length, in chars, of the original source buffer.</param>
+ /// <param name="charsConsumedSoFar">
+ /// Number of leading chars the caller has already accounted for. This may equal
+ /// <paramref name="originalCharCount"/> (for example zero chars consumed out of an empty buffer)
+ /// when the only remaining work is draining the encoder's leftover data.
+ /// </param>
+ /// <param name="encoder">The encoder instance whose leftover data and fallback are used; must not be null.</param>
+ /// <returns>
+ /// The total number of bytes that would result from transcoding the remaining portion of the source buffer.
+ /// </returns>
+ /// <exception cref="ArgumentException">
+ /// If the return value would exceed <see cref="int.MaxValue"/>.
+ /// (The implementation should call <see cref="ThrowConversionOverflow"/>.)
+ /// </exception>
+ private unsafe int GetByteCountWithFallback(char* pOriginalChars, int originalCharCount, int charsConsumedSoFar, EncoderNLS encoder)
+ {
+     Debug.Assert(encoder != null, "This code path should only be called from EncoderNLS.");
+
+     // The upper bound is inclusive: the entry point skips the fast path whenever the encoder has
+     // leftover data, so it can legitimately arrive here with an empty source buffer
+     // (charsConsumedSoFar == originalCharCount == 0). A strict '<' would fire falsely in that case.
+     Debug.Assert(0 <= charsConsumedSoFar && charsConsumedSoFar <= originalCharCount, "Caller should've checked this condition.");
+
+     // First, try draining any data that already exists on the encoder instance. If we can't complete
+     // that operation, there's no point to continuing down to the main workhorse methods.
+
+     ReadOnlySpan<char> chars = new ReadOnlySpan<char>(pOriginalChars, originalCharCount).Slice(charsConsumedSoFar);
+
+     int totalByteCount = encoder.DrainLeftoverDataForGetByteCount(chars, out int charsConsumedJustNow);
+     chars = chars.Slice(charsConsumedJustNow);
+
+     // Now try invoking the "fast path" (no fallback) implementation.
+     // We can use Unsafe.AsPointer here since these spans are created from pinned data (raw pointers).
+
+     totalByteCount += GetByteCountFast(
+         pChars: (char*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(chars)),
+         charsLength: chars.Length,
+         fallback: encoder.Fallback,
+         charsConsumed: out charsConsumedJustNow);
+
+     // The running total is a plain int, so a wrap-around shows up as a negative value.
+     if (totalByteCount < 0)
+     {
+         ThrowConversionOverflow();
+     }
+
+     chars = chars.Slice(charsConsumedJustNow);
+
+     // If there's still data remaining in the source buffer, go down the fallback path.
+     // Otherwise we're finished.
+
+     if (!chars.IsEmpty)
+     {
+         totalByteCount += GetByteCountWithFallback(chars, originalCharCount, encoder);
+         if (totalByteCount < 0)
+         {
+             ThrowConversionOverflow();
+         }
+     }
+
+     return totalByteCount;
+ }
+
+ /// <summary>
+ /// Counts the bytes that transcoding <paramref name="chars"/> would produce, alternating between the
+ /// fallback mechanism (an <see cref="EncoderFallbackBuffer"/> created here) and <see cref="GetByteCountFast"/>.
+ /// </summary>
+ /// <returns>
+ /// The byte count for the input; trailing incomplete UTF-16 data is skipped when a non-flushing encoder is supplied.
+ /// </returns>
+ /// <exception cref="ArgumentException">
+ /// If the resulting byte count is greater than <see cref="int.MaxValue"/>.
+ /// (Implementation should call <see cref="ThrowConversionOverflow"/>.)
+ /// </exception>
+ private protected virtual unsafe int GetByteCountWithFallback(ReadOnlySpan<char> chars, int originalCharsLength, EncoderNLS encoder)
+ {
+ Debug.Assert(!chars.IsEmpty, "Caller shouldn't invoke this method with an empty input buffer.");
+ Debug.Assert(originalCharsLength >= 0, "Caller provided invalid parameter.");
+
+ // Since we're using Unsafe.AsPointer in our central loop, we want to ensure everything is pinned.
+
+ fixed (char* _pChars_Unused = &MemoryMarshal.GetReference(chars))
+ {
+ EncoderFallbackBuffer fallbackBuffer = EncoderFallbackBuffer.CreateAndInitialize(this, encoder, originalCharsLength);
+ int totalByteCount = 0;
+
+ do
+ {
+ // There's still data in the source buffer; why wasn't the previous fast-path able to consume it fully?
+ // There are two scenarios: (a) the source buffer contained invalid / incomplete UTF-16 data;
+ // or (b) the encoding can't translate this scalar value.
+
+ if (Rune.DecodeUtf16(chars, out Rune firstScalarValue, out int charsConsumedThisIteration) == OperationStatus.NeedMoreData
+ && encoder != null
+ && !encoder.MustFlush)
+ {
+ // We saw a standalone high surrogate at the end of the buffer, and the
+ // active EncoderNLS instance isn't asking us to flush. Since a call to
+ // GetBytes would've consumed this char by storing it in EncoderNLS._charLeftOver,
+ // we'll "consume" it by ignoring it (it contributes nothing to the count here).
+ // The next call to GetBytes will pick it up correctly.
+
+ goto Finish;
+ }
+
+ // We saw invalid UTF-16 data, or we saw a high surrogate that we need to flush (and
+ // thus treat as invalid), or we saw valid UTF-16 data that this encoder doesn't support.
+ // In any case we'll run it through the fallback mechanism.
+
+ int byteCountThisIteration = fallbackBuffer.InternalFallbackGetByteCount(chars, out charsConsumedThisIteration);
+
+ Debug.Assert(byteCountThisIteration >= 0, "Fallback shouldn't have returned a negative value.");
+ Debug.Assert(charsConsumedThisIteration >= 0, "Fallback shouldn't have returned a negative value.");
+
+ totalByteCount += byteCountThisIteration;
+ if (totalByteCount < 0)
+ {
+ ThrowConversionOverflow();
+ }
+
+ chars = chars.Slice(charsConsumedThisIteration);
+
+ if (!chars.IsEmpty)
+ {
+ // Still data remaining - run it through the fast-path to find the next data to fallback.
+ // While building up the tally we need to continually check for integer overflow
+ // since fallbacks can change the total byte count in unexpected ways.
+
+ byteCountThisIteration = GetByteCountFast(
+ pChars: (char*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(chars)),
+ charsLength: chars.Length,
+ fallback: null, // already tried this earlier and we still fell down the common path, so skip from now on
+ charsConsumed: out charsConsumedThisIteration);
+
+ Debug.Assert(byteCountThisIteration >= 0, "Workhorse shouldn't have returned a negative value.");
+ Debug.Assert(charsConsumedThisIteration >= 0, "Workhorse shouldn't have returned a negative value.");
+
+ totalByteCount += byteCountThisIteration;
+ if (totalByteCount < 0)
+ {
+ ThrowConversionOverflow();
+ }
+
+ chars = chars.Slice(charsConsumedThisIteration);
+ }
+ } while (!chars.IsEmpty);
+
+ Finish:
+
+ Debug.Assert(fallbackBuffer.Remaining == 0, "There should be no data in the fallback buffer after GetByteCount.");
+
+ return totalByteCount;
+ }
+ }
+
+ /*
+ * GETBYTES FAMILY OF FUNCTIONS
+ */
+
+ /// <summary>
+ /// Transcodes chars to bytes on behalf of an encoder instance; this is the entry point used by
+ /// <see cref="EncoderNLS.GetBytes"/> and <see cref="EncoderNLS.Convert"/>.
+ /// </summary>
+ internal virtual unsafe int GetBytes(char* pChars, int charCount, byte* pBytes, int byteCount, EncoderNLS encoder)
+ {
+     Debug.Assert(encoder != null, "This code path should only be called from EncoderNLS.");
+     Debug.Assert(charCount >= 0, "Caller should've checked this condition.");
+     Debug.Assert(pChars != null || charCount == 0, "Cannot provide a null pointer and a non-zero count.");
+     Debug.Assert(byteCount >= 0, "Caller should've checked this condition.");
+     Debug.Assert(pBytes != null || byteCount == 0, "Cannot provide a null pointer and a non-zero count.");
+
+     // The cheap route is only available when the encoder carries no leftover data AND a single
+     // fast-path call can transcode the whole source buffer. In every other situation we take the
+     // slow route, which builds spans, drains the EncoderNLS instance, and runs fallbacks.
+
+     if (encoder.HasLeftoverData)
+     {
+         // Leftover data must be drained first; nothing has been consumed or written yet.
+         return GetBytesWithFallback(pChars, charCount, pBytes, byteCount, 0, 0, encoder);
+     }
+
+     int bytesProduced = GetBytesFast(pChars, charCount, pBytes, byteCount, out int charsRead);
+     if (charsRead != charCount)
+     {
+         // The fast path stopped early; resume from where it left off using the fallback machinery.
+         return GetBytesWithFallback(pChars, charCount, pBytes, byteCount, charsRead, bytesProduced, encoder);
+     }
+
+     // Entire input handled by the fast path: record that on the encoder and we're done.
+     encoder._charsUsed = charCount;
+     return bytesProduced;
+ }
+
+ /// <summary>
+ /// Fast-path transcoding routine: converts <see langword="char"/>s into <see langword="byte"/>s and
+ /// stops as soon as the source buffer or the destination buffer is used up, or at the first
+ /// element that cannot be read.
+ /// </summary>
+ /// <returns>
+ /// The number of elements written to <paramref name="pBytes"/>; <paramref name="charsConsumed"/>
+ /// receives the number of elements of <paramref name="pChars"/> that were consumed.
+ /// </returns>
+ /// <remarks>
+ /// No fallback behavior of any kind belongs in an implementation of this method.
+ /// Types needing custom fallback behavior should override <see cref="GetBytesWithFallback"/> instead.
+ /// </remarks>
+ private protected virtual unsafe int GetBytesFast(char* pChars, int charsLength, byte* pBytes, int bytesLength, out int charsConsumed)
+ {
+     // There is intentionally no base implementation: every production-quality encoding is
+     // expected to override this method. The disabled region below is a slow, non-shipping
+     // reference implementation kept purely as a guide for implementers.
+
+#if false
+     ReadOnlySpan<char> source = new ReadOnlySpan<char>(pChars, charsLength);
+     Span<byte> destination = new Span<byte>(pBytes, bytesLength);
+
+     while (source.Length > 0)
+     {
+         // Stop at the first piece of invalid UTF-16 data...
+         if (Rune.DecodeUtf16(source, out Rune rune, out int charsThisPass) != OperationStatus.Done)
+         {
+             break;
+         }
+
+         // ...or when the scalar is not convertible to the target encoding, or when the
+         // destination buffer is too small to contain the encoded value.
+         if (EncodeRune(rune, destination, out int bytesThisPass) != OperationStatus.Done)
+         {
+             break;
+         }
+
+         source = source.Slice(charsThisPass);
+         destination = destination.Slice(bytesThisPass);
+     }
+
+     charsConsumed = charsLength - source.Length; // everything sliced off the source by the loop above
+     return bytesLength - destination.Length; // everything sliced off the destination by the loop above
+#else
+     Debug.Fail("This should be overridden by a subclassed type.");
+     throw NotImplemented.ByDesign;
+#endif
+ }
+
+ /// <summary>
+ /// Encoder-less entry into the fallback-aware transcoding loop. The two pointer/length pairs describe
+ /// the caller's original buffers, while <paramref name="charsConsumedSoFar"/> and
+ /// <paramref name="bytesWrittenSoFar"/> mark the offsets within those buffers at which the fallback
+ /// loop should start working. The remaining portions of both buffers are handed to the
+ /// <see cref="GetBytesWithFallback"/> virtual method, which a specific type may have overridden.
+ /// </summary>
+ /// <returns>
+ /// The total number of bytes written to <paramref name="pOriginalBytes"/>, including <paramref name="bytesWrittenSoFar"/>.
+ /// </returns>
+ /// <exception cref="ArgumentException">
+ /// If the destination buffer is not large enough to hold the entirety of the transcoded data.
+ /// </exception>
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ private protected unsafe int GetBytesWithFallback(char* pOriginalChars, int originalCharCount, byte* pOriginalBytes, int originalByteCount, int charsConsumedSoFar, int bytesWrittenSoFar)
+ {
+     // This stub is marked "no-inlining" so that we don't stack-spill spans into our
+     // immediate caller; doing so would grow the prolog of what is meant to be a very fast path.
+
+     Debug.Assert(0 <= charsConsumedSoFar && charsConsumedSoFar < originalCharCount, "Invalid arguments provided to method.");
+     Debug.Assert(0 <= bytesWrittenSoFar && bytesWrittenSoFar <= originalByteCount, "Invalid arguments provided to method.");
+
+     // Wrap both raw buffers and skip past the portions the caller has already handled.
+     ReadOnlySpan<char> unconsumedChars = new ReadOnlySpan<char>(pOriginalChars, originalCharCount).Slice(charsConsumedSoFar);
+     Span<byte> unwrittenBytes = new Span<byte>(pOriginalBytes, originalByteCount).Slice(bytesWrittenSoFar);
+
+     // No EncoderNLS is associated with this call, hence the null encoder.
+     return GetBytesWithFallback(unconsumedChars, originalCharCount, unwrittenBytes, originalByteCount, encoder: null);
+ }
+
+ /// <summary>
+ /// Transcodes chars to bytes, with an associated <see cref="EncoderNLS"/>. The first four arguments are
+ /// based on the original input before invoking this method; and <paramref name="charsConsumedSoFar"/>
+ /// and <paramref name="bytesWrittenSoFar"/> signal where in the provided buffers the fallback loop
+ /// should begin operating. The behavior of this method is to drain any leftover data in the
+ /// <see cref="EncoderNLS"/> instance, then to invoke the <see cref="GetBytesFast"/> virtual method
+ /// after data has been drained, then to call <see cref="GetBytesWithFallback(ReadOnlySpan{char}, int, Span{byte}, int, EncoderNLS)"/>.
+ /// </summary>
+ /// <param name="pOriginalChars">Pointer to the start of the caller's original source buffer.</param>
+ /// <param name="originalCharCount">Length, in chars, of the original source buffer.</param>
+ /// <param name="pOriginalBytes">Pointer to the start of the caller's original destination buffer.</param>
+ /// <param name="originalByteCount">Length, in bytes, of the original destination buffer.</param>
+ /// <param name="charsConsumedSoFar">
+ /// Number of leading chars the caller has already consumed. This may equal
+ /// <paramref name="originalCharCount"/> (for example zero chars consumed out of an empty buffer)
+ /// when the only remaining work is draining the encoder's leftover data.
+ /// </param>
+ /// <param name="bytesWrittenSoFar">Number of leading bytes the caller has already written.</param>
+ /// <param name="encoder">The encoder instance whose leftover data is drained and whose <c>_charsUsed</c> is updated; must not be null.</param>
+ /// <returns>
+ /// The total number of bytes written to <paramref name="pOriginalBytes"/>, including <paramref name="bytesWrittenSoFar"/>.
+ /// </returns>
+ /// <exception cref="ArgumentException">
+ /// If the destination buffer is too small to make any forward progress at all, or if the destination buffer is
+ /// too small to contain the entirety of the transcoded data and the <see cref="EncoderNLS"/> instance disallows
+ /// partial transcoding.
+ /// </exception>
+ private unsafe int GetBytesWithFallback(char* pOriginalChars, int originalCharCount, byte* pOriginalBytes, int originalByteCount, int charsConsumedSoFar, int bytesWrittenSoFar, EncoderNLS encoder)
+ {
+     Debug.Assert(encoder != null, "This code path should only be called from EncoderNLS.");
+
+     // The upper bound is inclusive: the entry point skips the fast path whenever the encoder has
+     // leftover data, so it can legitimately arrive here with an empty source buffer
+     // (charsConsumedSoFar == originalCharCount == 0). A strict '<' would fire falsely in that case.
+     Debug.Assert(0 <= charsConsumedSoFar && charsConsumedSoFar <= originalCharCount, "Caller should've checked this condition.");
+     Debug.Assert(0 <= bytesWrittenSoFar && bytesWrittenSoFar <= originalByteCount, "Caller should've checked this condition.");
+
+     // First, try draining any data that already exists on the encoder instance. If we can't complete
+     // that operation, there's no point to continuing down to the main workhorse methods.
+
+     ReadOnlySpan<char> chars = new ReadOnlySpan<char>(pOriginalChars, originalCharCount).Slice(charsConsumedSoFar);
+     Span<byte> bytes = new Span<byte>(pOriginalBytes, originalByteCount).Slice(bytesWrittenSoFar);
+
+     bool drainFinishedSuccessfully = encoder.TryDrainLeftoverDataForGetBytes(chars, bytes, out int charsConsumedJustNow, out int bytesWrittenJustNow);
+
+     chars = chars.Slice(charsConsumedJustNow); // whether or not the drain finished, we may have made some progress
+     bytes = bytes.Slice(bytesWrittenJustNow);
+
+     if (!drainFinishedSuccessfully)
+     {
+         ThrowBytesOverflow(encoder, nothingEncoded: bytes.Length == originalByteCount); // might not throw if we wrote at least one byte
+     }
+     else
+     {
+         // Now try invoking the "fast path" (no fallback) implementation.
+         // We can use Unsafe.AsPointer here since these spans are created from pinned data (raw pointers).
+
+         bytesWrittenJustNow = GetBytesFast(
+             pChars: (char*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(chars)),
+             charsLength: chars.Length,
+             pBytes: (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(bytes)),
+             bytesLength: bytes.Length,
+             charsConsumed: out charsConsumedJustNow);
+
+         chars = chars.Slice(charsConsumedJustNow);
+         bytes = bytes.Slice(bytesWrittenJustNow);
+
+         // If there's still data remaining in the source buffer, go down the fallback path.
+         // Otherwise we're finished.
+
+         if (!chars.IsEmpty)
+         {
+             // We'll optimistically tell the encoder that we're using everything; the
+             // GetBytesWithFallback method will overwrite this field if necessary.
+
+             encoder._charsUsed = originalCharCount;
+             return GetBytesWithFallback(chars, originalCharCount, bytes, originalByteCount, encoder);
+         }
+     }
+
+     // Reached when the drain could not finish (and ThrowBytesOverflow chose not to throw),
+     // or when the fast path consumed all remaining input.
+     encoder._charsUsed = originalCharCount - chars.Length; // total number of characters consumed up until now
+     return originalByteCount - bytes.Length; // total number of bytes written up until now
+ }
+
+ /// <summary>
+ /// Transcodes chars to bytes, using <see cref="Encoding.EncoderFallback"/> or <see cref="Encoder.Fallback"/> if needed, alternating between the fallback buffer and <see cref="GetBytesFast"/> until a buffer is depleted.
+ /// </summary>
+ /// <returns>
+ /// The total number of bytes written so far, computed as <paramref name="originalBytesLength"/> minus the space left in <paramref name="bytes"/>.
+ /// </returns>
+ /// <remarks>
+ /// The derived class should override this method if it might be able to provide a more optimized fallback
+ /// implementation, deferring to the base implementation if needed. This method calls <see cref="ThrowBytesOverflow"/>
+ /// if necessary, and updates the <c>_charsUsed</c> field of <paramref name="encoder"/> when one is supplied.
+ /// </remarks>
+ private protected virtual unsafe int GetBytesWithFallback(ReadOnlySpan<char> chars, int originalCharsLength, Span<byte> bytes, int originalBytesLength, EncoderNLS encoder)
+ {
+ Debug.Assert(!chars.IsEmpty, "Caller shouldn't invoke this method with an empty input buffer.");
+ Debug.Assert(originalCharsLength >= 0, "Caller provided invalid parameter.");
+ Debug.Assert(originalBytesLength >= 0, "Caller provided invalid parameter.");
+
+ // Since we're using Unsafe.AsPointer in our central loop, we want to ensure everything is pinned.
+
+ fixed (char* _pChars_Unused = &MemoryMarshal.GetReference(chars))
+ fixed (byte* _pBytes_Unused = &MemoryMarshal.GetReference(bytes))
+ {
+ EncoderFallbackBuffer fallbackBuffer = EncoderFallbackBuffer.CreateAndInitialize(this, encoder, originalCharsLength);
+
+ do
+ {
+ // There's still data in the source buffer; why wasn't the previous fast-path able to consume it fully?
+ // There are three scenarios: (a) the source buffer contained invalid / incomplete UTF-16 data;
+ // (b) the encoding can't translate this scalar value; or (c) the destination buffer is full.
+
+ switch (Rune.DecodeUtf16(chars, out Rune firstScalarValue, out int charsConsumedThisIteration))
+ {
+ case OperationStatus.NeedMoreData:
+ Debug.Assert(charsConsumedThisIteration == chars.Length, "If returning NeedMoreData, should out the entire buffer length as chars consumed.");
+ if (encoder is null || encoder.MustFlush)
+ {
+ goto case OperationStatus.InvalidData; // see comment in GetByteCountWithFallback
+ }
+ else
+ {
+ encoder._charLeftOver = chars[0]; // squirrel away remaining high surrogate char and finish
+ chars = ReadOnlySpan<char>.Empty;
+ goto Finish;
+ }
+
+ case OperationStatus.InvalidData:
+ break;
+
+ default:
+ if (EncodeRune(firstScalarValue, bytes, out _) == OperationStatus.DestinationTooSmall)
+ {
+ goto Finish; // source buffer contained valid UTF-16 but encoder ran out of space in destination buffer
+ }
+ break; // source buffer contained valid UTF-16 but encoder doesn't support this scalar value
+ }
+
+ // Now we know the reason for failure was that the original input was invalid
+ // for the encoding in use. Run it through the fallback mechanism.
+
+ bool fallbackFinished = fallbackBuffer.TryInternalFallbackGetBytes(chars, bytes, out charsConsumedThisIteration, out int bytesWrittenThisIteration);
+
+ // Regardless of whether the fallback finished, it did consume some number of
+ // chars, and it may have written some number of bytes.
+
+ chars = chars.Slice(charsConsumedThisIteration);
+ bytes = bytes.Slice(bytesWrittenThisIteration);
+
+ if (!fallbackFinished)
+ {
+ goto Finish; // fallback has pending state - it'll get written out on the next GetBytes call
+ }
+
+ if (!chars.IsEmpty)
+ {
+ // Still data remaining - run it through the fast-path to find the next data to fallback.
+
+ bytesWrittenThisIteration = GetBytesFast(
+ pChars: (char*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(chars)),
+ charsLength: chars.Length,
+ pBytes: (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(bytes)),
+ bytesLength: bytes.Length,
+ charsConsumed: out charsConsumedThisIteration);
+
+ Debug.Assert(bytesWrittenThisIteration >= 0, "Workhorse shouldn't have returned a negative value.");
+ Debug.Assert(charsConsumedThisIteration >= 0, "Workhorse shouldn't have returned a negative value.");
+
+ chars = chars.Slice(charsConsumedThisIteration);
+ bytes = bytes.Slice(bytesWrittenThisIteration);
+ }
+ } while (!chars.IsEmpty);
+
+ Finish:
+
+ // We reach this point when we deplete the source or destination buffer. There are a few
+ // cases to consider now. If the source buffer has been fully consumed and there's no
+ // leftover data in the EncoderNLS or the fallback buffer, we've completed transcoding.
+ // If the source buffer isn't empty or there's leftover data in the fallback buffer,
+ // it means we ran out of space in the destination buffer. This is an unrecoverable error
+ // if no EncoderNLS is in use (because only EncoderNLS can handle partial success), and
+ // even if an EncoderNLS is in use this is only recoverable if the EncoderNLS instance
+ // allows partial completion. Let's check all of these conditions now.
+
+ if (!chars.IsEmpty || fallbackBuffer.Remaining > 0)
+ {
+ // The line below will also throw if the encoder couldn't make any progress at all
+ // because the output buffer wasn't large enough to contain the result of even
+ // a single scalar conversion or fallback.
+
+ ThrowBytesOverflow(encoder, nothingEncoded: bytes.Length == originalBytesLength);
+ }
+
+ // If an EncoderNLS instance is active, update its "total consumed character count" value.
+
+ if (encoder != null)
+ {
+ Debug.Assert(originalCharsLength >= chars.Length, "About to report a negative number of chars used?");
+ encoder._charsUsed = originalCharsLength - chars.Length; // number of chars consumed
+ }
+
+ Debug.Assert(fallbackBuffer.Remaining == 0 || encoder != null, "Shouldn't have any leftover data in fallback buffer unless an EncoderNLS is in use.");
+
+ return originalBytesLength - bytes.Length;
+ }
+ }
+
+ /*
+ * GETCHARCOUNT FAMILY OF FUNCTIONS
+ */
+
+ /// <summary>
+ /// Counts the chars that decoding the given bytes would produce on behalf of a decoder instance;
+ /// this is the entry point used by <see cref="DecoderNLS.GetCharCount"/>.
+ /// </summary>
+ internal virtual unsafe int GetCharCount(byte* pBytes, int byteCount, DecoderNLS decoder)
+ {
+     Debug.Assert(decoder != null, "This code path should only be called from DecoderNLS.");
+     Debug.Assert(byteCount >= 0, "Caller should've checked this condition.");
+     Debug.Assert(pBytes != null || byteCount == 0, "Cannot provide a null pointer and a non-zero count.");
+
+     // The cheap route is only available when the decoder carries no leftover data AND a single
+     // fast-path call can consume the whole source buffer. In every other situation we take the
+     // slow route, which builds spans, drains the DecoderNLS instance, and runs fallbacks.
+
+     Debug.Assert(!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0, "Fallback buffer can't hold data between GetChars invocations.");
+
+     int fastPathCharCount = 0;
+     int fastPathBytesConsumed = 0;
+
+     bool mayUseFastPath = !decoder.HasLeftoverData;
+     if (mayUseFastPath)
+     {
+         fastPathCharCount = GetCharCountFast(pBytes, byteCount, decoder.Fallback, out fastPathBytesConsumed);
+         if (fastPathBytesConsumed == byteCount)
+         {
+             return fastPathCharCount; // whole input counted without needing any fallback
+         }
+     }
+
+     // Either leftover data exists or the fast path stopped early: continue from wherever the
+     // fast path got to (offset zero if it never ran) using the draining + fallback machinery.
+
+     int combinedCharCount = fastPathCharCount + GetCharCountWithFallback(pBytes, byteCount, fastPathBytesConsumed, decoder);
+     if (combinedCharCount < 0)
+     {
+         // A negative sum means the running int total wrapped around.
+         ThrowConversionOverflow();
+     }
+
+     return combinedCharCount;
+ }
+
+ /// <summary>
+ /// Fast-path counting routine: tallies how many <see langword="char"/>s the source data would
+ /// transcode to, stopping once the source buffer is exhausted or at the first element that
+ /// cannot be read. Implementations are free to look at <paramref name="fallback"/> in order to
+ /// short-circuit the tally, but they must never call <see cref="DecoderFallback.CreateFallbackBuffer"/>.
+ /// </summary>
+ /// <returns>
+ /// The transcoded char count accumulated so far; <paramref name="bytesConsumed"/> receives the
+ /// number of elements of <paramref name="pBytes"/> that were consumed to produce that count.
+ /// </returns>
+ /// <exception cref="ArgumentException">
+ /// If the char count would be greater than <see cref="int.MaxValue"/>.
+ /// (Implementation should call <see cref="ThrowConversionOverflow"/>.)
+ /// </exception>
+ /// <remarks>
+ /// No fallback behavior of any kind belongs in an implementation of this method.
+ /// Types needing custom fallback behavior should override <see cref="GetCharCountWithFallback"/> instead.
+ /// </remarks>
+ private protected virtual unsafe int GetCharCountFast(byte* pBytes, int bytesLength, DecoderFallback fallback, out int bytesConsumed)
+ {
+     // There is intentionally no base implementation: every production-quality encoding is
+     // expected to override this method. The disabled region below is a slow, non-shipping
+     // reference implementation kept purely as a guide for implementers.
+
+#if false
+     ReadOnlySpan<byte> remaining = new ReadOnlySpan<byte>(pBytes, bytesLength);
+     int charTally = 0;
+
+     while (remaining.Length > 0)
+     {
+         // Only a status of Done lets us keep going; anything else is left for the fallback mechanism.
+
+         OperationStatus status = DecodeFirstRune(remaining, out Rune rune, out int bytesThisPass);
+         if (status != OperationStatus.Done)
+         {
+             break;
+         }
+
+         charTally += rune.Utf16SequenceLength;
+         if (charTally < 0)
+         {
+             ThrowConversionOverflow();
+         }
+
+         remaining = remaining.Slice(bytesThisPass);
+     }
+
+     bytesConsumed = bytesLength - remaining.Length; // everything sliced off by the loop above
+     return charTally;
+#else
+     Debug.Fail("This should be overridden by a subclassed type.");
+     throw NotImplemented.ByDesign;
+#endif
+ }
+
+ /// <summary>
+ /// Decoder-less entry into the fallback-aware char counting loop. The pointer and length describe
+ /// the caller's original input, and <paramref name="bytesConsumedSoFar"/> marks the offset within
+ /// that input at which the fallback loop should start working.
+ /// </summary>
+ /// <returns>
+ /// The char count resulting from transcoding the input data.
+ /// </returns>
+ /// <exception cref="ArgumentException">
+ /// If the resulting char count is greater than <see cref="int.MaxValue"/>.
+ /// (Implementation should call <see cref="ThrowConversionOverflow"/>.)
+ /// </exception>
+ [MethodImpl(MethodImplOptions.NoInlining)] // don't stack spill spans into our caller
+ private protected unsafe int GetCharCountWithFallback(byte* pBytesOriginal, int originalByteCount, int bytesConsumedSoFar)
+ {
+     // This stub is marked "no-inlining" so that we don't stack-spill spans into our
+     // immediate caller; doing so would grow the prolog of what is meant to be a very fast path.
+
+     Debug.Assert(0 <= bytesConsumedSoFar && bytesConsumedSoFar < originalByteCount, "Invalid arguments provided to method.");
+
+     // Wrap the raw buffer and skip past whatever the caller has already accounted for.
+     ReadOnlySpan<byte> unconsumedBytes = new ReadOnlySpan<byte>(pBytesOriginal, originalByteCount).Slice(bytesConsumedSoFar);
+
+     // No DecoderNLS is associated with this call, hence the null decoder.
+     return GetCharCountWithFallback(unconsumedBytes, originalByteCount, decoder: null);
+ }
+
+ /// <summary>
+ /// Gets the number of <see langword="char"/>s that would result from transcoding the provided
+ /// input data, with an associated <see cref="DecoderNLS"/>. The first two arguments are
+ /// based on the original input before invoking this method; and <paramref name="bytesConsumedSoFar"/>
+ /// signals where in the provided source buffer the fallback loop should begin operating.
+ /// The behavior of this method is to consume (non-destructively) any leftover data in the
+ /// <see cref="DecoderNLS"/> instance, then to invoke the <see cref="GetCharCountFast"/> virtual method
+ /// after data has been drained, then to call <see cref="GetCharCountWithFallback(ReadOnlySpan{byte}, int, DecoderNLS)"/>.
+ /// </summary>
+ /// <param name="pOriginalBytes">Pointer to the start of the caller's original source buffer.</param>
+ /// <param name="originalByteCount">Length, in bytes, of the original source buffer.</param>
+ /// <param name="bytesConsumedSoFar">
+ /// Number of leading bytes the caller has already accounted for. This may equal
+ /// <paramref name="originalByteCount"/> (for example zero bytes consumed out of an empty buffer)
+ /// when the only remaining work is draining the decoder's leftover data.
+ /// </param>
+ /// <param name="decoder">The decoder instance whose leftover data and fallback are used; must not be null.</param>
+ /// <returns>
+ /// The total number of chars that would result from transcoding the remaining portion of the source buffer.
+ /// </returns>
+ /// <exception cref="ArgumentException">
+ /// If the return value would exceed <see cref="int.MaxValue"/>.
+ /// (The implementation should call <see cref="ThrowConversionOverflow"/>.)
+ /// </exception>
+ private unsafe int GetCharCountWithFallback(byte* pOriginalBytes, int originalByteCount, int bytesConsumedSoFar, DecoderNLS decoder)
+ {
+     Debug.Assert(decoder != null, "This code path should only be called from DecoderNLS.");
+
+     // The upper bound is inclusive: the entry point skips the fast path whenever the decoder has
+     // leftover data, so it can legitimately arrive here with an empty source buffer
+     // (bytesConsumedSoFar == originalByteCount == 0). A strict '<' would fire falsely in that case.
+     Debug.Assert(0 <= bytesConsumedSoFar && bytesConsumedSoFar <= originalByteCount, "Caller should've checked this condition.");
+
+     // First, try draining any data that already exists on the decoder instance. If we can't complete
+     // that operation, there's no point to continuing down to the main workhorse methods.
+
+     ReadOnlySpan<byte> bytes = new ReadOnlySpan<byte>(pOriginalBytes, originalByteCount).Slice(bytesConsumedSoFar);
+
+     int totalCharCount = decoder.DrainLeftoverDataForGetCharCount(bytes, out int bytesConsumedJustNow);
+     bytes = bytes.Slice(bytesConsumedJustNow);
+
+     // Now try invoking the "fast path" (no fallback) implementation.
+     // We can use Unsafe.AsPointer here since these spans are created from pinned data (raw pointers).
+
+     totalCharCount += GetCharCountFast(
+         pBytes: (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(bytes)),
+         bytesLength: bytes.Length,
+         fallback: decoder.Fallback,
+         bytesConsumed: out bytesConsumedJustNow);
+
+     // The running total is a plain int, so a wrap-around shows up as a negative value.
+     if (totalCharCount < 0)
+     {
+         ThrowConversionOverflow();
+     }
+
+     bytes = bytes.Slice(bytesConsumedJustNow);
+
+     // If there's still data remaining in the source buffer, go down the fallback path.
+     // Otherwise we're finished.
+
+     if (!bytes.IsEmpty)
+     {
+         totalCharCount += GetCharCountWithFallback(bytes, originalByteCount, decoder);
+         if (totalCharCount < 0)
+         {
+             ThrowConversionOverflow();
+         }
+     }
+
+     return totalCharCount;
+ }
+
+ /// <summary>
+ /// Counts the number of chars that would result from transcoding the provided bytes,
+ /// using the provided <see cref="DecoderFallbackBuffer"/> if necessary.
+ /// </summary>
+ /// <returns>
+ /// The char count resulting from transcoding the input data.
+ /// </returns>
+ /// <exception cref="ArgumentException">
+ /// If the resulting char count is greater than <see cref="int.MaxValue"/>.
+ /// (Implementation should call <see cref="ThrowConversionOverflow"/>.)
+ /// </exception>
+ private unsafe int GetCharCountWithFallback(ReadOnlySpan<byte> bytes, int originalBytesLength, DecoderNLS decoder)
+ {
+ Debug.Assert(!bytes.IsEmpty, "Caller shouldn't invoke this method with an empty input buffer.");
+ Debug.Assert(originalBytesLength >= 0, "Caller provided invalid parameter.");
+
+ // Since we're using Unsafe.AsPointer in our central loop, we want to ensure everything is pinned.
+
+ fixed (byte* _pBytes_Unused = &MemoryMarshal.GetReference(bytes))
+ {
+ DecoderFallbackBuffer fallbackBuffer = DecoderFallbackBuffer.CreateAndInitialize(this, decoder, originalBytesLength);
+ int totalCharCount = 0;
+
+ do
+ {
+ // There's still data in the source buffer; why wasn't the previous fast-path able to consume it fully?
+ // There are two scenarios: (a) the source buffer contained invalid data, or (b) it contained incomplete data.
+
+ if (DecodeFirstRune(bytes, out Rune firstScalarValue, out int bytesConsumedThisIteration) == OperationStatus.NeedMoreData
+ && decoder != null
+ && !decoder.MustFlush)
+ {
+ // We saw incomplete data at the end of the buffer, and the active DecoderNLS instance
+ // isn't asking us to flush. Since a call to GetChars would've consumed this data by
+ // storing it in the DecoderNLS instance, we'll "consume" it by ignoring it.
+ // The next call to GetChars will pick it up correctly.
+
+ goto Finish;
+ }
+
+ // We saw invalid binary data, or we saw incomplete data that we need to flush (and thus
+ // treat as invalid). In any case we'll run through the fallback mechanism.
+
+ int charCountThisIteration = fallbackBuffer.InternalFallbackGetCharCount(bytes, bytesConsumedThisIteration);
+
+ Debug.Assert(charCountThisIteration >= 0, "Fallback shouldn't have returned a negative value.");
+
+ // A negative running total means the addition overflowed past int.MaxValue.
+ totalCharCount += charCountThisIteration;
+ if (totalCharCount < 0)
+ {
+ ThrowConversionOverflow();
+ }
+
+ bytes = bytes.Slice(bytesConsumedThisIteration);
+
+ if (!bytes.IsEmpty)
+ {
+ // Still data remaining - run it through the fast-path to find the next data to fallback.
+ // While building up the tally we need to continually check for integer overflow
+ // since fallbacks can change the total char count in unexpected ways.
+
+ charCountThisIteration = GetCharCountFast(
+ pBytes: (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(bytes)),
+ bytesLength: bytes.Length,
+ fallback: null, // wasn't able to be short-circuited by our caller; don't bother trying again
+ bytesConsumed: out bytesConsumedThisIteration);
+
+ Debug.Assert(charCountThisIteration >= 0, "Workhorse shouldn't have returned a negative value.");
+ Debug.Assert(bytesConsumedThisIteration >= 0, "Workhorse shouldn't have returned a negative value.");
+
+ totalCharCount += charCountThisIteration;
+ if (totalCharCount < 0)
+ {
+ ThrowConversionOverflow();
+ }
+
+ bytes = bytes.Slice(bytesConsumedThisIteration);
+ }
+ } while (!bytes.IsEmpty);
+
+ Finish:
+
+ // Reached either by exhausting the input or by the early exit for trailing incomplete data.
+ Debug.Assert(fallbackBuffer.Remaining == 0, "There should be no data in the fallback buffer after GetCharCount.");
+
+ return totalCharCount;
+ }
+ }
+
+ /*
+ * GETCHARS FAMILY OF FUNCTIONS
+ */
+
+ /// <summary>
+ /// Entry point from <see cref="DecoderNLS.GetChars"/> and <see cref="DecoderNLS.Convert"/>.
+ /// </summary>
+ /// <returns>
+ /// The number of chars written to <paramref name="pChars"/>. The number of bytes consumed
+ /// is reported through the decoder's <c>_bytesUsed</c> field.
+ /// </returns>
+ internal virtual unsafe int GetChars(byte* pBytes, int byteCount, char* pChars, int charCount, DecoderNLS decoder)
+ {
+ Debug.Assert(decoder != null, "This code path should only be called from DecoderNLS.");
+ Debug.Assert(byteCount >= 0, "Caller should've checked this condition.");
+ Debug.Assert(pBytes != null || byteCount == 0, "Cannot provide a null pointer and a non-zero count.");
+ Debug.Assert(charCount >= 0, "Caller should've checked this condition.");
+ Debug.Assert(pChars != null || charCount == 0, "Cannot provide a null pointer and a non-zero count.");
+
+ // We're going to try to stay on the fast-path as much as we can. That means that we have
+ // no leftover data to drain and the entire source buffer can be transcoded in a single
+ // fast-path invocation. If either of these doesn't hold, we'll go down the slow path of
+ // creating spans, draining the DecoderNLS instance, and falling back.
+
+ // These stay at zero when the fast path is skipped because the decoder holds leftover data.
+ int charsWritten = 0;
+ int bytesConsumed = 0;
+
+ if (!decoder.HasLeftoverData)
+ {
+ charsWritten = GetCharsFast(pBytes, byteCount, pChars, charCount, out bytesConsumed);
+ if (bytesConsumed == byteCount)
+ {
+ decoder._bytesUsed = byteCount; // the whole input was transcoded on the fast path
+ return charsWritten;
+ }
+ }
+
+ // We had leftover data, or we couldn't consume the entire input buffer.
+ // Let's go down the draining + fallback mechanisms.
+
+ return GetCharsWithFallback(pBytes, byteCount, pChars, charCount, bytesConsumed, charsWritten, decoder);
+ }
+
+ /// <summary>
+ /// Transcodes <see langword="byte"/>s to <see langword="char"/>s, exiting when the source or destination
+ /// buffer is consumed or when the first unreadable data is encountered.
+ /// </summary>
+ /// <returns>
+ /// Via <paramref name="bytesConsumed"/>, the number of elements from <paramref name="pBytes"/> which
+ /// were consumed; and returns the number of elements written to <paramref name="pChars"/>.
+ /// </returns>
+ /// <remarks>
+ /// The implementation should not attempt to perform any sort of fallback behavior.
+ /// If custom fallback behavior is necessary, override <see cref="GetCharsWithFallback(ReadOnlySpan{byte}, int, Span{char}, int, DecoderNLS)"/>.
+ /// </remarks>
+ private protected virtual unsafe int GetCharsFast(byte* pBytes, int bytesLength, char* pChars, int charsLength, out int bytesConsumed)
+ {
+ // Any production-quality type would override this method and provide a real
+ // implementation, so we won't provide a base implementation. However, a
+ // non-shipping slow reference implementation is provided below for convenience.
+
+#if false
+ ReadOnlySpan<byte> bytes = new ReadOnlySpan<byte>(pBytes, bytesLength);
+ Span<char> chars = new Span<char>(pChars, charsLength);
+
+ while (!bytes.IsEmpty)
+ {
+ if ((DecodeFirstRune(bytes, out Rune firstScalarValue, out int bytesConsumedJustNow) != OperationStatus.Done)
+ || !firstScalarValue.TryEncode(chars, out int charsWrittenJustNow))
+ {
+ // Invalid or incomplete binary data, or destination buffer too small to contain decoded value
+
+ break;
+ }
+
+ bytes = bytes.Slice(bytesConsumedJustNow);
+ chars = chars.Slice(charsWrittenJustNow);
+ }
+
+ bytesConsumed = bytesLength - bytes.Length; // number of bytes consumed across all loop iterations above
+ return charsLength - chars.Length; // number of chars written across all loop iterations above
+#else
+ // The base implementation always fails: an assert in debug builds, then an exception.
+ Debug.Fail("This should be overridden by a subclassed type.");
+ throw NotImplemented.ByDesign;
+#endif
+ }
+
+ /// <summary>
+ /// Transcodes bytes to chars, with no associated <see cref="DecoderNLS"/>. The first four arguments are
+ /// based on the original input before invoking this method; and <paramref name="bytesConsumedSoFar"/>
+ /// and <paramref name="charsWrittenSoFar"/> signal where in the provided buffers the fallback loop
+ /// should begin operating. The behavior of this method is to call the <see cref="GetCharsWithFallback(ReadOnlySpan{byte}, int, Span{char}, int, DecoderNLS)"/>
+ /// virtual method as overridden by the specific type, and failing that go down the shared fallback path.
+ /// </summary>
+ /// <returns>
+ /// The total number of chars written to <paramref name="pOriginalChars"/>, including <paramref name="charsWrittenSoFar"/>.
+ /// </returns>
+ /// <exception cref="ArgumentException">
+ /// If the destination buffer is not large enough to hold the entirety of the transcoded data.
+ /// </exception>
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ private protected unsafe int GetCharsWithFallback(byte* pOriginalBytes, int originalByteCount, char* pOriginalChars, int originalCharCount, int bytesConsumedSoFar, int charsWrittenSoFar)
+ {
+ // This is a stub method that's marked "no-inlining" so that we don't stack-spill spans
+ // into our immediate caller. Doing so increases the method prolog in what's supposed to
+ // be a very fast path.
+
+ Debug.Assert(0 <= bytesConsumedSoFar && bytesConsumedSoFar < originalByteCount, "Invalid arguments provided to method.");
+ Debug.Assert(0 <= charsWrittenSoFar && charsWrittenSoFar <= originalCharCount, "Invalid arguments provided to method.");
+
+ // Wrap the raw buffers in spans, skip the portions already processed, and pass a null
+ // decoder since this overload has no DecoderNLS instance.
+ return GetCharsWithFallback(
+ bytes: new ReadOnlySpan<byte>(pOriginalBytes, originalByteCount).Slice(bytesConsumedSoFar),
+ originalBytesLength: originalByteCount,
+ chars: new Span<char>(pOriginalChars, originalCharCount).Slice(charsWrittenSoFar),
+ originalCharsLength: originalCharCount,
+ decoder: null);
+ }
+
+ /// <summary>
+ /// Transcodes bytes to chars, with an associated <see cref="DecoderNLS"/>. The first four arguments are
+ /// based on the original input before invoking this method; and <paramref name="bytesConsumedSoFar"/>
+ /// and <paramref name="charsWrittenSoFar"/> signal where in the provided buffers the fallback loop
+ /// should begin operating. The behavior of this method is to drain any leftover data in the
+ /// <see cref="DecoderNLS"/> instance, then to invoke the <see cref="GetCharsFast"/> virtual method
+ /// after data has been drained, then to call <see cref="GetCharsWithFallback(ReadOnlySpan{byte}, int, Span{char}, int, DecoderNLS)"/>.
+ /// </summary>
+ /// <returns>
+ /// The total number of chars written to <paramref name="pOriginalChars"/>, including <paramref name="charsWrittenSoFar"/>.
+ /// </returns>
+ /// <exception cref="ArgumentException">
+ /// If the destination buffer is too small to make any forward progress at all, or if the destination buffer is
+ /// too small to contain the entirety of the transcoded data and the <see cref="DecoderNLS"/> instance disallows
+ /// partial transcoding.
+ /// </exception>
+ private protected unsafe int GetCharsWithFallback(byte* pOriginalBytes, int originalByteCount, char* pOriginalChars, int originalCharCount, int bytesConsumedSoFar, int charsWrittenSoFar, DecoderNLS decoder)
+ {
+ Debug.Assert(decoder != null, "This code path should only be called from DecoderNLS.");
+ Debug.Assert(0 <= bytesConsumedSoFar && bytesConsumedSoFar < originalByteCount, "Caller should've checked this condition.");
+ Debug.Assert(0 <= charsWrittenSoFar && charsWrittenSoFar <= originalCharCount, "Caller should've checked this condition.");
+
+ // First, try draining any data that already exists on the decoder instance. If we can't complete
+ // that operation, there's no point to continuing down to the main workhorse methods.
+ //
+ // Like GetBytes, there may be leftover data in the DecoderNLS instance. But unlike GetBytes,
+ // the bytes -> chars conversion doesn't allow leftover data in the fallback buffer. This means
+ // that the drain operation below will either succeed fully or fail; there's no partial success
+ // condition as with the chars -> bytes conversion. The drain method will throw if there's not
+ // enough space in the destination buffer.
+
+ ReadOnlySpan<byte> bytes = new ReadOnlySpan<byte>(pOriginalBytes, originalByteCount).Slice(bytesConsumedSoFar);
+ Span<char> chars = new Span<char>(pOriginalChars, originalCharCount).Slice(charsWrittenSoFar);
+
+ int charsWrittenJustNow = decoder.DrainLeftoverDataForGetChars(bytes, chars, out int bytesConsumedJustNow);
+
+ bytes = bytes.Slice(bytesConsumedJustNow);
+ chars = chars.Slice(charsWrittenJustNow);
+
+ Debug.Assert(!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0, "Should be no remaining fallback data at this point.");
+
+ // Now try invoking the "fast path" (no fallback buffer) implementation.
+ // We can use Unsafe.AsPointer here since these spans are created from pinned data (raw pointers).
+
+ charsWrittenJustNow = GetCharsFast(
+ pBytes: (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(bytes)),
+ bytesLength: bytes.Length,
+ pChars: (char*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(chars)),
+ charsLength: chars.Length,
+ bytesConsumed: out bytesConsumedJustNow);
+
+ bytes = bytes.Slice(bytesConsumedJustNow);
+ chars = chars.Slice(charsWrittenJustNow);
+
+ // We'll optimistically tell the decoder that we're using everything; the
+ // GetCharsWithFallback method will overwrite this field if necessary.
+
+ decoder._bytesUsed = originalByteCount;
+
+ if (bytes.IsEmpty)
+ {
+ // 'chars' is now the unwritten tail of the destination, so the difference is what was written.
+ return originalCharCount - chars.Length; // total number of chars written
+ }
+ else
+ {
+ return GetCharsWithFallback(bytes, originalByteCount, chars, originalCharCount, decoder);
+ }
+ }
+
+ /// <summary>
+ /// Transcodes bytes to chars, using <see cref="Encoding.DecoderFallback"/> or <see cref="Decoder.Fallback"/> if needed.
+ /// </summary>
+ /// <returns>
+ /// The total number of chars written to <paramref name="chars"/> (based on <paramref name="originalCharsLength"/>).
+ /// </returns>
+ /// <remarks>
+ /// The derived class should override this method if it might be able to provide a more optimized fallback
+ /// implementation, deferring to the base implementation if needed. This method calls <see cref="ThrowCharsOverflow"/>
+ /// if necessary.
+ /// </remarks>
+ private protected virtual unsafe int GetCharsWithFallback(ReadOnlySpan<byte> bytes, int originalBytesLength, Span<char> chars, int originalCharsLength, DecoderNLS decoder)
+ {
+ Debug.Assert(!bytes.IsEmpty, "Caller shouldn't invoke this method with an empty input buffer.");
+ Debug.Assert(originalBytesLength >= 0, "Caller provided invalid parameter.");
+ Debug.Assert(originalCharsLength >= 0, "Caller provided invalid parameter.");
+
+ // Since we're using Unsafe.AsPointer in our central loop, we want to ensure everything is pinned.
+
+ fixed (byte* _pBytes_Unused = &MemoryMarshal.GetReference(bytes))
+ fixed (char* _pChars_Unused = &MemoryMarshal.GetReference(chars))
+ {
+ DecoderFallbackBuffer fallbackBuffer = DecoderFallbackBuffer.CreateAndInitialize(this, decoder, originalBytesLength);
+
+ do
+ {
+ // There's still data in the source buffer; why wasn't the previous fast-path able to consume it fully?
+ // There are two scenarios: (a) the source buffer contained invalid data, or (b) it contained incomplete data.
+
+ int charsWrittenThisIteration;
+
+ switch (DecodeFirstRune(bytes, out _, out int bytesConsumedThisIteration))
+ {
+ case OperationStatus.NeedMoreData:
+ Debug.Assert(bytesConsumedThisIteration == bytes.Length, "If returning NeedMoreData, should out the entire buffer length as bytes consumed.");
+ if (decoder is null || decoder.MustFlush)
+ {
+ goto case OperationStatus.InvalidData; // see comment in GetCharCountWithFallback
+ }
+ else
+ {
+ decoder.SetLeftoverData(bytes); // squirrel away remaining data and finish
+ bytes = ReadOnlySpan<byte>.Empty;
+ goto Finish;
+ }
+
+ case OperationStatus.InvalidData:
+ if (fallbackBuffer.TryInternalFallbackGetChars(bytes, bytesConsumedThisIteration, chars, out charsWrittenThisIteration))
+ {
+ // We successfully consumed some bytes, sent it through the fallback, and wrote some chars.
+
+ Debug.Assert(charsWrittenThisIteration >= 0, "Fallback shouldn't have returned a negative value.");
+ break;
+ }
+ else
+ {
+ // We generated fallback data, but the destination buffer wasn't large enough to hold it.
+ // Don't mark any of the bytes we ran through the fallback as consumed, and terminate
+ // the loop now and let our caller handle this condition.
+
+ goto Finish;
+ }
+
+ default:
+ goto Finish; // no error on input, so destination must have been too small
+ }
+
+ // Only a successful fallback breaks out of the switch to here; every other path jumped to Finish.
+ bytes = bytes.Slice(bytesConsumedThisIteration);
+ chars = chars.Slice(charsWrittenThisIteration);
+
+ if (!bytes.IsEmpty)
+ {
+ // Still data remaining - run it through the fast-path to find the next data to fallback.
+ // We need to figure out why we weren't able to make progress.
+
+ charsWrittenThisIteration = GetCharsFast(
+ pBytes: (byte*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(bytes)),
+ bytesLength: bytes.Length,
+ pChars: (char*)Unsafe.AsPointer(ref MemoryMarshal.GetReference(chars)),
+ charsLength: chars.Length,
+ bytesConsumed: out bytesConsumedThisIteration);
+
+ Debug.Assert(charsWrittenThisIteration >= 0, "Workhorse shouldn't have returned a negative value.");
+ Debug.Assert(bytesConsumedThisIteration >= 0, "Workhorse shouldn't have returned a negative value.");
+
+ bytes = bytes.Slice(bytesConsumedThisIteration);
+ chars = chars.Slice(charsWrittenThisIteration);
+ }
+ } while (!bytes.IsEmpty);
+
+ Finish:
+
+ // We reach this point when we deplete the source or destination buffer. See main comment
+ // at the end of GetBytesWithFallback for how the below logic works; the primary difference
+ // here is that GetChars disallows leftover data in the fallback buffer between calls.
+
+ Debug.Assert(fallbackBuffer.Remaining == 0);
+
+ if (!bytes.IsEmpty)
+ {
+ // The line below will also throw if the decoder couldn't make any progress at all
+ // because the output buffer wasn't large enough to contain the result of even
+ // a single scalar conversion or fallback.
+
+ ThrowCharsOverflow(decoder, nothingDecoded: chars.Length == originalCharsLength);
+ }
+
+ // If a DecoderNLS instance is active, update its "total consumed byte count" value.
+
+ if (decoder != null)
+ {
+ Debug.Assert(originalBytesLength >= bytes.Length, "About to report a negative number of bytes used?");
+ decoder._bytesUsed = originalBytesLength - bytes.Length; // number of bytes consumed
+ }
+
+ return originalCharsLength - chars.Length; // total number of chars written
+ }
+ }
+ }
+}
diff --git a/src/System.Private.CoreLib/shared/System/Text/Encoding.cs b/src/System.Private.CoreLib/shared/System/Text/Encoding.cs
index 175e5442fd..8947b7fca0 100644
--- a/src/System.Private.CoreLib/shared/System/Text/Encoding.cs
+++ b/src/System.Private.CoreLib/shared/System/Text/Encoding.cs
@@ -3,11 +3,8 @@
// See the LICENSE file in the project root for more information.
using System.Diagnostics;
-using System.Globalization;
-using System.Threading;
using System.Runtime.InteropServices;
using System.Runtime.Serialization;
-using System.Diagnostics.CodeAnalysis;
namespace System.Text
{
@@ -74,7 +71,7 @@ namespace System.Text
// generally executes faster.
//
- public abstract class Encoding : ICloneable
+ public abstract partial class Encoding : ICloneable
{
// For netcore we use UTF8 as default encoding since ANSI isn't available
private static readonly UTF8Encoding.UTF8EncodingSealed s_defaultEncoding = new UTF8Encoding.UTF8EncodingSealed(encoderShouldEmitUTF8Identifier: false);
@@ -559,13 +556,16 @@ namespace System.Text
return newEncoding;
}
-
public bool IsReadOnly
{
get
{
return (_isReadOnly);
}
+ private protected set
+ {
+ _isReadOnly = value;
+ }
}
// Returns an encoding for the ASCII character set. The returned encoding
@@ -666,16 +666,6 @@ namespace System.Text
}
}
- // For NLS Encodings, workhorse takes an encoder (may be null)
- // Always validate parameters before calling internal version, which will only assert.
- internal virtual unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
- {
- Debug.Assert(chars != null);
- Debug.Assert(count >= 0);
-
- return GetByteCount(chars, count);
- }
-
// Returns a byte array containing the encoded representation of the given
// character array.
//
@@ -772,14 +762,6 @@ namespace System.Text
return GetBytes(s.ToCharArray(), charIndex, charCount, bytes, byteIndex);
}
- // This is our internal workhorse
- // Always validate parameters before calling internal version, which will only assert.
- internal virtual unsafe int GetBytes(char* chars, int charCount,
- byte* bytes, int byteCount, EncoderNLS encoder)
- {
- return GetBytes(chars, charCount, bytes, byteCount);
- }
-
// We expect this to be the workhorse for NLS Encodings, but for existing
// ones we need a working (if slow) default implementation)
//
@@ -898,13 +880,6 @@ namespace System.Text
}
}
- // This is our internal workhorse
- // Always validate parameters before calling internal version, which will only assert.
- internal virtual unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
- {
- return GetCharCount(bytes, count);
- }
-
// Returns a character array containing the decoded representation of a
// given byte array.
//
@@ -1011,15 +986,6 @@ namespace System.Text
}
}
- // This is our internal workhorse
- // Always validate parameters before calling internal version, which will only assert.
- internal virtual unsafe int GetChars(byte* bytes, int byteCount,
- char* chars, int charCount, DecoderNLS decoder)
- {
- return GetChars(bytes, byteCount, chars, charCount);
- }
-
-
[CLSCompliant(false)]
public unsafe string GetString(byte* bytes, int byteCount)
{
@@ -1238,6 +1204,12 @@ namespace System.Text
encoder.ClearMustFlush();
}
+ [StackTraceHidden]
+ internal static void ThrowConversionOverflow()
+ {
+ throw new ArgumentException(SR.Argument_ConversionOverflow);
+ }
+
internal void ThrowCharsOverflow()
{
// Special message to include fallback type in case fallback's GetMaxCharCount is broken
diff --git a/src/System.Private.CoreLib/shared/System/Text/EncodingNLS.cs b/src/System.Private.CoreLib/shared/System/Text/EncodingNLS.cs
index e6fa0627d3..51d0e66044 100644
--- a/src/System.Private.CoreLib/shared/System/Text/EncodingNLS.cs
+++ b/src/System.Private.CoreLib/shared/System/Text/EncodingNLS.cs
@@ -4,6 +4,7 @@
using System;
using System.Collections;
+using System.Diagnostics;
using System.Globalization;
using System.Runtime.InteropServices;
using System.Threading;
@@ -27,6 +28,7 @@ namespace System.Text
{
protected EncodingNLS(int codePage) : base(codePage)
{
+ Debug.Assert(GetType() == typeof(Latin1Encoding), "Should be no instantiations of this type except via Latin1Encoding.");
}
// Returns the number of bytes required to encode a range of characters in
diff --git a/src/System.Private.CoreLib/shared/System/ThrowHelper.cs b/src/System.Private.CoreLib/shared/System/ThrowHelper.cs
index c3c91d8f0b..06b3ce41a6 100644
--- a/src/System.Private.CoreLib/shared/System/ThrowHelper.cs
+++ b/src/System.Private.CoreLib/shared/System/ThrowHelper.cs
@@ -452,8 +452,20 @@ namespace System
return "startIndex";
case ExceptionArgument.task:
return "task";
+ case ExceptionArgument.bytes:
+ return "bytes";
+ case ExceptionArgument.byteIndex:
+ return "byteIndex";
+ case ExceptionArgument.byteCount:
+ return "byteCount";
case ExceptionArgument.ch:
return "ch";
+ case ExceptionArgument.chars:
+ return "chars";
+ case ExceptionArgument.charIndex:
+ return "charIndex";
+ case ExceptionArgument.charCount:
+ return "charCount";
case ExceptionArgument.s:
return "s";
case ExceptionArgument.input:
@@ -612,6 +624,10 @@ namespace System
{
case ExceptionResource.ArgumentOutOfRange_Index:
return SR.ArgumentOutOfRange_Index;
+ case ExceptionResource.ArgumentOutOfRange_IndexCount:
+ return SR.ArgumentOutOfRange_IndexCount;
+ case ExceptionResource.ArgumentOutOfRange_IndexCountBuffer:
+ return SR.ArgumentOutOfRange_IndexCountBuffer;
case ExceptionResource.ArgumentOutOfRange_Count:
return SR.ArgumentOutOfRange_Count;
case ExceptionResource.Arg_ArrayPlusOffTooSmall:
@@ -694,6 +710,8 @@ namespace System
return SR.Task_WaitMulti_NullTask;
case ExceptionResource.ArgumentException_OtherNotArrayOfCorrectLength:
return SR.ArgumentException_OtherNotArrayOfCorrectLength;
+ case ExceptionResource.ArgumentNull_Array:
+ return SR.ArgumentNull_Array;
case ExceptionResource.ArgumentNull_SafeHandle:
return SR.ArgumentNull_SafeHandle;
case ExceptionResource.ArgumentOutOfRange_EndIndexStartIndex:
@@ -752,7 +770,13 @@ namespace System
value,
startIndex,
task,
+ bytes,
+ byteIndex,
+ byteCount,
ch,
+ chars,
+ charIndex,
+ charCount,
s,
input,
ownedMemory,
@@ -828,6 +852,8 @@ namespace System
internal enum ExceptionResource
{
ArgumentOutOfRange_Index,
+ ArgumentOutOfRange_IndexCount,
+ ArgumentOutOfRange_IndexCountBuffer,
ArgumentOutOfRange_Count,
Arg_ArrayPlusOffTooSmall,
NotSupported_ReadOnlyCollection,
@@ -869,6 +895,7 @@ namespace System
Task_ThrowIfDisposed,
Task_WaitMulti_NullTask,
ArgumentException_OtherNotArrayOfCorrectLength,
+ ArgumentNull_Array,
ArgumentNull_SafeHandle,
ArgumentOutOfRange_EndIndexStartIndex,
ArgumentOutOfRange_Enum,